EvalCompra Ok

2024-08-03 01:38:04 -05:00 · 2024-08-03 01:38:04 -05:00 · 881d3074cf
parent b5f235f5ae
commit 881d3074cf
4 changed files with 340 additions and 69 deletions
--- a/apis.py
+++ b/apis.py
@ -45,6 +45,14 @@ class Response(BaseModel):
    """
    path: str = Query("", description="Style and sentiments of text")
    model : str = Query("whisper", description="Style and sentiments of text")
 class Response4(BaseModel):
    """Request body for the LLM purchase-evaluation endpoint (``EvalLLMCompra``).

    Every field declares a default value.
    """
    # Key of the sample to evaluate; EvalLLMCompra also forwards this value as the text sent to the LLM.
    path: str = Query("", description="path file")
    # System prompt handed to the model.
    system: str = Query("", description="prompt system LLM model with ocr and image claude")
    # Content template, defaulting to a bare "%s".
    # NOTE(review): EvalLLMCompra does not read this field -- confirm whether it should.
    content: str = Query("%s", description="prompt content LLM  model with ocr")
    # Token limit forwarded to the model call.
    max_tokens: int = Query(1024, description="maxtoken LLM OCR model")
    # Name of the model to evaluate.
    model: str = Query("Claude-sonnet", description="model")
 class Response1(BaseModel):
    path: str = Query("", description="path file")
    task_prompt: str = Query("", description="task of model")
@ -66,7 +74,14 @@ class Response3(BaseModel):
    """
    path: str = Query("", description="Style and sentiments of text")
    Trusted: str = Query("", description="Style and sentiments of text")
-    mode : str = Query("whisper", description="Style and sentiments of text")
+    mode : str = Query("", description="Style and sentiments of text")
 class Response5(BaseModel):
    """Request body for the ``addPrompt`` endpoint: a prompt text and its mode.
    """
    # Text of the prompt to store.
    # NOTE(review): the description string looks copied from another model.
    prompt: str = Query("", description="Style and sentiments of text")
    # Evaluation mode the prompt belongs to; addPrompt checks it against mode_list.
    mode : str = Query("", description="Style and sentiments of text")
 #Funcionales
@app.get("/addTrusted")
@ -99,11 +114,11 @@ def addTrusted(response:Response3):
            content={"content": "file no found" }
            )
    if mode_list[mode]=="texto":
-        hash1 = hashlib.sha256(path.encode()).hexdigest()+".txt"
+        info=str({"path":path,"trusted":Trusted,"mode":mode})
-        f = open("example/texto/"+hash1, "w")
+        hash1 = hashlib.sha256(info.encode()).hexdigest()
-        f.write(path)
+        # with open("example/texto/"+hash1, 'w') as f:
-        f.close()
+        #     json.dump(info, f)
-        path=pwd+"/"+pathText+hash1
+        # path=pwd+"/"+pathText+hash1
        length=len(Trusted)
        size=0
        duration=0
@ -112,23 +127,64 @@ def addTrusted(response:Response3):
        size=file_stats.st_size / (1024 * 1024)
        length=0
        duration=0
        hash1=""
    elif mode_list[mode]=="audio":
        with audioread.audio_open(path) as f: 
            duration = f.duration
            length=0
            size=0
-
+            hash1=""
    if db((db.trusted.path == path)&(db.trusted.mode == mode)).count()==0:
-        db.trusted.insert(path=path,trusted=Trusted,mode=mode,size=size,duration=duration,last_modified=last_modified,length=length )
+        db.trusted.insert(path=path,trusted=Trusted,mode=mode,size=size,duration=duration,last_modified=last_modified,length=length,hash=hash1 )
        db.commit()
        return "Add %s in mode %s"%(path,mode)
    else:
        item=db((db.trusted.path == path)&(db.trusted.mode == mode)).select().last()
        modification_count=item.modification_count + 1
-        db((db.trusted.path == path)&(db.trusted.mode == mode)).update(trusted=Trusted,size=size,duration =duration,length=length,last_modified=last_modified,modification_count= modification_count)
+        db((db.trusted.path == path)&(db.trusted.mode == mode)).update(trusted=Trusted,size=size,duration =duration,length=length,last_modified=last_modified,modification_count= modification_count,hash=hash1)
        db.commit()
        return "Update %s in mode %s"%(path,mode)
@app.get("/addPrompt")
@app.post("/addPrompt")
def addPrompt(response:Response5):
    """Store a prompt for a mode, or refresh it when it is already stored.

    Args:
        response (Response5): request body with two fields:
            prompt: text of the prompt to store.
            mode: evaluation mode the prompt belongs to. It must be a key of
                ``mode_list``; only ``llm_compra`` is handled for prompts.

    Returns:
        str: ``"Add ..."`` when a new row was inserted, ``"Update ..."`` when
        an existing row was refreshed.
        JSONResponse: status 404 when the mode is unknown, status 400 when the
        mode is known but prompts are not handled for it.
    """
    prompt = response.prompt
    mode = response.mode
    if mode not in mode_list:
        return JSONResponse(
            status_code=404,
            content={"content": "mode no found"},
        )
    if mode != "llm_compra":
        # Without this guard hash1/length would be unbound below and the
        # request would die with an UnboundLocalError (HTTP 500).
        return JSONResponse(
            status_code=400,
            content={"content": "mode not supported for prompts"},
        )

    last_modified = datetime.now()
    # The SHA-256 hex digest of the prompt text identifies the row.
    hash1 = hashlib.sha256(prompt.encode()).hexdigest()
    length = len(prompt)

    stored = db((db.prompt.hash == hash1) & (db.prompt.mode == mode))
    if stored.count() == 0:
        db.prompt.insert(
            prompt=prompt,
            mode=mode,
            last_modified=last_modified,
            length=length,
            hash=hash1,
        )
        db.commit()
        return "Add %s in mode %s" % (prompt, mode)

    # A matching hash means the same text, so the stored length is len(prompt).
    updated = stored.update(
        prompt=prompt,
        mode=mode,
        last_modified=last_modified,
        length=length,
        hash=hash1,
    )
    db.commit()
    print(updated, last_modified)
    return "Update %s in mode %s" % (prompt, mode)
@app.get("/EvalVoice")
@app.post("/EvalVoice")
 def EvalVoice(response:Response):
@ -141,17 +197,18 @@ def EvalVoice(response:Response):
    )
    Trusted=db((db.trusted.path == path ) & ( db.trusted.mode == "voice")).select().last().trusted
    print(Trusted)
    if model=="whisper":
        Sal=main.EvalWhisper(path,Trusted)
    else:
        Sal=main.EvalVosk(path,Trusted)
    Sal["last_modified"]=datetime.now()
-    if db(db.analitic_voice.path == Sal["path"] and db.analitic_voice.model == Sal["model"]).count()==0:
+    if db((db.analitic_voice.path == Sal["path"]) & (db.analitic_voice.model == Sal["model"])).count()==0:
        print(1,Sal)
        db.analitic_voice.insert(**Sal)
        db.commit()
    else:
-        db(db.analitic_voice.path == Sal["path"] and db.analitic_voice.model == Sal["model"]).update(similarity= Sal["similarity"],similaritypartial= Sal["similaritypartial"],last_modified=Sal["last_modified"])
+        print(2,Sal)
        db((db.analitic_voice.path == Sal["path"]) & (db.analitic_voice.model == Sal["model"])).update(similarity= Sal["similarity"],similaritypartial= Sal["similaritypartial"],last_modified=Sal["last_modified"])
        db.commit()
    return Sal
@ -192,7 +249,7 @@ def EvalVoicehtml():
    </style>
 </head>
 <body>
-    <h1>Petición POST a API</h1>
+    <h1>Petición Evaluar modelo de voz comtra datos curados</h1>
    <select id="texto1">
        %s
@ -237,6 +294,137 @@ def EvalVoicehtml():
    """%(Sal)
    return HTMLResponse(content=html, status_code=200)
@app.get("/EvalLLMCompra")
@app.post("/EvalLLMCompra")
def EvalLLMCompra(response:Response4):
    """Evaluate an LLM on a trusted ``llm_compra`` sample and persist the result.

    The request's ``path`` selects the trusted row and is also the text sent to
    the model. The evaluation result is inserted into ``analitic_llm_compra``,
    or used to update the row that already has the same path and model.

    Returns:
        dict: the evaluation result with ``last_modified`` added.
        JSONResponse: status 404 when no trusted row exists for the path.
    """
    path = response.path
    model = response.model
    system = response.system
    max_tokens = response.max_tokens

    trusted_rows = db((db.trusted.path == path) & (db.trusted.mode == "llm_compra"))
    if trusted_rows.count() == 0:
        return JSONResponse(
            status_code=404,
            content={"content": "Trusted no found"},
        )
    Trusted = trusted_rows.select().last().trusted

    Sal = main.EvalModelLLMCompra(system, path, model, max_tokens, Trusted)
    Sal["last_modified"] = datetime.now()

    analytics = db(
        (db.analitic_llm_compra.path == Sal["path"])
        & (db.analitic_llm_compra.model == Sal["model"])
    )
    if analytics.count() != 0:
        print(2, Sal)
        analytics.update(
            similarity=Sal["similarity"],
            similaritypartial=Sal["similaritypartial"],
            last_modified=Sal["last_modified"],
        )
    else:
        print(1, Sal)
        db.analitic_llm_compra.insert(**Sal)
    db.commit()
    return Sal
def _option_tags(values):
    """Render one HTML ``<option>`` per value, escaped for attribute and text use."""
    # Local import: the file's import block is not part of this block.
    from html import escape

    tags = []
    for position, value in enumerate(values, start=1):
        # Stored paths/prompts may contain quotes or markup; escape them so
        # they cannot break the attribute or inject HTML.
        safe = escape(str(value), quote=True)
        tags.append("""<option value="%s">Opción %s, %s</option>
        """ % (safe, position, safe))
    return "".join(tags)


@app.get("/evalllmcomprahtml")
def EvalLLMComprahtml():
    """Serve the HTML form that posts an evaluation request to ``/EvalLLMCompra``.

    The first selector lists the paths of the trusted ``llm_compra`` rows, the
    second a fixed set of model names, and the third the stored ``llm_compra``
    prompts (sent as the system prompt). ``max_tokens`` is sent only when the
    text box holds a number.

    Returns:
        HTMLResponse: the rendered page with status 200.
    """
    sample_options = _option_tags(
        row.path for row in db((db.trusted.mode == "llm_compra")).select()
    )
    prompt_options = _option_tags(
        row.prompt for row in db((db.prompt.mode == "llm_compra")).select()
    )
    page = """<!DOCTYPE html>
<html lang="es">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Evaluacion de modelos LLM compra</title>
    <style>
        body {
            font-family: Arial, sans-serif;
            margin: 20px;
        }
        input, button {
            margin: 10px 0;
            padding: 5px;
        }
        #respuesta {
            margin-top: 20px;
            padding: 10px;
            border: 1px solid #ccc;
            background-color: #f9f9f9;
        }
    </style>
</head>
<body>
    <h1>Petición Evaluar modelo LLM de compra contra datos curados</h1>
    <select id="texto1">
        %s
    </select>
    <br>
    <select id="texto2">
        <option value="meta-llama/Meta-Llama-3.1-70B-Instruct">meta-llama/Meta-Llama-3.1-70B-Instruct</option>
        <option value="meta-llama/Meta-Llama-3.1-8B-Instruct">meta-llama/Meta-Llama-3.1-8B-Instruct</option>
        <option value="Mistral">Mistral</option>
    </select>
        <br>
    <select id="texto3">
        %s
    </select>
        <br>
    <input type="text" id="texto4" placeholder="max_tokens">
    <br>
    <button onclick="enviarPeticion()">Enviar petición</button>
    <div id="respuesta"></div>
    <script>
        function enviarPeticion() {
            const texto1 = document.getElementById('texto1').value;
            const texto2 = document.getElementById('texto2').value;
            const texto3 = document.getElementById('texto3').value;
            const texto4 = document.getElementById('texto4').value;
            const datos = {
                path: texto1,
                model: texto2,
                system: texto3
            };
            const maxTokens = parseInt(texto4, 10);
            if (!Number.isNaN(maxTokens)) {
                datos.max_tokens = maxTokens;
            }
            fetch('/EvalLLMCompra', {
                method: 'POST',
                headers: {
                    'Content-Type': 'application/json'
                },
                body: JSON.stringify(datos)
            })
            .then(response => response.json())
            .then(data => {
                document.getElementById('respuesta').textContent = JSON.stringify(data, null, 2);
            })
            .catch(error => {
                document.getElementById('respuesta').textContent = 'Error: ' + error;
            });
        }
    </script>
</body>
</html>
    """ % (sample_options, prompt_options)
    return HTMLResponse(content=page, status_code=200)
 #Por revisar
 def list2tablehtml(listdata,model):
--- a/databases.py
+++ b/databases.py
@ -9,8 +9,20 @@ db.define_table(
    Field("sizeMB",type="double",default=0),# audio,factura
    Field("length",type="integer",default=0),#texto
    Field('last_modified', 'datetime'),
-    Field('modification_count', 'integer', default=0)
+    Field('modification_count', 'integer', default=0),
    Field('hash')
 )
 # Table of stored prompts, one row per prompt text.
 db.define_table(
    "prompt",
    # Prompt text.
    Field("prompt"),
    # Evaluation mode the prompt belongs to.
    Field("mode"),
    # Integer length column, 0 when not provided.
    Field("length",type="integer",default=0),
    # NOTE(review): uniqueness is declared on hash alone, while addPrompt in
    # apis.py looks rows up by hash AND mode -- confirm the same text is never
    # stored under two modes.
    Field('hash',unique=True),
    # Timestamp of the last write.
    Field('last_modified', 'datetime'),
 )
 db.define_table(
    "analitic_voice",
    Field("content"),
--- a/gui.py
+++ b/gui.py
@ -7,12 +7,9 @@ import pandas as pd
 import requests
 import statistics
 from databases import db
 import time
 pwd = os.getcwd()
 HTML = os.path.join(pwd,"html", "index.html")
 file_read = codecs.open(HTML, "r", "utf-8")
 index = file_read.read()
 html_page_index = Html(index)
 def extractConfig(nameModel="SystemData",relPath=os.path.join(pwd,"conf/experiment_config.json"),dataOut="keyantrophics"):
    configPath=os.path.join(os.getcwd(),relPath)
    with open(configPath, 'r', encoding='utf-8') as file:
@ -20,6 +17,8 @@ def extractConfig(nameModel="SystemData",relPath=os.path.join(pwd,"conf/experime
    Output= config[dataOut]
    return Output
 mode_list=extractConfig(nameModel="SystemData",dataOut="mode_list")
 def getmetricvoice(model):
    rows = db(db.analitic_voice.model==model).select()
    rows_list = rows.as_list()
@ -36,70 +35,113 @@ def getmetricvoice(model):
 def html_getmetricvoice():
    models=list()
    t=time.time()
    for row in db().select(db.analitic_voice.model, distinct=True):
        models.append(row.model)
    data={}
    for model in models:
        data[model]=getmetricvoice(model)
    data=pd.DataFrame(data).T
-    datafiles={}
+    data_files={}
    for row in db().select(db.analitic_voice.ALL):
-        datafiles[row.id]=row.as_dict()
+        data_files[row.id]=row.as_dict()
-    datafiles=pd.DataFrame(datafiles).T
+    #print(datafiles)
    data_files=pd.DataFrame(data_files).T
    #table = pd.pivot_table(data_files, values=['path', 'similarity','similaritypartial'], index=['path'],
                       #columns=['model'], aggfunc="sum")
    #print(table,table.columns)
    html="""
    <h1>Data general de los modelos</h1>
    <taipy:table>{data_voice}</taipy:table>
    <h1>Data de cada muestra</h1>
    <taipy:table filter=True>{data_files_voice}</taipy:table>
    """
    #<taipy:chart mode="markers" x="x" y[1]="time" y[2]="similarity">{data_files_voice}</taipy:chart>
    print(time.time()-t)
    return html,data,data_files
-    return html,data,datafiles
+def getmetricllm_compra(model):
-html_page_getmetricsvoice,data_voice,data_files_voices=html_getmetricvoice()
+    rows = db(db.analitic_llm_compra.model==model).select()
    rows_list = rows.as_list()
    data=pd.DataFrame(rows_list)
    durationL=list()
    for i in rows_list:
        durationL.append(db(db.trusted.path == i["path"] ).select().last().duration)
    duration=statistics.mean(durationL)
    time=pd.pivot_table(data,values=['time','similarity', 'similaritypartial'],index="model")['time'].values[0]
    similarity=pd.pivot_table(data,values=['time','similarity', 'similaritypartial'],index="model")['similarity'].values[0]
    similaritypartial=pd.pivot_table(data,values=['time','similarity', 'similaritypartial'],index="model")['similaritypartial'].values[0]
    efectivetime=time/duration
    return ({"model":model,"duration":duration,"time":time,"similarity":similarity,"similaritypartial":similaritypartial,"efectivetime":efectivetime})
-mode="voice"
+def html_getmetricllm_compra():
-modetypedata="audio"
+    models=list()
-file="id2"
+    t=time.time()
-def changemenu(mode):
+    for row in db().select(db.analitic_llm_compra.model, distinct=True):
-    if mode_list[mode]=="audio":
+        models.append(row.model)
-        pathori="example/audio"
+    data={}
-    if mode_list[mode]=="factura":
+    for model in models:
-        pathori="example/factura"
+        data[model]=getmetricllm_compra(model)
-    if mode_list[mode]=="texto":
+    data=pd.DataFrame(data).T
-        pathori="example/texto"
+    data_files={}
-    seltypedata=mode_list[mode]  
+    for row in db().select(db.analitic_llm_compra.ALL):
-    dir_list = os.listdir(pathori)
+        data_files[row.id]=row.as_dict()
    #print(datafiles)
    data_files=pd.DataFrame(data_files).T
-    return pathori,seltypedata,dir_list
+    #table = pd.pivot_table(data_files, values=['path', 'similarity','similaritypartial'], index=['path'],
                       #columns=['model'], aggfunc="sum")
    #print(table,table.columns)
    html="""
    <h1>Data general de los modelos</h1>
    <taipy:table>{data_voice}</taipy:table>
    <h1>Data de cada muestra</h1>
    <taipy:table filter=True>{data_files_voice}</taipy:table>
-def trustedallhtml(mode):
+    """
-    pathori,seltypedata,dir_list=changemenu(mode)
+    #<taipy:chart mode="markers" x="x" y[1]="time" y[2]="similarity">{data_files_voice}</taipy:chart>
    print(time.time()-t)
    return html,data,data_files
 def on_init(state):
    """Fill the GUI state with the voice-metrics page and its two data tables."""
    page, summary, samples = html_getmetricvoice()
    state.html_page_getmetricsvoice = page
    state.data_voice = summary
    state.data_files_voice = samples
 html_page_getmetricsvoice,data_voice,data_files_voice=html_getmetricvoice()
 # mode="voice"
 # modetypedata="audio"
 # file="id2"
 # def changemenu(mode):
 #     if mode_list[mode]=="audio":
 #         pathori="example/audio"
 #     if mode_list[mode]=="factura":
 #         pathori="example/factura"
 #     if mode_list[mode]=="texto":
 #         pathori="example/texto"
 #     seltypedata=mode_list[mode]  
 #     dir_list = os.listdir(pathori)
 #     return pathori,seltypedata,dir_list
    textmode=""
    for modeused in mode_list.keys():
        textmode=textmode+"('%s','%s'),"%(modeused,modeused)
    html="""<taipy:selector lov="{[%s]}" dropdown True on_change=changemenu>{sel}</taipy:selector>"""%(textmode) 
    Sal=""
    for i in dir_list:
        temp="""('%s', '%s'),"""%(str(pwd+"/"+pathori+"/"+i),str(i))
        Sal=Sal+temp
    html2="""<taipy:selector lov="{[%s]}" dropdown True >{sel2}</taipy:selector>"""%(Sal)
    return html+html2
 html_page_trustedall = Html(trustedallhtml(mode))
 #print(sel,sel2,seltypedata)
 HTML = os.path.join(pwd,"html", "index.html")
 file_read = codecs.open(HTML, "r", "utf-8")
 index = file_read.read()
 html_page_index = Html(index)
 data=pd.DataFrame({'col1': [1, 2], 'col2': [3, 4]})
 pages = {
  "/": html_page_index ,
  "getmetricsvoice": Html(html_page_getmetricsvoice),
  "trustedall":html_page_trustedall
 }
 app = Gui(pages=pages)
 app.on_init=on_init
 if __name__=="__main__":
-    app.run(use_reloader=True,port=7882, change_delay=1600)#state.imageActive2,
+    app.run(use_reloader=True,port=7882)#state.imageActive2,
--- a/main.py
+++ b/main.py
@ -15,11 +15,8 @@ def EvalVoice2Text(endpoint,datajson,Trusted):
    """
    apiUrl=urlAud+endpoint
    response = requests.get(apiUrl,  json=datajson)
    print(datajson)
    A=json.loads(response.content)
    print(A)
    time=A['time']
    similarity=fuzz.ratio( Trusted.strip().lower(),A['message'].strip().lower())
    similarityPartial=fuzz.partial_ratio( Trusted.strip().lower(),A['message'].strip().lower())
    path=datajson["local"]
@ -34,27 +31,59 @@ def EvalVoice2Text(endpoint,datajson,Trusted):
            "path":path
            }
 def EvalWhisper(path,Trusted=""):
    """Evaluate the ``whisper`` model on the audio at ``path`` against ``Trusted``.

    Builds the request payload and delegates to ``EvalVoice2Text`` using the
    ``/voice2txt`` endpoint; returns whatever that call returns.
    """
    request_body = {
        "url": "",
        "password": password,
        "model": "whisper",
        "local": path,
    }
    return EvalVoice2Text("/voice2txt", request_body, Trusted)
 # EvalWhisper(path="example/AwACAgEAAxkBAAIBw2YX8o2vGGCNtZCXk7mY1Bm5w__lAAJmBAACxe7ARI1fUWAGcz_RNAQ.ogg",
 #             Trusted="Hoy compre dos medicinas Tereleji en Cruz Verde por un monto de 494 mil 400 pesos colombianos.",
 #             endpoint="/voice2txt")
 def EvalVosk(path,Trusted=""):
    """Evaluate the small Spanish Vosk model on the audio at ``path`` against ``Trusted``.

    Builds the request payload and delegates to ``EvalVoice2Text`` using the
    ``/voice2txtlocal`` endpoint; returns whatever that call returns.
    """
    request_body = {
        "url": "",
        "password": password,
        "model": "models/vosk-model-small-es-0.42",
        "local": path,
    }
    return EvalVoice2Text("/voice2txtlocal", request_body, Trusted)
 def EvalLLMCompra(endpoint,datajson,Trusted):
    """Query the text-generation API and score its answer against trusted text.

    Args:
        endpoint: path appended to ``urlText`` to build the request URL.
        datajson: JSON payload sent with the request. Its ``"model"`` entry is
            echoed in the result; its ``"content"`` entry (the evaluated input)
            is reported as ``"path"``.
        Trusted: reference text the answer is compared with.

    Returns:
        dict with keys ``content`` (the answer), ``trusted``, ``model``,
        ``time`` (as reported by the API), ``similarity``,
        ``similaritypartial`` and ``path``.
    """
    apiUrl=urlText+endpoint
    response = requests.get(apiUrl,  json=datajson)
    A=json.loads(response.content)
    elapsed=A['time']
    print(A)
    message=A['content']
    # Both texts are compared trimmed and lower-cased.
    expected=Trusted.strip().lower()
    answer=message.strip().lower()
    return {"content":message,
            "trusted":Trusted,
            "model":datajson["model"],
            "time":elapsed,
            "similarity":fuzz.ratio(expected,answer),
            "similaritypartial":fuzz.partial_ratio(expected,answer),
            # Identify the sample by its input so the result can be matched to
            # the trusted row; fall back to the answer if no input was given.
            "path":datajson.get("content",message)
            }
 def EvalModelLLMCompra(system,content,model,max_new_tokens,Trusted):
    """Evaluate ``model`` on ``content`` through the ``/genTextCustom`` endpoint.

    Builds the request payload from the arguments and the module-level
    ``password`` and delegates to ``EvalLLMCompra``; returns its result.
    """
    payload = {
        "system": system,
        "content": content,
        "password": password,
        "model": model,
        "max_new_token": max_new_tokens,
    }
    return EvalLLMCompra("/genTextCustom", payload, Trusted)
 # EvalVosk(path="example/AwACAgEAAxkBAAIBw2YX8o2vGGCNtZCXk7mY1Bm5w__lAAJmBAACxe7ARI1fUWAGcz_RNAQ.ogg",
 #             Trusted="Hoy compre dos medicinas Tereleji en Cruz Verde por un monto de 494 mil 400 pesos colombianos.",
 #             endpoint="/voice2txtlocal")
 # EvalWhisper(path="example/AwACAgEAAxkBAAIBw2YX8o2vGGCNtZCXk7mY1Bm5w__lAAJmBAACxe7ARI1fUWAGcz_RNAQ.ogg",
 #             Trusted="Hoy compre dos medicinas Tereleji en Cruz Verde por un monto de 494 mil 400 pesos colombianos.",
 #             endpoint="/voice2txt")
 def ocrfacturas(path,task_prompt):