From 881d3074cf48ab9e81ede1254a026113e54f0377 Mon Sep 17 00:00:00 2001
From: marioggil <marioggil@gmail.com>
Date: Sat, 3 Aug 2024 01:38:04 -0500
Subject: [PATCH] EvalCompra Ok

---
 apis.py      | 214 +++++++++++++++++++++++++++++++++++++++++++++++----
 databases.py |  14 +++-
 gui.py       | 134 +++++++++++++++++++++-----------
 main.py      |  47 ++++++++---
 4 files changed, 340 insertions(+), 69 deletions(-)

diff --git a/apis.py b/apis.py
index e626f38..e29be7d 100644
--- a/apis.py
+++ b/apis.py
@@ -45,6 +45,14 @@ class Response(BaseModel):
     """
     path: str = Query("", description="Style and sentiments of text")
     model : str = Query("whisper", description="Style and sentiments of text")
+class Response4(BaseModel):
+    """Request body of /EvalLLMCompra."""
+    path: str = Query("", description="path file")
+    system: str = Query("", description="prompt system LLM model with ocr and image claude")
+    content: str = Query("%s", description="prompt content LLM  model with ocr")
+    max_tokens: int = Query(1024, description="maxtoken LLM OCR model")
+    model: str = Query("Claude-sonnet", description="model")
+
 class Response1(BaseModel):
     path: str = Query("", description="path file")
     task_prompt: str = Query("", description="task of model")
@@ -66,7 +74,14 @@ class Response3(BaseModel):
     """
     path: str = Query("", description="Style and sentiments of text")
     Trusted: str = Query("", description="Style and sentiments of text")
-    mode : str = Query("whisper", description="Style and sentiments of text")
+    mode : str = Query("", description="Style and sentiments of text")
+
+class Response5(BaseModel):
+    """Request body of /addPrompt: the prompt text and the mode it is
+    stored under."""
+    prompt: str = Query("", description="Style and sentiments of text")
+    mode : str = Query("", description="Style and sentiments of text")
+
 
 #Funcionales
 @app.get("/addTrusted")
@@ -99,11 +114,11 @@ def addTrusted(response:Response3):
             content={"content": "file no found" }
             )
     if mode_list[mode]=="texto":
-        hash1 = hashlib.sha256(path.encode()).hexdigest()+".txt"
-        f = open("example/texto/"+hash1, "w")
-        f.write(path)
-        f.close()
-        path=pwd+"/"+pathText+hash1
+        info=str({"path":path,"trusted":Trusted,"mode":mode})
+        hash1 = hashlib.sha256(info.encode()).hexdigest()
+        # with open("example/texto/"+hash1, 'w') as f:
+        #     json.dump(info, f)
+        # path=pwd+"/"+pathText+hash1
         length=len(Trusted)
         size=0
         duration=0
@@ -112,22 +127,63 @@ def addTrusted(response:Response3):
         size=file_stats.st_size / (1024 * 1024)
         length=0
         duration=0
+        hash1=""
     elif mode_list[mode]=="audio":
         with audioread.audio_open(path) as f: 
             duration = f.duration
             length=0
             size=0
-
+            hash1=""
     if db((db.trusted.path == path)&(db.trusted.mode == mode)).count()==0:
-        db.trusted.insert(path=path,trusted=Trusted,mode=mode,size=size,duration=duration,last_modified=last_modified,length=length )
+        db.trusted.insert(path=path,trusted=Trusted,mode=mode,size=size,duration=duration,last_modified=last_modified,length=length,hash=hash1 )
         db.commit()
         return "Add %s in mode %s"%(path,mode)
     else:
         item=db((db.trusted.path == path)&(db.trusted.mode == mode)).select().last()
         modification_count=item.modification_count + 1
-        db((db.trusted.path == path)&(db.trusted.mode == mode)).update(trusted=Trusted,size=size,duration =duration,length=length,last_modified=last_modified,modification_count= modification_count)
+        db((db.trusted.path == path)&(db.trusted.mode == mode)).update(trusted=Trusted,size=size,duration =duration,length=length,last_modified=last_modified,modification_count= modification_count,hash=hash1)
         db.commit()
         return "Update %s in mode %s"%(path,mode)
+    
+
+@app.get("/addPrompt")
+@app.post("/addPrompt")
+def addPrompt(response:Response5):
+    """Store a prompt for a mode, or refresh it when it already exists.
+
+    Args:
+        response (Response5): 2 params:
+        prompt : text of the prompt to store.
+        mode: llm_compra,llm_factura,llm_generaciontexto,llm_rag,ocr,voice,
+
+    Returns:
+        "Add ..." or "Update ..." message, or a 404 JSONResponse when
+        mode is not a key of mode_list.
+    """
+    prompt=response.prompt
+    mode=response.mode
+    last_modified=datetime.now()
+    if mode not in mode_list.keys():
+        return JSONResponse(
+        status_code=404,
+        content={"content": "mode no found" }
+    )
+    # hash1 and length are computed for every valid mode, so the lookup
+    # below never runs with undefined names.
+    # NOTE(review): db.prompt.hash is declared unique, so the same text under
+    # a second mode may be rejected by the database - confirm the schema.
+    hash1 = hashlib.sha256(prompt.encode()).hexdigest()
+    length=len(prompt)
+    same_prompt=(db.prompt.hash == hash1)&(db.prompt.mode == mode)
+    if db(same_prompt).count()==0:
+        db.prompt.insert(prompt=prompt,mode=mode,last_modified=last_modified,length=length,hash=hash1 )
+        db.commit()
+        return "Add %s in mode %s"%(prompt,mode)
+    # Same hash means same text: length stays len(prompt), only the date moves.
+    db(same_prompt).update(prompt=prompt,mode=mode,last_modified=last_modified,length=length,hash=hash1)
+    db.commit()
+    return "Update %s in mode %s"%(prompt,mode)
+
 
 @app.get("/EvalVoice")
 @app.post("/EvalVoice")
@@ -141,17 +197,18 @@ def EvalVoice(response:Response):
     )
     
     Trusted=db((db.trusted.path == path ) & ( db.trusted.mode == "voice")).select().last().trusted
-    print(Trusted)
     if model=="whisper":
         Sal=main.EvalWhisper(path,Trusted)
     else:
         Sal=main.EvalVosk(path,Trusted)
     Sal["last_modified"]=datetime.now()
-    if db(db.analitic_voice.path == Sal["path"] and db.analitic_voice.model == Sal["model"]).count()==0:
+    if db((db.analitic_voice.path == Sal["path"]) & (db.analitic_voice.model == Sal["model"])).count()==0:
+        print(1,Sal)
         db.analitic_voice.insert(**Sal)
         db.commit()
     else:
-        db(db.analitic_voice.path == Sal["path"] and db.analitic_voice.model == Sal["model"]).update(similarity= Sal["similarity"],similaritypartial= Sal["similaritypartial"],last_modified=Sal["last_modified"])
+        print(2,Sal)
+        db((db.analitic_voice.path == Sal["path"]) & (db.analitic_voice.model == Sal["model"])).update(similarity= Sal["similarity"],similaritypartial= Sal["similaritypartial"],last_modified=Sal["last_modified"])
         db.commit()
     return Sal
 
@@ -192,7 +249,7 @@ def EvalVoicehtml():
     </style>
 </head>
 <body>
-    <h1>Petición POST a API</h1>
+    <h1>Petición Evaluar modelo de voz contra datos curados</h1>
 
     <select id="texto1">
         %s
@@ -237,6 +294,137 @@ def EvalVoicehtml():
     """%(Sal)
     return HTMLResponse(content=html, status_code=200)
 
+
+@app.get("/EvalLLMCompra")
+@app.post("/EvalLLMCompra")
+def EvalLLMCompra(response:Response4):
+    """Evaluate an LLM on one trusted llm_compra sample and store the scores.
+    """
+    # For llm_compra the "path" field carries the text sent to the model.
+    path=response.path
+    trusted_rows=db((db.trusted.path == path ) & ( db.trusted.mode == "llm_compra"))
+    if trusted_rows.count()==0:
+        return JSONResponse(
+        status_code=404,
+        content={"content": "Trusted no found" }
+    )
+
+    Trusted=trusted_rows.select().last().trusted
+    Sal=main.EvalModelLLMCompra(response.system,path,response.model,response.max_tokens,Trusted)
+    Sal["last_modified"]=datetime.now()
+    # One row per (path, model): insert the first time, refresh afterwards.
+    previous=db((db.analitic_llm_compra.path == Sal["path"]) & (db.analitic_llm_compra.model == Sal["model"]))
+    if previous.count()==0:
+        print(1,Sal)
+        db.analitic_llm_compra.insert(**Sal)
+    else:
+        print(2,Sal)
+        previous.update(similarity= Sal["similarity"],similaritypartial= Sal["similaritypartial"],last_modified=Sal["last_modified"])
+    db.commit()
+    return Sal
+
+@app.get("/evalllmcomprahtml")
+def EvalLLMComprahtml():
+    """Serve the HTML form that posts one evaluation to /EvalLLMCompra.
+
+    The selects list the trusted llm_compra samples and the stored prompts.
+    """
+    from html import escape
+
+    def options(values):
+        # Values come from the database and may hold quotes or markup, so
+        # they are escaped before being placed in attributes and text.
+        return "".join(
+            """<option value="%s">Opción %s, %s</option>
+        """%(escape(str(v)),str(t),escape(str(v)))
+            for t,v in enumerate(values,1))
+
+    trusted_rows = db((db.trusted.mode == "llm_compra" )).select()
+    Sal=options(i.path for i in trusted_rows)
+    prompt_rows = db((db.prompt.mode == "llm_compra" )).select()
+    Sal2=options(i.prompt for i in prompt_rows)
+
+    html="""<!DOCTYPE html>
+<html lang="es">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>Evaluacion de modelos LLM compra</title>
+    <style>
+        body {
+            font-family: Arial, sans-serif;
+            margin: 20px;
+        }
+        input, button {
+            margin: 10px 0;
+            padding: 5px;
+        }
+        #respuesta {
+            margin-top: 20px;
+            padding: 10px;
+            border: 1px solid #ccc;
+            background-color: #f9f9f9;
+        }
+    </style>
+</head>
+<body>
+    <h1>Petición Evaluar modelo LLM de compra contra datos curados</h1>
+
+    <select id="texto1">
+        %s
+    </select>
+    
+    <br>
+    <select id="texto2">
+        <option value="meta-llama/Meta-Llama-3.1-70B-Instruct">meta-llama/Meta-Llama-3.1-70B-Instruct</option>
+        <option value="meta-llama/Meta-Llama-3.1-8B-Instruct">meta-llama/Meta-Llama-3.1-8B-Instruct</option>
+        <option value="Mistral">Mistral</option>
+    </select>
+        <br>
+    <select id="texto3">
+        %s
+    </select>
+        <br>
+    <input type="text" id="texto4" placeholder="max_tokens">
+    <br>
+    <button onclick="enviarPeticion()">Enviar petición</button>
+    <div id="respuesta"></div>
+
+    <script>
+        function enviarPeticion() {
+            const texto1 = document.getElementById('texto1').value;
+            const texto2 = document.getElementById('texto2').value;
+            const texto3 = document.getElementById('texto3').value;
+            const texto4 = parseInt(document.getElementById('texto4').value, 10);
+            const datos = {
+                path: texto1,
+                model: texto2,
+                system: texto3
+            };
+            if (!isNaN(texto4)) { datos.max_tokens = texto4; }
+
+            fetch('/EvalLLMCompra', {
+                method: 'POST',
+                headers: {
+                    'Content-Type': 'application/json'
+                },
+                body: JSON.stringify(datos)
+            })
+            .then(response => response.json())
+            .then(data => {
+                document.getElementById('respuesta').textContent = JSON.stringify(data, null, 2);
+            })
+            .catch(error => {
+                document.getElementById('respuesta').textContent = 'Error: ' + error;
+            });
+        }
+    </script>
+</body>
+</html>
+    """%(Sal,Sal2)
+    return HTMLResponse(content=html, status_code=200)
+
+
 #Por revisar
 
 def list2tablehtml(listdata,model):
diff --git a/databases.py b/databases.py
index b5bae82..d19b055 100644
--- a/databases.py
+++ b/databases.py
@@ -9,8 +9,20 @@ db.define_table(
     Field("sizeMB",type="double",default=0),# audio,factura
     Field("length",type="integer",default=0),#texto
     Field('last_modified', 'datetime'),
-    Field('modification_count', 'integer', default=0)
+    Field('modification_count', 'integer', default=0),
+    Field('hash')
 )
+
+# Prompts stored through /addPrompt; rows are looked up by (hash, mode).
+db.define_table(
+    "prompt",
+    Field("prompt"),
+    Field("mode"),
+    Field("length",type="integer",default=0),
+    Field('hash',unique=True),  # sha256 hex digest of the prompt text
+    Field('last_modified', 'datetime'),
+)
+
 db.define_table(
     "analitic_voice",
     Field("content"),
diff --git a/gui.py b/gui.py
index f96279a..eeb8233 100644
--- a/gui.py
+++ b/gui.py
@@ -7,12 +7,9 @@ import pandas as pd
 import requests
 import statistics
 from databases import db
+import time
 pwd = os.getcwd()
 
-HTML = os.path.join(pwd,"html", "index.html")
-file_read = codecs.open(HTML, "r", "utf-8")
-index = file_read.read()
-html_page_index = Html(index)
 def extractConfig(nameModel="SystemData",relPath=os.path.join(pwd,"conf/experiment_config.json"),dataOut="keyantrophics"):
     configPath=os.path.join(os.getcwd(),relPath)
     with open(configPath, 'r', encoding='utf-8') as file:
@@ -20,6 +17,8 @@ def extractConfig(nameModel="SystemData",relPath=os.path.join(pwd,"conf/experime
     Output= config[dataOut]
     return Output
 mode_list=extractConfig(nameModel="SystemData",dataOut="mode_list")
+
+
 def getmetricvoice(model):
     rows = db(db.analitic_voice.model==model).select()
     rows_list = rows.as_list()
@@ -36,70 +35,113 @@ def getmetricvoice(model):
 
 def html_getmetricvoice():
     models=list()
+    t=time.time()
     for row in db().select(db.analitic_voice.model, distinct=True):
         models.append(row.model)
     data={}
     for model in models:
         data[model]=getmetricvoice(model)
     data=pd.DataFrame(data).T
-    datafiles={}
+    data_files={}
     for row in db().select(db.analitic_voice.ALL):
-        datafiles[row.id]=row.as_dict()
-    datafiles=pd.DataFrame(datafiles).T
+        data_files[row.id]=row.as_dict()
+    #print(datafiles)
+    data_files=pd.DataFrame(data_files).T
+
+    #table = pd.pivot_table(data_files, values=['path', 'similarity','similaritypartial'], index=['path'],
+                       #columns=['model'], aggfunc="sum")
+    #print(table,table.columns)
+
     html="""
+    <h1>Data general de los modelos</h1>
     <taipy:table>{data_voice}</taipy:table>
+    <h1>Data de cada muestra</h1>
     <taipy:table filter=True>{data_files_voice}</taipy:table>
-    """
-
-    return html,data,datafiles
-html_page_getmetricsvoice,data_voice,data_files_voices=html_getmetricvoice()
-
-mode="voice"
-modetypedata="audio"
-file="id2"
-def changemenu(mode):
-    if mode_list[mode]=="audio":
-        pathori="example/audio"
-    if mode_list[mode]=="factura":
-        pathori="example/factura"
-    if mode_list[mode]=="texto":
-        pathori="example/texto"
-    seltypedata=mode_list[mode]  
-    dir_list = os.listdir(pathori)
     
-    return pathori,seltypedata,dir_list
+
+    """
+    #<taipy:chart mode="markers" x="x" y[1]="time" y[2]="similarity">{data_files_voice}</taipy:chart>
+    print(time.time()-t)
+    return html,data,data_files
+
+def getmetricllm_compra(model):
+    """Average the stored llm_compra metrics of one model.
+
+    Returns a dict with the mean trusted duration, the mean time and
+    similarity scores, and time divided by duration ("efectivetime").
+    """
+    rows_list = db(db.analitic_llm_compra.model==model).select().as_list()
+    durationL=[db(db.trusted.path == i["path"] ).select().last().duration for i in rows_list]
+    duration=statistics.mean(durationL)
+    means=pd.pivot_table(pd.DataFrame(rows_list),values=['time','similarity', 'similaritypartial'],index="model")
+    elapsed,similarity,similaritypartial=(means[c].values[0] for c in ('time','similarity','similaritypartial'))
+    return ({"model":model,"duration":duration,"time":elapsed,"similarity":similarity,"similaritypartial":similaritypartial,"efectivetime":elapsed/duration})
+
+
+def html_getmetricllm_compra():
+    """Return (html, data, data_files) for the llm_compra metrics page:
+    the page template, a per-model summary and a per-evaluation table.
+    """
+    models=list()
+    t=time.time()
+    for row in db().select(db.analitic_llm_compra.model, distinct=True):
+        models.append(row.model)
+    data={}
+    for model in models:
+        data[model]=getmetricllm_compra(model)
+    data=pd.DataFrame(data).T
+    data_files={}
+    for row in db().select(db.analitic_llm_compra.ALL):
+        data_files[row.id]=row.as_dict()
+    data_files=pd.DataFrame(data_files).T
+
+    # NOTE(review): the template binds {data_voice}/{data_files_voice}, the
+    # names used by the voice page - confirm this is intended here.
+    html="""
+    <h1>Data general de los modelos</h1>
+    <taipy:table>{data_voice}</taipy:table>
+    <h1>Data de cada muestra</h1>
+    <taipy:table filter=True>{data_files_voice}</taipy:table>
+    
+
+    """
+    # Seconds spent building the tables, printed for debugging.
+    print(time.time()-t)
+    return html,data,data_files
 
 
-def trustedallhtml(mode):
-    pathori,seltypedata,dir_list=changemenu(mode)
+def on_init(state):
+    """Load the voice metrics into state (registered as the Gui on_init callback)."""
+    state.html_page_getmetricsvoice,state.data_voice,state.data_files_voice=html_getmetricvoice()
+    
+    
+
+html_page_getmetricsvoice,data_voice,data_files_voice=html_getmetricvoice()
+# mode="voice"
+# modetypedata="audio"
+# file="id2"
+# def changemenu(mode):
+#     if mode_list[mode]=="audio":
+#         pathori="example/audio"
+#     if mode_list[mode]=="factura":
+#         pathori="example/factura"
+#     if mode_list[mode]=="texto":
+#         pathori="example/texto"
+#     seltypedata=mode_list[mode]  
+#     dir_list = os.listdir(pathori)
+    
+#     return pathori,seltypedata,dir_list
 
 
-    textmode=""
-    for modeused in mode_list.keys():
-        textmode=textmode+"('%s','%s'),"%(modeused,modeused)
-    html="""<taipy:selector lov="{[%s]}" dropdown True on_change=changemenu>{sel}</taipy:selector>"""%(textmode) 
-    Sal=""
-    for i in dir_list:
-        temp="""('%s', '%s'),"""%(str(pwd+"/"+pathori+"/"+i),str(i))
-        Sal=Sal+temp
-    html2="""<taipy:selector lov="{[%s]}" dropdown True >{sel2}</taipy:selector>"""%(Sal)
-    return html+html2
 
-html_page_trustedall = Html(trustedallhtml(mode))
-#print(sel,sel2,seltypedata)
-HTML = os.path.join(pwd,"html", "index.html")
-file_read = codecs.open(HTML, "r", "utf-8")
-index = file_read.read()
-html_page_index = Html(index)
 
 data=pd.DataFrame({'col1': [1, 2], 'col2': [3, 4]})
 
 pages = {
-  "/": html_page_index ,
   "getmetricsvoice": Html(html_page_getmetricsvoice),
-  "trustedall":html_page_trustedall
 }
 
 app = Gui(pages=pages)
+app.on_init=on_init
 if __name__=="__main__":
-    app.run(use_reloader=True,port=7882, change_delay=1600)#state.imageActive2,
+    app.run(use_reloader=True,port=7882)#state.imageActive2,
diff --git a/main.py b/main.py
index fc6f87d..47aef3a 100644
--- a/main.py
+++ b/main.py
@@ -15,11 +15,8 @@ def EvalVoice2Text(endpoint,datajson,Trusted):
     """
     apiUrl=urlAud+endpoint
     response = requests.get(apiUrl,  json=datajson)
-    print(datajson)
     A=json.loads(response.content)
-    print(A)
     time=A['time']
-
     similarity=fuzz.ratio( Trusted.strip().lower(),A['message'].strip().lower())
     similarityPartial=fuzz.partial_ratio( Trusted.strip().lower(),A['message'].strip().lower())
     path=datajson["local"]
@@ -34,27 +31,59 @@ def EvalVoice2Text(endpoint,datajson,Trusted):
             "path":path
             }
 
-
 def EvalWhisper(path,Trusted=""):
     endpoint="/voice2txt"
     datajson={"url":"","password":password ,"model":"whisper","local":path}
     return EvalVoice2Text(endpoint,datajson,Trusted)
 
-
-# EvalWhisper(path="example/AwACAgEAAxkBAAIBw2YX8o2vGGCNtZCXk7mY1Bm5w__lAAJmBAACxe7ARI1fUWAGcz_RNAQ.ogg",
-#             Trusted="Hoy compre dos medicinas Tereleji en Cruz Verde por un monto de 494 mil 400 pesos colombianos.",
-#             endpoint="/voice2txt")
-
 def EvalVosk(path,Trusted=""):
     endpoint="/voice2txtlocal"
     datajson={"url":"","password":password ,"model":"models/vosk-model-small-es-0.42","local":path}
     return EvalVoice2Text(endpoint,datajson,Trusted)
 
 
+def EvalLLMCompra(endpoint,datajson,Trusted):
+    """Score the text generated by an LLM endpoint against the trusted text.
+
+    Sends datajson to urlText+endpoint and compares the returned "content"
+    with Trusted (case-insensitive, stripped) using fuzz ratios.
+    """
+    apiUrl=urlText+endpoint
+    response = requests.get(apiUrl,  json=datajson)
+    A=json.loads(response.content)
+    message=A['content']
+    expected=Trusted.strip().lower()
+    generated=message.strip().lower()
+    # "path" identifies the evaluated sample, so it is the input text sent
+    # in datajson["content"], not the generated answer.
+    return {"content":message,
+            "trusted":Trusted,
+            "model":datajson["model"],
+            "time":A['time'],
+            "similarity":fuzz.ratio(expected,generated),
+            "similaritypartial":fuzz.partial_ratio(expected,generated),
+            "path":datajson["content"]
+            }
+
+def EvalModelLLMCompra(system,content,model,max_new_tokens,Trusted):
+    """Evaluate model on content through /genTextCustom and score it against Trusted."""
+    # NOTE(review): the payload key is "max_new_token" (singular) - confirm the API expects it.
+    endpoint="/genTextCustom"
+    datajson={"system":system,"content":content,"password":password ,"model":model,"max_new_token":max_new_tokens}
+    return EvalLLMCompra(endpoint,datajson,Trusted)
+
+
+
+
+
+
 
 # EvalVosk(path="example/AwACAgEAAxkBAAIBw2YX8o2vGGCNtZCXk7mY1Bm5w__lAAJmBAACxe7ARI1fUWAGcz_RNAQ.ogg",
 #             Trusted="Hoy compre dos medicinas Tereleji en Cruz Verde por un monto de 494 mil 400 pesos colombianos.",
 #             endpoint="/voice2txtlocal")
+# EvalWhisper(path="example/AwACAgEAAxkBAAIBw2YX8o2vGGCNtZCXk7mY1Bm5w__lAAJmBAACxe7ARI1fUWAGcz_RNAQ.ogg",
+#             Trusted="Hoy compre dos medicinas Tereleji en Cruz Verde por un monto de 494 mil 400 pesos colombianos.",
+#             endpoint="/voice2txt")
 
 
 def ocrfacturas(path,task_prompt):