Metrics voice completes

2024-07-30 11:49:42 -05:00 · 2024-07-30 11:49:42 -05:00 · f7e3913d2e
parent 4b52d4aa91
commit f7e3913d2e
6 changed files with 317 additions and 48 deletions
--- a/.gitignore
+++ b/.gitignore
@ -8,3 +8,4 @@ databases/storage.db
 .vscode/*
 __pycache__/*

+conf/experiment_config.json
--- a/apis.py
+++ b/apis.py
@ -1,10 +1,10 @@
 import fastapi
 from fastapi import FastAPI, Request
-from fastapi.responses import HTMLResponse
+from fastapi.responses import HTMLResponse,JSONResponse
 from pydantic import BaseModel
 import time
 from fastapi.staticfiles import StaticFiles
-from fastapi import FastAPI, Query, File, UploadFile
+from fastapi import FastAPI, Query, File, UploadFile,HTTPException
 #from fastapi.middleware.cors import CORSMiddleware
 from starlette.middleware.cors import CORSMiddleware
 import main
@ -13,9 +13,23 @@ from databases import db
 import audioread
 import pandas as pd
 import statistics
+import hashlib
+from datetime import datetime
+import json
 pwd = os.getcwd()
 pathAud="example/audio"
 pathFact="example/factura"
+pathText="example/texto"
+
+def extractConfig(nameModel="SystemData",relPath=os.path.join(pwd,"conf/experiment_config.json"),dataOut="keyantrophics"):
+    """Return one entry of the JSON experiment configuration.
+
+    Args:
+        nameModel: top-level key of the JSON document to read from.
+        relPath: path of the configuration file; it is joined onto the
+            current working directory before being opened.
+        dataOut: key looked up inside the ``nameModel`` section.
+
+    Returns:
+        The value stored at ``[nameModel][dataOut]`` in the JSON file.
+    """
+    with open(os.path.join(os.getcwd(),relPath), 'r', encoding='utf-8') as handle:
+        section = json.load(handle)[nameModel]
+    return section[dataOut]
+mode_list=extractConfig(nameModel="SystemData",dataOut="mode_list")
+
+
 app = FastAPI()
 #app.mount("/statics", StaticFiles(directory="statics"), name="statics")
 app.add_middleware(
@ -30,7 +44,6 @@ class Response(BaseModel):
    """Structure of data to querry of make post from X or article blog
    """
    path: str = Query("", description="Style and sentiments of text")
-    Trusted: str = Query("", description="Style and sentiments of text")
    model : str = Query("whisper", description="Style and sentiments of text")
 class Response1(BaseModel):
    path: str = Query("", description="path file")
@ -55,29 +68,95 @@ class Response3(BaseModel):
    Trusted: str = Query("", description="Style and sentiments of text")
    mode : str = Query("whisper", description="Style and sentiments of text")

-
+#Funcionales
@app.get("/addTrusted")
@app.post("/addTrusted")
 def addTrusted(response:Response3):
+    """Store or refresh the trusted (reference) data of one item.
+
+    Args:
+        response (Response3): request body with 3 params:
+        path : path of the file on the system if the item is a file, OR the text itself if the item is text.
+        Trusted : trusted (reference) information for the item.
+        mode: a key of mode_list; documented modes are llm_compra,llm_factura,llm_generaciontexto,llm_rag,ocr,voice.
+
+    Returns:
+        "Add ..." when a new row is inserted, "Update ..." when an existing row
+        is refreshed, or a 404 JSONResponse when the mode is unknown or the
+        file required by the mode does not exist.
+    """
    path=response.path
    Trusted=response.Trusted
    mode=response.mode
-    file_stats = os.stat(path)
-    size=file_stats.st_size / (1024 * 1024)
-    if mode=="voice":
+    last_modified=datetime.now()
+    # Defaults, so every metric is defined whatever category the mode maps to.
+    length=0
+    size=0
+    duration=0
+    if mode not in mode_list:
+        return JSONResponse(
+        status_code=404,
+        content={"content": "mode no found" }
+    )
+    if mode == "llm_factura" or mode == "ocr" or mode == "voice":
+        if not os.path.isfile(path):
+            return JSONResponse(
+            status_code=404,
+            content={"content": "file no found" }
+            )
+    if mode_list[mode]=="texto":
+        hash1 = hashlib.sha256(path.encode()).hexdigest()+".txt"
+        # Write the text and record its location through the same joined path,
+        # so the path stored in the database points at the file just created.
+        text_file=os.path.join(pwd,pathText,hash1)
+        with open(text_file, "w", encoding="utf-8") as f:
+            f.write(path)
+        path=text_file
+        length=len(Trusted)
+    elif mode_list[mode]=="factura":
+        file_stats = os.stat(path)
+        size=file_stats.st_size / (1024 * 1024)
+    elif mode_list[mode]=="audio":
        with audioread.audio_open(path) as f: 
            duration = f.duration
-    else:
-        duration = 0
-    if db(db.trusted.path == path and db.trusted.mode == mode).count()==0:
-        db.trusted.insert(path=path,trusted=Trusted,mode=mode,size=size,duration =duration )
+
+    # The size column of the trusted table is named "sizeMB".
+    if db((db.trusted.path == path)&(db.trusted.mode == mode)).count()==0:
+        db.trusted.insert(path=path,trusted=Trusted,mode=mode,sizeMB=size,duration=duration,last_modified=last_modified,length=length )
        db.commit()
        return "Add %s in mode %s"%(path,mode)
    else:
-        db(db.trusted.path == path and db.trusted.mode == mode).update(trusted=Trusted,size=size,duration =duration )
+        item=db((db.trusted.path == path)&(db.trusted.mode == mode)).select().last()
+        # Rows stored without a counter value hold None; count from zero then.
+        modification_count=(item.modification_count or 0) + 1
+        db((db.trusted.path == path)&(db.trusted.mode == mode)).update(trusted=Trusted,sizeMB=size,duration =duration,length=length,last_modified=last_modified,modification_count= modification_count)
        db.commit()
        return "Update %s in mode %s"%(path,mode)

+@app.get("/EvalVoice")
+@app.post("/EvalVoice")
+def EvalVoice(response:Response):
+    """Evaluate a speech-to-text model against the stored trusted transcript.
+
+    Args:
+        response (Response): request body with
+        path : path of the audio file; it must already have a "voice" row in the trusted table.
+        model : "whisper" runs main.EvalWhisper, any other value runs main.EvalVosk.
+
+    Returns:
+        The mapping returned by the evaluator with "last_modified" added, or a
+        404 JSONResponse when no trusted row exists for the path.
+    """
+    path=response.path
+    model=response.model
+    # A single select is enough: last() yields None when nothing matches.
+    trusted_row=db((db.trusted.path == path ) & ( db.trusted.mode == "voice")).select().last()
+    if trusted_row is None:
+        return JSONResponse(
+        status_code=404,
+        content={"content": "Trusted no found" }
+    )
+
+    Trusted=trusted_row.trusted
+    print(Trusted)
+    if model=="whisper":
+        Sal=main.EvalWhisper(path,Trusted)
+    else:
+        Sal=main.EvalVosk(path,Trusted)
+    Sal["last_modified"]=datetime.now()
+    # Conditions must be combined with "&": Python's "and" would keep only the
+    # right-hand condition and match every row of the same model.
+    previous=db((db.analitic_voice.path == Sal["path"]) & (db.analitic_voice.model == Sal["model"]))
+    if previous.count()==0:
+        db.analitic_voice.insert(**Sal)
+        db.commit()
+    else:
+        previous.update(similarity= Sal["similarity"],similaritypartial= Sal["similaritypartial"],last_modified=Sal["last_modified"])
+        db.commit()
+    return Sal
+
+
+
 def list2tablehtml(listdata,model):
    html="""<h2>Table of {0}</h2>
 <table style="width:100%">
@ -223,30 +302,6 @@ display:flex;
    return HTMLResponse(content=html, status_code=200)


-@app.get("/EvalVoice")
-@app.post("/EvalVoice")
-def EvalVoice(response:Response):
-    path=response.path
-    Trusted=response.Trusted
-    model=response.model
-    if Trusted=="":
-        row=db(db.trusted.path == path and db.trusted.mode == "voice").select().first()
-        try:
-            Trusted=row.trusted
-        except:
-            pass
-
-    if model=="whisper":
-        Sal=main.EvalWhisper(path,Trusted)
-    else:
-        Sal=main.EvalVosk(path,Trusted)
-    if db(db.analitic_voice.path == Sal["path"] and db.analitic_voice.model == Sal["model"]).count()==0:
-        db.analitic_voice.insert(**Sal)
-        db.commit()
-    else:
-        db(db.analitic_voice.path == Sal["path"] and db.analitic_voice.model == Sal["model"]).update(similarity= Sal["similarity"],similaritypartial= Sal["similaritypartial"])
-        db.commit()
-    return Sal

@app.get("/EvalFact")
@app.post("/EvalFact")
@ -334,9 +389,7 @@ def EvalVoicehtml():
    </select>
    
    <br>
-    <input type="text" id="texto2" placeholder="Trusted">
-    <br>
-    <select id="texto3">
+    <select id="texto2">
        <option value="whisper">whisper</option>
        <option value="vosk">vosk</option>
    </select>
@ -348,11 +401,9 @@ def EvalVoicehtml():
        function enviarPeticion() {
            const texto1 = document.getElementById('texto1').value;
            const texto2 = document.getElementById('texto2').value;
-            const texto3 = document.getElementById('texto3').value;
            const datos = {
                path: texto1,
-                Trusted: texto2,
-                model: texto3
+                model: texto2
            };

            fetch('/EvalVoice', {
--- a/databases.py
+++ b/databases.py
@ -5,8 +5,11 @@ db.define_table(
    Field("path"),
    Field("mode"),
    Field("trusted"),
-    Field("duration",type="double"),
-    Field("size",type="double")
+    Field("duration",type="double",default=0),#audio
+    Field("sizeMB",type="double",default=0),# audio,factura
+    Field("length",type="integer",default=0),#texto
+    Field('last_modified', 'datetime'),
+    Field('modification_count', 'integer', default=0)
 )
 db.define_table(
    "analitic_voice",
@ -16,7 +19,8 @@ db.define_table(
    Field("time", type="double"),
    Field("path"),
    Field("similarity", type="double"),
-    Field("similaritypartial", type="double")   
+    Field("similaritypartial", type="double"),
+    Field('last_modified', 'datetime')  
 )

 db.define_table(
@ -28,16 +32,54 @@ db.define_table(
    Field("path"),
    Field("similarity", type="double"),
    Field("similaritypartial", type="double"),
-    Field("jsonok" ,type="integer") 
+    Field("jsonok" ,type="integer"),
+    Field('last_modified', 'datetime')  
 )

 db.define_table(
-    "analitic_llm",
+    "analitic_llm_compra",
    Field("content"),
    Field("trusted"),
    Field("model"),
    Field("time", type="double"),
    Field("path"),
    Field("similarity", type="double"),
-    Field("similaritypartial", type="double")   
+    Field("similaritypartial", type="double"),
+    Field('last_modified', 'datetime')    
+)
+
+db.define_table(
+    "analitic_llm_factura",
+    Field("content"),
+    Field("trusted"),
+    Field("model"),
+    Field("time", type="double"),
+    Field("path"),
+    Field("similarity", type="double"),
+    Field("similaritypartial", type="double"),
+    Field('last_modified', 'datetime')    
+)
+
+db.define_table(
+    "analitic_llm_generaciontexto",
+    Field("content"),
+    Field("trusted"),
+    Field("model"),
+    Field("time", type="double"),
+    Field("path"),
+    Field("similarity", type="double"),
+    Field("similaritypartial", type="double"),
+    Field('last_modified', 'datetime')    
+)
+
+db.define_table(
+    "analitic_llm_rag",
+    Field("content"),
+    Field("trusted"),
+    Field("model"),
+    Field("time", type="double"),
+    Field("path"),
+    Field("similarity", type="double"),
+    Field("similaritypartial", type="double"),
+    Field('last_modified', 'datetime')    
 )
--- a/gui.py
+++ b/gui.py
@ -0,0 +1,153 @@
+from taipy.gui import Gui
+import hashlib
+import json
+import codecs, os
+from taipy.gui import Html
+import pandas as pd
+import requests
+import statistics
+from databases import db
+pwd = os.getcwd()
+
+HTML = os.path.join(pwd,"html", "index.html")
+# Read the landing page inside a context manager so the handle is closed.
+with codecs.open(HTML, "r", "utf-8") as file_read:
+    index = file_read.read()
+html_page_index = Html(index)
+
+def getmetricvoice(model):
+    """Aggregate the stored voice evaluations of one model.
+
+    Args:
+        model: value of the ``model`` column of ``analitic_voice`` to summarise.
+
+    Returns:
+        dict with the model name, the mean trusted ``duration`` of the
+        evaluated files, the pivoted ``time``, ``similarity`` and
+        ``similaritypartial`` values, and ``efectivetime`` (time / duration).
+        ``duration`` and ``efectivetime`` are NaN when no evaluated file has a
+        trusted row with a duration.
+    """
+    rows_list = db(db.analitic_voice.model==model).select().as_list()
+    data=pd.DataFrame(rows_list)
+    durationL=list()
+    for i in rows_list:
+        trusted_row=db(db.trusted.path == i["path"] ).select().last()
+        # An evaluation may outlive its trusted row, or the row may lack a
+        # duration; leave those out instead of failing on None.
+        if trusted_row is not None and trusted_row.duration is not None:
+            durationL.append(trusted_row.duration)
+    duration=statistics.mean(durationL) if durationL else float("nan")
+    # One pivot holds all three metrics; there is no need to rebuild it per column.
+    summary=pd.pivot_table(data,values=['time','similarity', 'similaritypartial'],index="model")
+    time=summary['time'].values[0]
+    similarity=summary['similarity'].values[0]
+    similaritypartial=summary['similaritypartial'].values[0]
+    efectivetime=time/duration
+    return ({"model":model,"duration":duration,"time":time,"similarity":similarity,"similaritypartial":similaritypartial,"efectivetime":efectivetime})
+
+def html_getmetricvoice():
+    """Build the voice metrics page and the two tables it displays.
+
+    Returns:
+        tuple ``(html, data, datafiles)``: the page template, a DataFrame with
+        one row of aggregated metrics per model, and a DataFrame with one row
+        per stored ``analitic_voice`` record.
+    """
+    models=[row.model for row in db().select(db.analitic_voice.model, distinct=True)]
+    data=pd.DataFrame({model:getmetricvoice(model) for model in models}).T
+    datafiles=pd.DataFrame({row.id:row.as_dict() for row in db().select(db.analitic_voice.ALL)}).T
+    # The placeholders name the module-level variables that receive this
+    # function's result (data_voice and data_files_voices); they must match
+    # those names for the tables to be bound.
+    html="""
+    <taipy:table>{data_voice}</taipy:table>
+    <taipy:table filter=True>{data_files_voices}</taipy:table>
+    """
+
+    return html,data,datafiles
+html_page_getmetricsvoice,data_voice,data_files_voices=html_getmetricvoice()
+
+
+def evalVoicehtml():
+    """Build the HTML page used to request a voice model evaluation.
+
+    The page offers one ``<option>`` per file found in ``example/audio`` and a
+    model selector, and posts the chosen path and model to ``/EvalVoice``.
+
+    Returns:
+        str: the complete HTML document; the file selector is empty when the
+        audio directory holds no files.
+    """
+    pathAud="example/audio"
+    dir_list = os.listdir(pathAud)
+    Sal=""
+    for t,i in enumerate(dir_list,start=1):
+
+        temp="""<option value="%s">Opción %s, %s</option>
+        """%(str(pwd+"/"+pathAud+"/"+i),str(t),str(i))
+        Sal=Sal+temp
+
+
+    # Build the page once, after every option is collected; building it inside
+    # the loop left "html" undefined when the directory was empty.
+    html="""<!DOCTYPE html>
+    <html lang="es">
+    <head>
+        <meta charset="UTF-8">
+        <meta name="viewport" content="width=device-width, initial-scale=1.0">
+        <title>Evaluacion de modelos voice2txt</title>
+        <style>
+            body {
+                font-family: Arial, sans-serif;
+                margin: 20px;
+            }
+            input, button {
+                margin: 10px 0;
+                padding: 5px;
+            }
+            #respuesta {
+                margin-top: 20px;
+                padding: 10px;
+                border: 1px solid #ccc;
+                background-color: #f9f9f9;
+            }
+        </style>
+    </head>
+    <body>
+        <h1>Petición POST a API</h1>
+
+        <select id="texto1">
+            %s
+        </select>
+        
+        <br>
+        <select id="texto2">
+            <option value="whisper">whisper</option>
+            <option value="vosk">vosk</option>
+        </select>
+        <br>
+        <button onclick="enviarPeticion()">Enviar petición</button>
+        <div id="respuesta"></div>
+
+        <script>
+            function enviarPeticion() {
+                const texto1 = document.getElementById('texto1').value;
+                const texto2 = document.getElementById('texto2').value;
+                const datos = {
+                    path: texto1,
+                    model: texto2
+                };
+
+                fetch('/EvalVoice', {
+                    method: 'POST',
+                    headers: {
+                        'Content-Type': 'application/json'
+                    },
+                    body: JSON.stringify(datos)
+                })
+                .then(response => response.json())
+                .then(data => {
+                    document.getElementById('respuesta').innerHTML = JSON.stringify(data, null, 2);
+                })
+                .catch(error => {
+                    document.getElementById('respuesta').innerHTML = 'Error: ' + error;
+                });
+            }
+        </script>
+    </body>
+    </html>
+        """%(Sal)
+    return html
+
+
+    
+html_page_evalvoice = Html(evalVoicehtml())
+
+HTML = os.path.join(pwd,"html", "index.html")
+# Read the landing page inside a context manager so the handle is closed.
+with codecs.open(HTML, "r", "utf-8") as file_read:
+    index = file_read.read()
+html_page_index = Html(index)
+
+data=pd.DataFrame({'col1': [1, 2], 'col2': [3, 4]})
+
+pages = {
+  "/": html_page_index ,
+  "getmetricsvoice": Html(html_page_getmetricsvoice),
+  "evalvoice":html_page_evalvoice
+}
+
+app = Gui(pages=pages)
+if __name__=="__main__":
+    app.run(use_reloader=True,port=7882, change_delay=1600)#state.imageActive2,
--- a/html/getmetricsvoice.html
+++ b/html/getmetricsvoice.html
--- a/html/index.html
+++ b/html/index.html
@ -0,0 +1,22 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+    <meta charset="UTF-8">
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <title>CIDITEL AI Playground</title>
+    <link rel="stylesheet" tyle="text-decoration: none;" href="https://cdn.jsdelivr.net/npm/bulma@0.9.4/css/bulma.min.css">
+    <link rel="stylesheet" tyle="text-decoration: none;" href="https://fonts.googleapis.com/css2?family=Material+Symbols+Outlined:opsz,wght,FILL,GRAD@24,400,0,0" />
+    <link rel="shortcut icon" tyle="text-decoration: none;" href="statics/icons/favicon.svg" type="image/x-icon">
+    <link rel="stylesheet" tyle="text-decoration: none;" href="statics/css/style.css">  
+    <link rel="stylesheet" tyle="text-decoration: none;" href="statics/css/media-queries.css">  
+    <link rel="preconnect" tyle="text-decoration: none;" href="https://fonts.googleapis.com">
+    <link rel="preconnect" tyle="text-decoration: none;" href="https://fonts.gstatic.com" crossorigin>
+    <link tyle="text-decoration: none;" href="https://fonts.googleapis.com/css2?family=Kanit:ital,wght@0,100;0,200;0,300;0,400;0,500;0,600;0,700;0,800;0,900;1,100;1,200;1,300;1,400;1,500;1,600;1,700;1,800;1,900&display=swap" rel="stylesheet">
+</head>
+<body>
+
+
+    <taipy:table>{data}</taipy:table>
+
+</body>
+</html>