diff --git a/.gitignore b/.gitignore index d910e66..68f2a5a 100644 --- a/.gitignore +++ b/.gitignore @@ -8,3 +8,4 @@ databases/storage.db .vscode/* __pycache__/* +conf/experiment_config.json diff --git a/apis.py b/apis.py index 1b548e0..4de90ed 100644 --- a/apis.py +++ b/apis.py @@ -1,10 +1,10 @@ import fastapi from fastapi import FastAPI, Request -from fastapi.responses import HTMLResponse +from fastapi.responses import HTMLResponse,JSONResponse from pydantic import BaseModel import time from fastapi.staticfiles import StaticFiles -from fastapi import FastAPI, Query, File, UploadFile +from fastapi import FastAPI, Query, File, UploadFile,HTTPException #from fastapi.middleware.cors import CORSMiddleware from starlette.middleware.cors import CORSMiddleware import main @@ -13,9 +13,23 @@ from databases import db import audioread import pandas as pd import statistics +import hashlib +from datetime import datetime +import json pwd = os.getcwd() pathAud="example/audio" pathFact="example/factura" +pathText="example/texto" + +def extractConfig(nameModel="SystemData",relPath=os.path.join(pwd,"conf/experiment_config.json"),dataOut="keyantrophics"): + configPath=os.path.join(os.getcwd(),relPath) + with open(configPath, 'r', encoding='utf-8') as file: + config = json.load(file)[nameModel] + Output= config[dataOut] + return Output +mode_list=extractConfig(nameModel="SystemData",dataOut="mode_list") + + app = FastAPI() #app.mount("/statics", StaticFiles(directory="statics"), name="statics") app.add_middleware( @@ -30,7 +44,6 @@ class Response(BaseModel): """Structure of data to querry of make post from X or article blog """ path: str = Query("", description="Style and sentiments of text") - Trusted: str = Query("", description="Style and sentiments of text") model : str = Query("whisper", description="Style and sentiments of text") class Response1(BaseModel): path: str = Query("", description="path file") @@ -55,29 +68,95 @@ class Response3(BaseModel): Trusted: str = Query("", description="Style and sentiments of text") mode : str = Query("whisper", description="Style and sentiments of text") - +#Funcionales @app.get("/addTrusted") @app.post("/addTrusted") def addTrusted(response:Response3): + """Api to add information of Trusted data + + Args: + response (Response3): 3 params: + path : path of archive on system if is a file OR text if is text. + Trusted : information Trusted or better information in a process. + mode: llm_compra,llm_factura,llm_generaciontexto,llm_rag,ocr,voice, + + Returns: + _type_: _description_ + """ path=response.path Trusted=response.Trusted mode=response.mode - file_stats = os.stat(path) - size=file_stats.st_size / (1024 * 1024) - if mode=="voice": + last_modified=datetime.now() + if mode not in mode_list.keys(): + return JSONResponse( + status_code=404, + content={"content": "mode no found" } + ) + if mode == "llm_factura" or mode == "ocr" or mode == "voice": + if not os.path.isfile(path): + return JSONResponse( + status_code=404, + content={"content": "file no found" } + ) + if mode_list[mode]=="texto": + hash1 = hashlib.sha256(path.encode()).hexdigest()+".txt" + f = open("example/texto/"+hash1, "w") + f.write(path) + f.close() + path=pwd+"/"+pathText+hash1 + length=len(Trusted) + size=0 + duration=0 + elif mode_list[mode]=="factura": + file_stats = os.stat(path) + size=file_stats.st_size / (1024 * 1024) + length=0 + duration=0 + elif mode_list[mode]=="audio": with audioread.audio_open(path) as f: duration = f.duration - else: - duration = 0 - if db(db.trusted.path == path and db.trusted.mode == mode).count()==0: - db.trusted.insert(path=path,trusted=Trusted,mode=mode,size=size,duration =duration ) + length=0 + size=0 + + if db((db.trusted.path == path)&(db.trusted.mode == mode)).count()==0: + db.trusted.insert(path=path,trusted=Trusted,mode=mode,size=size,duration=duration,last_modified=last_modified,length=length ) db.commit() return "Add %s in mode %s"%(path,mode) else: - db(db.trusted.path == path and db.trusted.mode == mode).update(trusted=Trusted,size=size,duration =duration ) + item=db((db.trusted.path == path)&(db.trusted.mode == mode)).select().last() + modification_count=item.modification_count + 1 + db((db.trusted.path == path)&(db.trusted.mode == mode)).update(trusted=Trusted,size=size,duration =duration,length=length,last_modified=last_modified,modification_count= modification_count) db.commit() return "Update %s in mode %s"%(path,mode) +@app.get("/EvalVoice") +@app.post("/EvalVoice") +def EvalVoice(response:Response): + path=response.path + model=response.model + if db((db.trusted.path == path ) & ( db.trusted.mode == "voice")).count()==0: + return JSONResponse( + status_code=404, + content={"content": "Trusted no found" } + ) + + Trusted=db((db.trusted.path == path ) & ( db.trusted.mode == "voice")).select().last().trusted + print(Trusted) + if model=="whisper": + Sal=main.EvalWhisper(path,Trusted) + else: + Sal=main.EvalVosk(path,Trusted) + Sal["last_modified"]=datetime.now() + if db(db.analitic_voice.path == Sal["path"] and db.analitic_voice.model == Sal["model"]).count()==0: + db.analitic_voice.insert(**Sal) + db.commit() + else: + db(db.analitic_voice.path == Sal["path"] and db.analitic_voice.model == Sal["model"]).update(similarity= Sal["similarity"],similaritypartial= Sal["similaritypartial"],last_modified=Sal["last_modified"]) + db.commit() + return Sal + + + def list2tablehtml(listdata,model): html="""

Table of {0}

@@ -223,30 +302,6 @@ display:flex; return HTMLResponse(content=html, status_code=200) -@app.get("/EvalVoice") -@app.post("/EvalVoice") -def EvalVoice(response:Response): - path=response.path - Trusted=response.Trusted - model=response.model - if Trusted=="": - row=db(db.trusted.path == path and db.trusted.mode == "voice").select().first() - try: - Trusted=row.trusted - except: - pass - - if model=="whisper": - Sal=main.EvalWhisper(path,Trusted) - else: - Sal=main.EvalVosk(path,Trusted) - if db(db.analitic_voice.path == Sal["path"] and db.analitic_voice.model == Sal["model"]).count()==0: - db.analitic_voice.insert(**Sal) - db.commit() - else: - db(db.analitic_voice.path == Sal["path"] and db.analitic_voice.model == Sal["model"]).update(similarity= Sal["similarity"],similaritypartial= Sal["similaritypartial"]) - db.commit() - return Sal @app.get("/EvalFact") @app.post("/EvalFact") @@ -334,9 +389,7 @@ def EvalVoicehtml():
- -
- @@ -348,11 +401,9 @@ def EvalVoicehtml(): function enviarPeticion() { const texto1 = document.getElementById('texto1').value; const texto2 = document.getElementById('texto2').value; - const texto3 = document.getElementById('texto3').value; const datos = { path: texto1, - Trusted: texto2, - model: texto3 + model: texto2 }; fetch('/EvalVoice', { diff --git a/databases.py b/databases.py index 6f24fef..b5bae82 100644 --- a/databases.py +++ b/databases.py @@ -5,8 +5,11 @@ db.define_table( Field("path"), Field("mode"), Field("trusted"), - Field("duration",type="double"), - Field("size",type="double") + Field("duration",type="double",default=0),#audio + Field("sizeMB",type="double",default=0),# audio,factura + Field("length",type="integer",default=0),#texto + Field('last_modified', 'datetime'), + Field('modification_count', 'integer', default=0) ) db.define_table( "analitic_voice", @@ -16,7 +19,8 @@ db.define_table( Field("time", type="double"), Field("path"), Field("similarity", type="double"), - Field("similaritypartial", type="double") + Field("similaritypartial", type="double"), + Field('last_modified', 'datetime') ) db.define_table( @@ -28,16 +32,54 @@ db.define_table( Field("path"), Field("similarity", type="double"), Field("similaritypartial", type="double"), - Field("jsonok" ,type="integer") + Field("jsonok" ,type="integer"), + Field('last_modified', 'datetime') ) db.define_table( - "analitic_llm", + "analitic_llm_compra", Field("content"), Field("trusted"), Field("model"), Field("time", type="double"), Field("path"), Field("similarity", type="double"), - Field("similaritypartial", type="double") + Field("similaritypartial", type="double"), + Field('last_modified', 'datetime') +) + +db.define_table( + "analitic_llm_factura", + Field("content"), + Field("trusted"), + Field("model"), + Field("time", type="double"), + Field("path"), + Field("similarity", type="double"), + Field("similaritypartial", type="double"), + Field('last_modified', 'datetime') +) + +db.define_table( + "analitic_llm_generaciontexto", + Field("content"), + Field("trusted"), + Field("model"), + Field("time", type="double"), + Field("path"), + Field("similarity", type="double"), + Field("similaritypartial", type="double"), + Field('last_modified', 'datetime') +) + +db.define_table( + "analitic_llm_rag", + Field("content"), + Field("trusted"), + Field("model"), + Field("time", type="double"), + Field("path"), + Field("similarity", type="double"), + Field("similaritypartial", type="double"), + Field('last_modified', 'datetime') ) \ No newline at end of file diff --git a/gui.py b/gui.py new file mode 100644 index 0000000..0577842 --- /dev/null +++ b/gui.py @@ -0,0 +1,153 @@ +from taipy.gui import Gui +import hashlib +import json +import codecs, os +from taipy.gui import Html +import pandas as pd +import requests +import statistics +from databases import db +pwd = os.getcwd() + +HTML = os.path.join(pwd,"html", "index.html") +file_read = codecs.open(HTML, "r", "utf-8") +index = file_read.read() +html_page_index = Html(index) + +def getmetricvoice(model): + rows = db(db.analitic_voice.model==model).select() + rows_list = rows.as_list() + data=pd.DataFrame(rows_list) + durationL=list() + for i in rows_list: + durationL.append(db(db.trusted.path == i["path"] ).select().last().duration) + duration=statistics.mean(durationL) + time=pd.pivot_table(data,values=['time','similarity', 'similaritypartial'],index="model")['time'].values[0] + similarity=pd.pivot_table(data,values=['time','similarity', 'similaritypartial'],index="model")['similarity'].values[0] + similaritypartial=pd.pivot_table(data,values=['time','similarity', 'similaritypartial'],index="model")['similaritypartial'].values[0] + efectivetime=time/duration + return ({"model":model,"duration":duration,"time":time,"similarity":similarity,"similaritypartial":similaritypartial,"efectivetime":efectivetime}) + +def html_getmetricvoice(): + models=list() + for row in db().select(db.analitic_voice.model, distinct=True): + models.append(row.model) + data={} + for model in models: + data[model]=getmetricvoice(model) + data=pd.DataFrame(data).T + datafiles={} + for row in db().select(db.analitic_voice.ALL): + datafiles[row.id]=row.as_dict() + datafiles=pd.DataFrame(datafiles).T + html=""" + {data_voice} + {data_files_voice} + """ + + return html,data,datafiles +html_page_getmetricsvoice,data_voice,data_files_voices=html_getmetricvoice() + + +def evalVoicehtml(): + pathAud="example/audio" + dir_list = os.listdir(pathAud) + Sal="" + t=1 + for i in dir_list: + + temp=""" + """%(str(pwd+"/"+pathAud+"/"+i),str(t),str(i)) + Sal=Sal+temp + t=t+1 + + + html=""" + + + + + Evaluacion de modelos voice2txt + + + +

Petición POST a API

+ + + +
+ +
+ +
+ + + + + """%(Sal) + return html + + + +html_page_evalvoice = Html(evalVoicehtml()) + +HTML = os.path.join(pwd,"html", "index.html") +file_read = codecs.open(HTML, "r", "utf-8") +index = file_read.read() +html_page_index = Html(index) + +data=pd.DataFrame({'col1': [1, 2], 'col2': [3, 4]}) + +pages = { + "/": html_page_index , + "getmetricsvoice": Html(html_page_getmetricsvoice), + "evalvoice":html_page_evalvoice +} + +app = Gui(pages=pages) +if __name__=="__main__": + app.run(use_reloader=True,port=7882, change_delay=1600)#state.imageActive2, diff --git a/html/getmetricsvoice.html b/html/getmetricsvoice.html new file mode 100644 index 0000000..e69de29 diff --git a/html/index.html b/html/index.html new file mode 100644 index 0000000..7486521 --- /dev/null +++ b/html/index.html @@ -0,0 +1,22 @@ + + + + + + CIDITEL AI Playground + + + + + + + + + + + + + {data} + + + \ No newline at end of file