From 881d3074cf48ab9e81ede1254a026113e54f0377 Mon Sep 17 00:00:00 2001 From: marioggil Date: Sat, 3 Aug 2024 01:38:04 -0500 Subject: [PATCH] EvalCompra Ok --- apis.py | 214 +++++++++++++++++++++++++++++++++++++++++++++++---- databases.py | 14 +++- gui.py | 134 +++++++++++++++++++++----------- main.py | 47 ++++++++--- 4 files changed, 340 insertions(+), 69 deletions(-) diff --git a/apis.py b/apis.py index e626f38..e29be7d 100644 --- a/apis.py +++ b/apis.py @@ -45,6 +45,14 @@ class Response(BaseModel): """ path: str = Query("", description="Style and sentiments of text") model : str = Query("whisper", description="Style and sentiments of text") +class Response4(BaseModel): + path: str = Query("", description="path file") + system: str = Query("", description="prompt system LLM model with ocr and image claude") + content: str = Query("%s", description="prompt content LLM model with ocr") + max_tokens: int = Query(1024, description="maxtoken LLM OCR model") + model: str = Query("Claude-sonnet", description="model") + + class Response1(BaseModel): path: str = Query("", description="path file") task_prompt: str = Query("", description="task of model") @@ -66,7 +74,14 @@ class Response3(BaseModel): """ path: str = Query("", description="Style and sentiments of text") Trusted: str = Query("", description="Style and sentiments of text") - mode : str = Query("whisper", description="Style and sentiments of text") + mode : str = Query("", description="Style and sentiments of text") + +class Response5(BaseModel): + """Structure of data to querry of make post from X or article blog + """ + prompt: str = Query("", description="Style and sentiments of text") + mode : str = Query("", description="Style and sentiments of text") + #Funcionales @app.get("/addTrusted") @@ -99,11 +114,11 @@ def addTrusted(response:Response3): content={"content": "file no found" } ) if mode_list[mode]=="texto": - hash1 = hashlib.sha256(path.encode()).hexdigest()+".txt" - f = open("example/texto/"+hash1, "w") - f.write(path) - f.close() - path=pwd+"/"+pathText+hash1 + info=str({"path":path,"trusted":Trusted,"mode":mode}) + hash1 = hashlib.sha256(info.encode()).hexdigest() + # with open("example/texto/"+hash1, 'w') as f: + # json.dump(info, f) + # path=pwd+"/"+pathText+hash1 length=len(Trusted) size=0 duration=0 @@ -112,22 +127,63 @@ def addTrusted(response:Response3): size=file_stats.st_size / (1024 * 1024) length=0 duration=0 + hash1="" elif mode_list[mode]=="audio": with audioread.audio_open(path) as f: duration = f.duration length=0 size=0 - + hash1="" if db((db.trusted.path == path)&(db.trusted.mode == mode)).count()==0: - db.trusted.insert(path=path,trusted=Trusted,mode=mode,size=size,duration=duration,last_modified=last_modified,length=length ) + db.trusted.insert(path=path,trusted=Trusted,mode=mode,size=size,duration=duration,last_modified=last_modified,length=length,hash=hash1 ) db.commit() return "Add %s in mode %s"%(path,mode) else: item=db((db.trusted.path == path)&(db.trusted.mode == mode)).select().last() modification_count=item.modification_count + 1 - db((db.trusted.path == path)&(db.trusted.mode == mode)).update(trusted=Trusted,size=size,duration =duration,length=length,last_modified=last_modified,modification_count= modification_count) + db((db.trusted.path == path)&(db.trusted.mode == mode)).update(trusted=Trusted,size=size,duration =duration,length=length,last_modified=last_modified,modification_count= modification_count,hash=hash1) db.commit() return "Update %s in mode %s"%(path,mode) + + +@app.get("/addPrompt") +@app.post("/addPrompt") +def addPrompt(response:Response5): + """Api to add information of Trusted data + + Args: + response (Response3): 3 params: + path : path of archive on system if is a file OR text if is text. + Trusted : information Trusted or better information in a process. + mode: llm_compra,llm_factura,llm_generaciontexto,llm_rag,ocr,voice, + + Returns: + _type_: _description_ + """ + prompt=response.prompt + mode=response.mode + last_modified=datetime.now() + if mode not in mode_list.keys(): + return JSONResponse( + status_code=404, + content={"content": "mode no found" } + ) + if mode == "llm_compra": + hash1 = str(hashlib.sha256(prompt.encode()).hexdigest()) + # with open("example/texto/"+hash1, 'w') as f: + # json.dump(info, f) + # path=pwd+"/"+pathText+hash1 + length=len(prompt) + if db((db.prompt.hash == hash1)&(db.prompt.mode == mode)).count()==0: + db.prompt.insert(prompt=prompt,mode=mode,last_modified=last_modified,length=length,hash=hash1 ) + db.commit() + return "Add %s in mode %s"%(prompt,mode) + else: + A=db((db.prompt.hash == hash1)&(db.prompt.mode == mode)).update(prompt=prompt,mode=mode,last_modified=last_modified,length=length+1,hash=hash1) + db.commit() + print(A,last_modified) + return "Update %s in mode %s"%(prompt,mode) + @app.get("/EvalVoice") @app.post("/EvalVoice") @@ -141,17 +197,18 @@ def EvalVoice(response:Response): ) Trusted=db((db.trusted.path == path ) & ( db.trusted.mode == "voice")).select().last().trusted - print(Trusted) if model=="whisper": Sal=main.EvalWhisper(path,Trusted) else: Sal=main.EvalVosk(path,Trusted) Sal["last_modified"]=datetime.now() - if db(db.analitic_voice.path == Sal["path"] and db.analitic_voice.model == Sal["model"]).count()==0: + if db((db.analitic_voice.path == Sal["path"]) & (db.analitic_voice.model == Sal["model"])).count()==0: + print(1,Sal) db.analitic_voice.insert(**Sal) db.commit() else: - db(db.analitic_voice.path == Sal["path"] and db.analitic_voice.model == Sal["model"]).update(similarity= Sal["similarity"],similaritypartial= Sal["similaritypartial"],last_modified=Sal["last_modified"]) + print(2,Sal) + db((db.analitic_voice.path == Sal["path"]) & (db.analitic_voice.model == Sal["model"])).update(similarity= Sal["similarity"],similaritypartial= Sal["similaritypartial"],last_modified=Sal["last_modified"]) db.commit() return Sal @@ -192,7 +249,7 @@ def EvalVoicehtml(): -

Petición POST a API

+

Petición Evaluar modelo de voz comtra datos curados

+ %s + + +
+ +
+ +
+ +
+ +
+ + + + + """%(Sal,Sal2) + return HTMLResponse(content=html, status_code=200) + + #Por revisar def list2tablehtml(listdata,model): diff --git a/databases.py b/databases.py index b5bae82..d19b055 100644 --- a/databases.py +++ b/databases.py @@ -9,8 +9,20 @@ db.define_table( Field("sizeMB",type="double",default=0),# audio,factura Field("length",type="integer",default=0),#texto Field('last_modified', 'datetime'), - Field('modification_count', 'integer', default=0) + Field('modification_count', 'integer', default=0), + Field('hash') ) + +db.define_table( + "prompt", + Field("prompt"), + Field("mode"), + Field("length",type="integer",default=0), + Field('hash',unique=True), + Field('last_modified', 'datetime'), +) + + db.define_table( "analitic_voice", Field("content"), diff --git a/gui.py b/gui.py index f96279a..eeb8233 100644 --- a/gui.py +++ b/gui.py @@ -7,12 +7,9 @@ import pandas as pd import requests import statistics from databases import db +import time pwd = os.getcwd() -HTML = os.path.join(pwd,"html", "index.html") -file_read = codecs.open(HTML, "r", "utf-8") -index = file_read.read() -html_page_index = Html(index) def extractConfig(nameModel="SystemData",relPath=os.path.join(pwd,"conf/experiment_config.json"),dataOut="keyantrophics"): configPath=os.path.join(os.getcwd(),relPath) with open(configPath, 'r', encoding='utf-8') as file: @@ -20,6 +17,8 @@ def extractConfig(nameModel="SystemData",relPath=os.path.join(pwd,"conf/experime Output= config[dataOut] return Output mode_list=extractConfig(nameModel="SystemData",dataOut="mode_list") + + def getmetricvoice(model): rows = db(db.analitic_voice.model==model).select() rows_list = rows.as_list() @@ -36,70 +35,113 @@ def getmetricvoice(model): def html_getmetricvoice(): models=list() + t=time.time() for row in db().select(db.analitic_voice.model, distinct=True): models.append(row.model) data={} for model in models: data[model]=getmetricvoice(model) data=pd.DataFrame(data).T - datafiles={} + data_files={} for row in db().select(db.analitic_voice.ALL): - datafiles[row.id]=row.as_dict() - datafiles=pd.DataFrame(datafiles).T + data_files[row.id]=row.as_dict() + #print(datafiles) + data_files=pd.DataFrame(data_files).T + + #table = pd.pivot_table(data_files, values=['path', 'similarity','similaritypartial'], index=['path'], + #columns=['model'], aggfunc="sum") + #print(table,table.columns) + html=""" +

Data general de los modelos

{data_voice} +

Data de cada muestra

{data_files_voice} - """ - - return html,data,datafiles -html_page_getmetricsvoice,data_voice,data_files_voices=html_getmetricvoice() - -mode="voice" -modetypedata="audio" -file="id2" -def changemenu(mode): - if mode_list[mode]=="audio": - pathori="example/audio" - if mode_list[mode]=="factura": - pathori="example/factura" - if mode_list[mode]=="texto": - pathori="example/texto" - seltypedata=mode_list[mode] - dir_list = os.listdir(pathori) - return pathori,seltypedata,dir_list + + """ + #{data_files_voice} + print(time.time()-t) + return html,data,data_files + +def getmetricllm_compra(model): + rows = db(db.analitic_llm_compra.model==model).select() + rows_list = rows.as_list() + data=pd.DataFrame(rows_list) + durationL=list() + for i in rows_list: + durationL.append(db(db.trusted.path == i["path"] ).select().last().duration) + duration=statistics.mean(durationL) + time=pd.pivot_table(data,values=['time','similarity', 'similaritypartial'],index="model")['time'].values[0] + similarity=pd.pivot_table(data,values=['time','similarity', 'similaritypartial'],index="model")['similarity'].values[0] + similaritypartial=pd.pivot_table(data,values=['time','similarity', 'similaritypartial'],index="model")['similaritypartial'].values[0] + efectivetime=time/duration + return ({"model":model,"duration":duration,"time":time,"similarity":similarity,"similaritypartial":similaritypartial,"efectivetime":efectivetime}) + +def html_getmetricllm_compra(): + models=list() + t=time.time() + for row in db().select(db.analitic_llm_compra.model, distinct=True): + models.append(row.model) + data={} + for model in models: + data[model]=getmetricllm_compra(model) + data=pd.DataFrame(data).T + data_files={} + for row in db().select(db.analitic_llm_compra.ALL): + data_files[row.id]=row.as_dict() + #print(datafiles) + data_files=pd.DataFrame(data_files).T + + #table = pd.pivot_table(data_files, values=['path', 'similarity','similaritypartial'], index=['path'], + #columns=['model'], aggfunc="sum") + #print(table,table.columns) + + html=""" +

Data general de los modelos

+ {data_voice} +

Data de cada muestra

+ {data_files_voice} + + + """ + #{data_files_voice} + print(time.time()-t) + return html,data,data_files -def trustedallhtml(mode): - pathori,seltypedata,dir_list=changemenu(mode) +def on_init(state): + state.html_page_getmetricsvoice,state.data_voice,state.data_files_voice=html_getmetricvoice() + pass + + + +html_page_getmetricsvoice,data_voice,data_files_voice=html_getmetricvoice() +# mode="voice" +# modetypedata="audio" +# file="id2" +# def changemenu(mode): +# if mode_list[mode]=="audio": +# pathori="example/audio" +# if mode_list[mode]=="factura": +# pathori="example/factura" +# if mode_list[mode]=="texto": +# pathori="example/texto" +# seltypedata=mode_list[mode] +# dir_list = os.listdir(pathori) + +# return pathori,seltypedata,dir_list - textmode="" - for modeused in mode_list.keys(): - textmode=textmode+"('%s','%s'),"%(modeused,modeused) - html="""{sel}"""%(textmode) - Sal="" - for i in dir_list: - temp="""('%s', '%s'),"""%(str(pwd+"/"+pathori+"/"+i),str(i)) - Sal=Sal+temp - html2="""{sel2}"""%(Sal) - return html+html2 -html_page_trustedall = Html(trustedallhtml(mode)) -#print(sel,sel2,seltypedata) -HTML = os.path.join(pwd,"html", "index.html") -file_read = codecs.open(HTML, "r", "utf-8") -index = file_read.read() -html_page_index = Html(index) data=pd.DataFrame({'col1': [1, 2], 'col2': [3, 4]}) pages = { - "/": html_page_index , "getmetricsvoice": Html(html_page_getmetricsvoice), - "trustedall":html_page_trustedall } app = Gui(pages=pages) +app.on_init=on_init if __name__=="__main__": - app.run(use_reloader=True,port=7882, change_delay=1600)#state.imageActive2, + app.run(use_reloader=True,port=7882)#state.imageActive2, diff --git a/main.py b/main.py index fc6f87d..47aef3a 100644 --- a/main.py +++ b/main.py @@ -15,11 +15,8 @@ def EvalVoice2Text(endpoint,datajson,Trusted): """ apiUrl=urlAud+endpoint response = requests.get(apiUrl, json=datajson) - print(datajson) A=json.loads(response.content) - print(A) time=A['time'] - similarity=fuzz.ratio( Trusted.strip().lower(),A['message'].strip().lower()) similarityPartial=fuzz.partial_ratio( Trusted.strip().lower(),A['message'].strip().lower()) path=datajson["local"] @@ -34,27 +31,59 @@ def EvalVoice2Text(endpoint,datajson,Trusted): "path":path } - def EvalWhisper(path,Trusted=""): endpoint="/voice2txt" datajson={"url":"","password":password ,"model":"whisper","local":path} return EvalVoice2Text(endpoint,datajson,Trusted) - -# EvalWhisper(path="example/AwACAgEAAxkBAAIBw2YX8o2vGGCNtZCXk7mY1Bm5w__lAAJmBAACxe7ARI1fUWAGcz_RNAQ.ogg", -# Trusted="Hoy compre dos medicinas Tereleji en Cruz Verde por un monto de 494 mil 400 pesos colombianos.", -# endpoint="/voice2txt") - def EvalVosk(path,Trusted=""): endpoint="/voice2txtlocal" datajson={"url":"","password":password ,"model":"models/vosk-model-small-es-0.42","local":path} return EvalVoice2Text(endpoint,datajson,Trusted) +def EvalLLMCompra(endpoint,datajson,Trusted): + """Evaluate Voice 2 text + """ + apiUrl=urlText+endpoint + response = requests.get(apiUrl, json=datajson) + A=json.loads(response.content) + time=A['time'] + print(A) + similarity=fuzz.ratio( Trusted.strip().lower(),A['content'].strip().lower()) + similarityPartial=fuzz.partial_ratio( Trusted.strip().lower(),A['content'].strip().lower()) + #path=datajson["local"] + model=datajson["model"] + + message=A['content'] + return {"content":message, + "trusted":Trusted, + "model":model, + "time":time, + "similarity":similarity, + "similaritypartial":similarityPartial, + "path":message + } + +def EvalModelLLMCompra(system,content,model,max_new_tokens,Trusted): + endpoint="/genTextCustom" + datajson={"system":system,"content":content,"password":password ,"model":model,"max_new_token":max_new_tokens} + return EvalLLMCompra(endpoint,datajson,Trusted) + + + + + + + + # EvalVosk(path="example/AwACAgEAAxkBAAIBw2YX8o2vGGCNtZCXk7mY1Bm5w__lAAJmBAACxe7ARI1fUWAGcz_RNAQ.ogg", # Trusted="Hoy compre dos medicinas Tereleji en Cruz Verde por un monto de 494 mil 400 pesos colombianos.", # endpoint="/voice2txtlocal") +# EvalWhisper(path="example/AwACAgEAAxkBAAIBw2YX8o2vGGCNtZCXk7mY1Bm5w__lAAJmBAACxe7ARI1fUWAGcz_RNAQ.ogg", +# Trusted="Hoy compre dos medicinas Tereleji en Cruz Verde por un monto de 494 mil 400 pesos colombianos.", +# endpoint="/voice2txt") def ocrfacturas(path,task_prompt):