EvalCompra Ok

This commit is contained in:
Mario Gil 2024-08-03 01:38:04 -05:00
parent b5f235f5ae
commit 881d3074cf
4 changed files with 340 additions and 69 deletions

214
apis.py
View File

@ -45,6 +45,14 @@ class Response(BaseModel):
""" """
path: str = Query("", description="Style and sentiments of text") path: str = Query("", description="Style and sentiments of text")
model : str = Query("whisper", description="Style and sentiments of text") model : str = Query("whisper", description="Style and sentiments of text")
class Response4(BaseModel):
    """Request payload accepted by the /EvalLLMCompra endpoint."""
    # Looked up against db.trusted.path by the endpoint and also forwarded
    # as the text sent to the model.
    path: str = Query("", description="path file")
    # System prompt forwarded to the model.
    system: str = Query("", description="prompt system LLM model with ocr and image claude")
    # NOTE(review): the visible /EvalLLMCompra handler reads `path`, not
    # `content` — confirm whether this field is still needed.
    content: str = Query("%s", description="prompt content LLM model with ocr")
    # Generation limit forwarded to the model call.
    max_tokens: int = Query(1024, description="maxtoken LLM OCR model")
    # Model identifier forwarded to the model call.
    model: str = Query("Claude-sonnet", description="model")
class Response1(BaseModel): class Response1(BaseModel):
path: str = Query("", description="path file") path: str = Query("", description="path file")
task_prompt: str = Query("", description="task of model") task_prompt: str = Query("", description="task of model")
@ -66,7 +74,14 @@ class Response3(BaseModel):
""" """
path: str = Query("", description="Style and sentiments of text") path: str = Query("", description="Style and sentiments of text")
Trusted: str = Query("", description="Style and sentiments of text") Trusted: str = Query("", description="Style and sentiments of text")
mode : str = Query("whisper", description="Style and sentiments of text") mode : str = Query("", description="Style and sentiments of text")
class Response5(BaseModel):
    """Request payload accepted by the /addPrompt endpoint.

    Attributes are documented through their ``Query`` descriptions so the
    generated API schema describes what the endpoint actually stores.
    """
    prompt: str = Query("", description="Prompt text to store")
    mode: str = Query("", description="Evaluation mode the prompt belongs to")
#Funcionales #Funcionales
@app.get("/addTrusted") @app.get("/addTrusted")
@ -99,11 +114,11 @@ def addTrusted(response:Response3):
content={"content": "file no found" } content={"content": "file no found" }
) )
if mode_list[mode]=="texto": if mode_list[mode]=="texto":
hash1 = hashlib.sha256(path.encode()).hexdigest()+".txt" info=str({"path":path,"trusted":Trusted,"mode":mode})
f = open("example/texto/"+hash1, "w") hash1 = hashlib.sha256(info.encode()).hexdigest()
f.write(path) # with open("example/texto/"+hash1, 'w') as f:
f.close() # json.dump(info, f)
path=pwd+"/"+pathText+hash1 # path=pwd+"/"+pathText+hash1
length=len(Trusted) length=len(Trusted)
size=0 size=0
duration=0 duration=0
@ -112,22 +127,63 @@ def addTrusted(response:Response3):
size=file_stats.st_size / (1024 * 1024) size=file_stats.st_size / (1024 * 1024)
length=0 length=0
duration=0 duration=0
hash1=""
elif mode_list[mode]=="audio": elif mode_list[mode]=="audio":
with audioread.audio_open(path) as f: with audioread.audio_open(path) as f:
duration = f.duration duration = f.duration
length=0 length=0
size=0 size=0
hash1=""
if db((db.trusted.path == path)&(db.trusted.mode == mode)).count()==0: if db((db.trusted.path == path)&(db.trusted.mode == mode)).count()==0:
db.trusted.insert(path=path,trusted=Trusted,mode=mode,size=size,duration=duration,last_modified=last_modified,length=length ) db.trusted.insert(path=path,trusted=Trusted,mode=mode,size=size,duration=duration,last_modified=last_modified,length=length,hash=hash1 )
db.commit() db.commit()
return "Add %s in mode %s"%(path,mode) return "Add %s in mode %s"%(path,mode)
else: else:
item=db((db.trusted.path == path)&(db.trusted.mode == mode)).select().last() item=db((db.trusted.path == path)&(db.trusted.mode == mode)).select().last()
modification_count=item.modification_count + 1 modification_count=item.modification_count + 1
db((db.trusted.path == path)&(db.trusted.mode == mode)).update(trusted=Trusted,size=size,duration =duration,length=length,last_modified=last_modified,modification_count= modification_count) db((db.trusted.path == path)&(db.trusted.mode == mode)).update(trusted=Trusted,size=size,duration =duration,length=length,last_modified=last_modified,modification_count= modification_count,hash=hash1)
db.commit() db.commit()
return "Update %s in mode %s"%(path,mode) return "Update %s in mode %s"%(path,mode)
@app.get("/addPrompt")
@app.post("/addPrompt")
def addPrompt(response:Response5):
    """Store a prompt for a mode, or refresh it if it is already stored.

    A prompt is identified by the SHA-256 hex digest of its text together
    with its mode.

    Args:
        response (Response5): 2 params:
            prompt: prompt text to store.
            mode: one of the keys of ``mode_list``.

    Returns:
        JSONResponse with status 404 when ``mode`` is not a key of
        ``mode_list``; otherwise a string starting with "Add" (new row) or
        "Update" (existing row).
    """
    prompt = response.prompt
    mode = response.mode
    last_modified = datetime.now()
    if mode not in mode_list:
        return JSONResponse(
            status_code=404,
            content={"content": "mode no found" }
        )

    # Computed for every accepted mode: previously these were only assigned
    # for "llm_compra", so any other valid mode failed on an undefined name.
    hash1 = hashlib.sha256(prompt.encode()).hexdigest()
    length = len(prompt)

    # NOTE(review): prompt.hash is declared unique in the table definition,
    # so the same text under two different modes cannot be inserted twice —
    # confirm whether uniqueness should be per (hash, mode).
    same_prompt = (db.prompt.hash == hash1) & (db.prompt.mode == mode)
    if db(same_prompt).count() == 0:
        db.prompt.insert(prompt=prompt, mode=mode, last_modified=last_modified,
                         length=length, hash=hash1)
        db.commit()
        return "Add %s in mode %s"%(prompt,mode)

    # Store the real length; the previous code wrote length + 1 on update.
    db(same_prompt).update(prompt=prompt, mode=mode, last_modified=last_modified,
                           length=length, hash=hash1)
    db.commit()
    return "Update %s in mode %s"%(prompt,mode)
@app.get("/EvalVoice") @app.get("/EvalVoice")
@app.post("/EvalVoice") @app.post("/EvalVoice")
@ -141,17 +197,18 @@ def EvalVoice(response:Response):
) )
Trusted=db((db.trusted.path == path ) & ( db.trusted.mode == "voice")).select().last().trusted Trusted=db((db.trusted.path == path ) & ( db.trusted.mode == "voice")).select().last().trusted
print(Trusted)
if model=="whisper": if model=="whisper":
Sal=main.EvalWhisper(path,Trusted) Sal=main.EvalWhisper(path,Trusted)
else: else:
Sal=main.EvalVosk(path,Trusted) Sal=main.EvalVosk(path,Trusted)
Sal["last_modified"]=datetime.now() Sal["last_modified"]=datetime.now()
if db(db.analitic_voice.path == Sal["path"] and db.analitic_voice.model == Sal["model"]).count()==0: if db((db.analitic_voice.path == Sal["path"]) & (db.analitic_voice.model == Sal["model"])).count()==0:
print(1,Sal)
db.analitic_voice.insert(**Sal) db.analitic_voice.insert(**Sal)
db.commit() db.commit()
else: else:
db(db.analitic_voice.path == Sal["path"] and db.analitic_voice.model == Sal["model"]).update(similarity= Sal["similarity"],similaritypartial= Sal["similaritypartial"],last_modified=Sal["last_modified"]) print(2,Sal)
db((db.analitic_voice.path == Sal["path"]) & (db.analitic_voice.model == Sal["model"])).update(similarity= Sal["similarity"],similaritypartial= Sal["similaritypartial"],last_modified=Sal["last_modified"])
db.commit() db.commit()
return Sal return Sal
@ -192,7 +249,7 @@ def EvalVoicehtml():
</style> </style>
</head> </head>
<body> <body>
<h1>Petición POST a API</h1> <h1>Petición Evaluar modelo de voz comtra datos curados</h1>
<select id="texto1"> <select id="texto1">
%s %s
@ -237,6 +294,137 @@ def EvalVoicehtml():
"""%(Sal) """%(Sal)
return HTMLResponse(content=html, status_code=200) return HTMLResponse(content=html, status_code=200)
@app.get("/EvalLLMCompra")
@app.post("/EvalLLMCompra")
def EvalLLMCompra(response:Response4):
    """Run one LLM purchase evaluation and persist its result.

    The request ``path`` is used both to find the trusted reference (mode
    "llm_compra") and as the text sent to the model.

    Args:
        response (Response4): path, system, max_tokens and model are read.

    Returns:
        JSONResponse with status 404 when no trusted row matches; otherwise
        the result dict from ``main.EvalModelLLMCompra`` with a
        ``last_modified`` timestamp added.
    """
    path = response.path
    trusted_rows = (db.trusted.path == path) & (db.trusted.mode == "llm_compra")
    if db(trusted_rows).count() == 0:
        return JSONResponse(
            status_code=404,
            content={"content": "Trusted no found" }
        )
    Trusted = db(trusted_rows).select().last().trusted

    Sal = main.EvalModelLLMCompra(
        response.system, path, response.model, response.max_tokens, Trusted
    )
    Sal["last_modified"] = datetime.now()

    # One analytics row per (path, model): insert the first time, refresh
    # the scores afterwards.
    stored = (db.analitic_llm_compra.path == Sal["path"]) & (db.analitic_llm_compra.model == Sal["model"])
    if db(stored).count() == 0:
        print(1,Sal)
        db.analitic_llm_compra.insert(**Sal)
    else:
        print(2,Sal)
        db(stored).update(
            similarity=Sal["similarity"],
            similaritypartial=Sal["similaritypartial"],
            last_modified=Sal["last_modified"],
        )
    db.commit()
    return Sal
@app.get("/evalllmcomprahtml")
def EvalLLMComprahtml():
    """Serve the HTML form that launches an LLM purchase evaluation.

    The first selector lists the paths of trusted rows whose mode is
    "llm_compra", the second a fixed list of model names, and the third the
    stored prompts whose mode is "llm_compra". Submitting the form POSTs the
    selection as JSON to /EvalLLMCompra and shows the JSON answer.

    Returns:
        HTMLResponse with status 200 containing the page.
    """
    # Local import: only this page builder needs it.
    from html import escape

    def build_options(values):
        # Values come from the database and end up inside an attribute and
        # inside element text, so both occurrences are escaped; a prompt
        # containing a double quote would otherwise truncate the value.
        return "".join(
            '<option value="%s">Opción %s, %s</option>\n'
            % (escape(str(value), quote=True), position, escape(str(value)))
            for position, value in enumerate(values, start=1)
        )

    trusted_options = build_options(
        row.path for row in db((db.trusted.mode == "llm_compra" )).select()
    )
    prompt_options = build_options(
        row.prompt for row in db((db.prompt.mode == "llm_compra" )).select()
    )

    page = """<!DOCTYPE html>
<html lang="es">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Evaluacion de modelos LLM compra</title>
    <style>
        body {
            font-family: Arial, sans-serif;
            margin: 20px;
        }
        input, button {
            margin: 10px 0;
            padding: 5px;
        }
        #respuesta {
            margin-top: 20px;
            padding: 10px;
            border: 1px solid #ccc;
            background-color: #f9f9f9;
        }
    </style>
</head>
<body>
    <h1>Petición Evaluar modelo LLM de compra contra datos curados</h1>
    <select id="texto1">
        %s
    </select>
    <br>
    <select id="texto2">
        <option value="meta-llama/Meta-Llama-3.1-70B-Instruct">meta-llama/Meta-Llama-3.1-70B-Instruct</option>
        <option value="meta-llama/Meta-Llama-3.1-8B-Instruct">meta-llama/Meta-Llama-3.1-8B-Instruct</option>
        <option value="Mistral">Mistral</option>
    </select>
    <br>
    <select id="texto3">
        %s
    </select>
    <br>
    <input type="text" id="texto4" placeholder="max_tokens">
    <br>
    <button onclick="enviarPeticion()">Enviar petición</button>
    <div id="respuesta"></div>
    <script>
        function enviarPeticion() {
            const texto1 = document.getElementById('texto1').value;
            const texto2 = document.getElementById('texto2').value;
            const texto3 = document.getElementById('texto3').value;
            const texto4 = document.getElementById('texto4').value.trim();
            const datos = {
                path: texto1,
                model: texto2,
                system: texto3
            };
            // Only send max_tokens when a number was typed, so the
            // server-side default applies otherwise.
            const maxTokens = parseInt(texto4, 10);
            if (!Number.isNaN(maxTokens)) {
                datos.max_tokens = maxTokens;
            }
            fetch('/EvalLLMCompra', {
                method: 'POST',
                headers: {
                    'Content-Type': 'application/json'
                },
                body: JSON.stringify(datos)
            })
            .then(response => response.json())
            .then(data => {
                // textContent: the answer contains model output and must
                // not be interpreted as markup.
                document.getElementById('respuesta').textContent = JSON.stringify(data, null, 2);
            })
            .catch(error => {
                document.getElementById('respuesta').textContent = 'Error: ' + error;
            });
        }
    </script>
</body>
</html>
"""%(trusted_options, prompt_options)
    return HTMLResponse(content=page, status_code=200)
#Por revisar #Por revisar
def list2tablehtml(listdata,model): def list2tablehtml(listdata,model):

View File

@ -9,8 +9,20 @@ db.define_table(
Field("sizeMB",type="double",default=0),# audio,factura Field("sizeMB",type="double",default=0),# audio,factura
Field("length",type="integer",default=0),#texto Field("length",type="integer",default=0),#texto
Field('last_modified', 'datetime'), Field('last_modified', 'datetime'),
Field('modification_count', 'integer', default=0) Field('modification_count', 'integer', default=0),
Field('hash')
) )
# Stored prompts, one row per prompt text and mode.
db.define_table(
    "prompt",
    Field("prompt"),  # prompt text
    Field("mode"),  # mode the prompt was registered under
    Field("length",type="integer",default=0),  # presumably len(prompt), as written by /addPrompt — confirm
    Field('hash',unique=True),  # presumably the SHA-256 hex digest of the prompt text (see /addPrompt) — confirm
    Field('last_modified', 'datetime'),
)
db.define_table( db.define_table(
"analitic_voice", "analitic_voice",
Field("content"), Field("content"),

134
gui.py
View File

@ -7,12 +7,9 @@ import pandas as pd
import requests import requests
import statistics import statistics
from databases import db from databases import db
import time
pwd = os.getcwd() pwd = os.getcwd()
HTML = os.path.join(pwd,"html", "index.html")
file_read = codecs.open(HTML, "r", "utf-8")
index = file_read.read()
html_page_index = Html(index)
def extractConfig(nameModel="SystemData",relPath=os.path.join(pwd,"conf/experiment_config.json"),dataOut="keyantrophics"): def extractConfig(nameModel="SystemData",relPath=os.path.join(pwd,"conf/experiment_config.json"),dataOut="keyantrophics"):
configPath=os.path.join(os.getcwd(),relPath) configPath=os.path.join(os.getcwd(),relPath)
with open(configPath, 'r', encoding='utf-8') as file: with open(configPath, 'r', encoding='utf-8') as file:
@ -20,6 +17,8 @@ def extractConfig(nameModel="SystemData",relPath=os.path.join(pwd,"conf/experime
Output= config[dataOut] Output= config[dataOut]
return Output return Output
mode_list=extractConfig(nameModel="SystemData",dataOut="mode_list") mode_list=extractConfig(nameModel="SystemData",dataOut="mode_list")
def getmetricvoice(model): def getmetricvoice(model):
rows = db(db.analitic_voice.model==model).select() rows = db(db.analitic_voice.model==model).select()
rows_list = rows.as_list() rows_list = rows.as_list()
@ -36,70 +35,113 @@ def getmetricvoice(model):
def html_getmetricvoice(): def html_getmetricvoice():
models=list() models=list()
t=time.time()
for row in db().select(db.analitic_voice.model, distinct=True): for row in db().select(db.analitic_voice.model, distinct=True):
models.append(row.model) models.append(row.model)
data={} data={}
for model in models: for model in models:
data[model]=getmetricvoice(model) data[model]=getmetricvoice(model)
data=pd.DataFrame(data).T data=pd.DataFrame(data).T
datafiles={} data_files={}
for row in db().select(db.analitic_voice.ALL): for row in db().select(db.analitic_voice.ALL):
datafiles[row.id]=row.as_dict() data_files[row.id]=row.as_dict()
datafiles=pd.DataFrame(datafiles).T #print(datafiles)
data_files=pd.DataFrame(data_files).T
#table = pd.pivot_table(data_files, values=['path', 'similarity','similaritypartial'], index=['path'],
#columns=['model'], aggfunc="sum")
#print(table,table.columns)
html=""" html="""
<h1>Data general de los modelos</h1>
<taipy:table>{data_voice}</taipy:table> <taipy:table>{data_voice}</taipy:table>
<h1>Data de cada muestra</h1>
<taipy:table filter=True>{data_files_voice}</taipy:table> <taipy:table filter=True>{data_files_voice}</taipy:table>
"""
return html,data,datafiles
html_page_getmetricsvoice,data_voice,data_files_voices=html_getmetricvoice()
mode="voice"
modetypedata="audio"
file="id2"
def changemenu(mode):
if mode_list[mode]=="audio":
pathori="example/audio"
if mode_list[mode]=="factura":
pathori="example/factura"
if mode_list[mode]=="texto":
pathori="example/texto"
seltypedata=mode_list[mode]
dir_list = os.listdir(pathori)
return pathori,seltypedata,dir_list
"""
#<taipy:chart mode="markers" x="x" y[1]="time" y[2]="similarity">{data_files_voice}</taipy:chart>
print(time.time()-t)
return html,data,data_files
def getmetricllm_compra(model):
    """Summarise the stored llm_compra evaluation rows of one model.

    Args:
        model: value of ``analitic_llm_compra.model`` to summarise.

    Returns:
        dict with keys ``model``, ``duration`` (mean duration of the matching
        trusted rows, 0 when none is found), ``time``, ``similarity``,
        ``similaritypartial`` (pivot-table aggregates for the model) and
        ``efectivetime`` (``time / duration``, NaN when the duration is 0).
    """
    rows_list = db(db.analitic_llm_compra.model == model).select().as_list()
    data = pd.DataFrame(rows_list)

    # A result row may have no trusted counterpart; skip it instead of
    # failing on ``None.duration``.
    durations = []
    for row in rows_list:
        trusted_row = db(db.trusted.path == row["path"]).select().last()
        if trusted_row is not None and trusted_row.duration is not None:
            durations.append(trusted_row.duration)
    duration = statistics.mean(durations) if durations else 0

    # Built once; the three metrics are columns of the same pivot table.
    summary = pd.pivot_table(
        data, values=['time', 'similarity', 'similaritypartial'], index="model"
    )
    # Not named ``time`` so the imported ``time`` module is not shadowed.
    elapsed = summary['time'].values[0]
    similarity = summary['similarity'].values[0]
    similaritypartial = summary['similaritypartial'].values[0]

    # NOTE(review): text-mode trusted rows appear to be stored with
    # duration 0 (see addTrusted), so this ratio may be undefined for this
    # mode — confirm whether another denominator is intended.
    efectivetime = elapsed / duration if duration else float("nan")
    return {
        "model": model,
        "duration": duration,
        "time": elapsed,
        "similarity": similarity,
        "similaritypartial": similaritypartial,
        "efectivetime": efectivetime,
    }
def html_getmetricllm_compra():
    """Build the metrics page data for the llm_compra evaluations.

    Returns:
        tuple ``(html, data, data_files)``: the page template, a DataFrame
        with one row per model (from ``getmetricllm_compra``) and a
        DataFrame with one row per stored ``analitic_llm_compra`` record.
    """
    started = time.time()

    model_names = [
        row.model
        for row in db().select(db.analitic_llm_compra.model, distinct=True)
    ]
    per_model = {name: getmetricllm_compra(name) for name in model_names}
    data = pd.DataFrame(per_model).T

    per_record = {
        row.id: row.as_dict()
        for row in db().select(db.analitic_llm_compra.ALL)
    }
    data_files = pd.DataFrame(per_record).T

    # NOTE(review): the template binds {data_voice} / {data_files_voice},
    # the same names as the voice page — confirm this is intended.
    html = """
<h1>Data general de los modelos</h1>
<taipy:table>{data_voice}</taipy:table>
<h1>Data de cada muestra</h1>
<taipy:table filter=True>{data_files_voice}</taipy:table>
"""
    # Elapsed build time, printed for diagnostics.
    print(time.time() - started)
    return html, data, data_files
def trustedallhtml(mode): def on_init(state):
pathori,seltypedata,dir_list=changemenu(mode) state.html_page_getmetricsvoice,state.data_voice,state.data_files_voice=html_getmetricvoice()
pass
html_page_getmetricsvoice,data_voice,data_files_voice=html_getmetricvoice()
# mode="voice"
# modetypedata="audio"
# file="id2"
# def changemenu(mode):
# if mode_list[mode]=="audio":
# pathori="example/audio"
# if mode_list[mode]=="factura":
# pathori="example/factura"
# if mode_list[mode]=="texto":
# pathori="example/texto"
# seltypedata=mode_list[mode]
# dir_list = os.listdir(pathori)
# return pathori,seltypedata,dir_list
textmode=""
for modeused in mode_list.keys():
textmode=textmode+"('%s','%s'),"%(modeused,modeused)
html="""<taipy:selector lov="{[%s]}" dropdown True on_change=changemenu>{sel}</taipy:selector>"""%(textmode)
Sal=""
for i in dir_list:
temp="""('%s', '%s'),"""%(str(pwd+"/"+pathori+"/"+i),str(i))
Sal=Sal+temp
html2="""<taipy:selector lov="{[%s]}" dropdown True >{sel2}</taipy:selector>"""%(Sal)
return html+html2
html_page_trustedall = Html(trustedallhtml(mode))
#print(sel,sel2,seltypedata)
HTML = os.path.join(pwd,"html", "index.html")
file_read = codecs.open(HTML, "r", "utf-8")
index = file_read.read()
html_page_index = Html(index)
data=pd.DataFrame({'col1': [1, 2], 'col2': [3, 4]}) data=pd.DataFrame({'col1': [1, 2], 'col2': [3, 4]})
pages = { pages = {
"/": html_page_index ,
"getmetricsvoice": Html(html_page_getmetricsvoice), "getmetricsvoice": Html(html_page_getmetricsvoice),
"trustedall":html_page_trustedall
} }
app = Gui(pages=pages) app = Gui(pages=pages)
app.on_init=on_init
if __name__=="__main__": if __name__=="__main__":
app.run(use_reloader=True,port=7882, change_delay=1600)#state.imageActive2, app.run(use_reloader=True,port=7882)#state.imageActive2,

47
main.py
View File

@ -15,11 +15,8 @@ def EvalVoice2Text(endpoint,datajson,Trusted):
""" """
apiUrl=urlAud+endpoint apiUrl=urlAud+endpoint
response = requests.get(apiUrl, json=datajson) response = requests.get(apiUrl, json=datajson)
print(datajson)
A=json.loads(response.content) A=json.loads(response.content)
print(A)
time=A['time'] time=A['time']
similarity=fuzz.ratio( Trusted.strip().lower(),A['message'].strip().lower()) similarity=fuzz.ratio( Trusted.strip().lower(),A['message'].strip().lower())
similarityPartial=fuzz.partial_ratio( Trusted.strip().lower(),A['message'].strip().lower()) similarityPartial=fuzz.partial_ratio( Trusted.strip().lower(),A['message'].strip().lower())
path=datajson["local"] path=datajson["local"]
@ -34,27 +31,59 @@ def EvalVoice2Text(endpoint,datajson,Trusted):
"path":path "path":path
} }
def EvalWhisper(path,Trusted=""): def EvalWhisper(path,Trusted=""):
endpoint="/voice2txt" endpoint="/voice2txt"
datajson={"url":"","password":password ,"model":"whisper","local":path} datajson={"url":"","password":password ,"model":"whisper","local":path}
return EvalVoice2Text(endpoint,datajson,Trusted) return EvalVoice2Text(endpoint,datajson,Trusted)
# EvalWhisper(path="example/AwACAgEAAxkBAAIBw2YX8o2vGGCNtZCXk7mY1Bm5w__lAAJmBAACxe7ARI1fUWAGcz_RNAQ.ogg",
# Trusted="Hoy compre dos medicinas Tereleji en Cruz Verde por un monto de 494 mil 400 pesos colombianos.",
# endpoint="/voice2txt")
def EvalVosk(path,Trusted=""): def EvalVosk(path,Trusted=""):
endpoint="/voice2txtlocal" endpoint="/voice2txtlocal"
datajson={"url":"","password":password ,"model":"models/vosk-model-small-es-0.42","local":path} datajson={"url":"","password":password ,"model":"models/vosk-model-small-es-0.42","local":path}
return EvalVoice2Text(endpoint,datajson,Trusted) return EvalVoice2Text(endpoint,datajson,Trusted)
def EvalLLMCompra(endpoint,datajson,Trusted):
    """Call the text-generation service and score its answer.

    Args:
        endpoint: path appended to ``urlText``.
        datajson: JSON payload sent to the service; its ``"model"`` and
            ``"content"`` entries are also copied into the result.
        Trusted: reference text the answer is compared with.

    Returns:
        dict with keys ``content`` (service answer), ``trusted``, ``model``,
        ``time`` (as reported by the service), ``similarity`` and
        ``similaritypartial`` (fuzz.ratio / fuzz.partial_ratio on the
        stripped, lower-cased texts) and ``path`` (the evaluated input).
    """
    apiUrl = urlText + endpoint
    response = requests.get(apiUrl, json=datajson)
    answer = json.loads(response.content)
    message = answer['content']

    # Normalise both texts once; both scores compare the same pair.
    expected = Trusted.strip().lower()
    obtained = message.strip().lower()

    return {"content": message,
            "trusted": Trusted,
            "model": datajson["model"],
            "time": answer['time'],
            "similarity": fuzz.ratio(expected, obtained),
            "similaritypartial": fuzz.partial_ratio(expected, obtained),
            # Identify the sample by its input, not by the generated text:
            # callers key stored results on (path, model) and match ``path``
            # against the trusted table.
            "path": datajson["content"]
            }
def EvalModelLLMCompra(system,content,model,max_new_tokens,Trusted):
    """Evaluate one model through the "/genTextCustom" endpoint.

    Builds the request payload from the arguments and the module-level
    ``password`` and delegates to ``EvalLLMCompra``, whose result is
    returned unchanged.
    """
    # NOTE(review): the payload key is "max_new_token" (singular) — confirm
    # it matches what the service expects.
    payload = {
        "system": system,
        "content": content,
        "password": password,
        "model": model,
        "max_new_token": max_new_tokens,
    }
    return EvalLLMCompra("/genTextCustom", payload, Trusted)
# EvalVosk(path="example/AwACAgEAAxkBAAIBw2YX8o2vGGCNtZCXk7mY1Bm5w__lAAJmBAACxe7ARI1fUWAGcz_RNAQ.ogg", # EvalVosk(path="example/AwACAgEAAxkBAAIBw2YX8o2vGGCNtZCXk7mY1Bm5w__lAAJmBAACxe7ARI1fUWAGcz_RNAQ.ogg",
# Trusted="Hoy compre dos medicinas Tereleji en Cruz Verde por un monto de 494 mil 400 pesos colombianos.", # Trusted="Hoy compre dos medicinas Tereleji en Cruz Verde por un monto de 494 mil 400 pesos colombianos.",
# endpoint="/voice2txtlocal") # endpoint="/voice2txtlocal")
# EvalWhisper(path="example/AwACAgEAAxkBAAIBw2YX8o2vGGCNtZCXk7mY1Bm5w__lAAJmBAACxe7ARI1fUWAGcz_RNAQ.ogg",
# Trusted="Hoy compre dos medicinas Tereleji en Cruz Verde por un monto de 494 mil 400 pesos colombianos.",
# endpoint="/voice2txt")
def ocrfacturas(path,task_prompt): def ocrfacturas(path,task_prompt):