# EvalDataSetHugging/main.py
#
# 187 lines
# 6.5 KiB
# Python

import requests
import evaluate
import deepdiff
import json
from fuzzywuzzy import fuzz
from deepdiff import DeepDiff
from deepdiff import Delta
import databases
#print(evaluate.list_evaluation_modules())
# Base URL used for the voice-to-text requests (note the trailing slash).
urlAud = "http://127.0.0.1:7870/"

# Base URL used for the text and image requests (no trailing slash).
urlText = "http://127.0.0.1:7869"

# Password placed in the JSON payload of the requests built in this module.
# NOTE(review): the credential is hard-coded in source; consider loading it
# from configuration instead.
password = "1223Aer*"
def EvalVoice2Text(endpoint, datajson, Trusted):
    """Call a voice-to-text endpoint and score the transcript it returns.

    ``datajson`` is sent as the JSON body of a GET request to ``urlAud`` joined
    with ``endpoint``. The ``"message"`` field of the JSON answer is compared
    with ``Trusted`` after stripping surrounding whitespace and lower-casing
    both strings.

    Args:
        endpoint: Route on the voice service, e.g. ``"/voice2txt"``.
        datajson: Request payload. Its ``"local"`` and ``"model"`` entries are
            copied into the result, so both keys must be present.
        Trusted: Reference transcript to compare against.

    Returns:
        A dict with the keys ``content`` (returned transcript), ``trusted``,
        ``model``, ``time`` (as reported by the service), ``similarity``
        (``fuzz.ratio`` score), ``similaritypartial``
        (``fuzz.partial_ratio`` score) and ``path``.

    Raises:
        KeyError: If the answer lacks ``"time"``/``"message"`` or ``datajson``
            lacks ``"local"``/``"model"``.
    """
    # urlAud ends with "/" and the endpoints start with "/"; join them with
    # exactly one slash so the request is not sent to ".../​/voice2txt".
    apiUrl = urlAud.rstrip("/") + "/" + endpoint.lstrip("/")
    response = requests.get(apiUrl, json=datajson)
    answer = json.loads(response.content)

    elapsed = answer['time']
    message = answer['message']

    # Normalise both sides once; the two scores use the same inputs.
    expected = Trusted.strip().lower()
    transcript = message.strip().lower()
    similarity = fuzz.ratio(expected, transcript)
    similarityPartial = fuzz.partial_ratio(expected, transcript)

    return {
        "content": message,
        "trusted": Trusted,
        "model": datajson["model"],
        "time": elapsed,
        "similarity": similarity,
        "similaritypartial": similarityPartial,
        "path": datajson["local"],
    }
def EvalWhisper(path, Trusted=""):
    """Evaluate the ``whisper`` model on the audio file at ``path``.

    Builds the request payload and delegates to ``EvalVoice2Text`` using the
    ``/voice2txt`` endpoint.

    Args:
        path: Audio file location, sent under the ``"local"`` key.
        Trusted: Reference transcript; defaults to an empty string.

    Returns:
        The result dict produced by ``EvalVoice2Text``.
    """
    payload = {
        "url": "",
        "password": password,
        "model": "whisper",
        "local": path,
    }
    return EvalVoice2Text("/voice2txt", payload, Trusted)
def EvalVosk(path, Trusted=""):
    """Evaluate the small Spanish Vosk model on the audio file at ``path``.

    Builds the request payload and delegates to ``EvalVoice2Text`` using the
    ``/voice2txtlocal`` endpoint.

    Args:
        path: Audio file location, sent under the ``"local"`` key.
        Trusted: Reference transcript; defaults to an empty string.

    Returns:
        The result dict produced by ``EvalVoice2Text``.
    """
    payload = {
        "url": "",
        "password": password,
        "model": "models/vosk-model-small-es-0.42",
        "local": path,
    }
    return EvalVoice2Text("/voice2txtlocal", payload, Trusted)
def EvalLLMCompra(endpoint, datajson, Trusted):
    """Call a text-generation endpoint and score the text it returns.

    ``datajson`` is sent as the JSON body of a GET request to
    ``urlText + endpoint``. The decoded answer is printed, and its
    ``"content"`` field is compared with ``Trusted`` after stripping
    surrounding whitespace and lower-casing both strings.

    Args:
        endpoint: Route on the text service, e.g. ``"/genTextCustom"``.
        datajson: Request payload; its ``"model"`` entry is copied into the
            result.
        Trusted: Reference text to compare against.

    Returns:
        A dict with the keys ``content``, ``trusted``, ``model``, ``time``,
        ``similarity``, ``similaritypartial`` and ``path``.
    """
    response = requests.get(urlText + endpoint, json=datajson)
    reply = json.loads(response.content)
    elapsed = reply['time']
    print(reply)

    expected = Trusted.strip().lower()
    generated = reply['content'].strip().lower()
    score = fuzz.ratio(expected, generated)
    partial_score = fuzz.partial_ratio(expected, generated)

    model_name = datajson["model"]
    text = reply['content']
    return {
        "content": text,
        "trusted": Trusted,
        "model": model_name,
        "time": elapsed,
        "similarity": score,
        "similaritypartial": partial_score,
        # "path" is filled with the generated text, same as "content".
        "path": text,
    }
def EvalModelLLMCompra(system, content, model, max_new_tokens, Trusted):
    """Evaluate one LLM through the ``/genTextCustom`` endpoint.

    Packs the prompt parts into a payload and delegates to ``EvalLLMCompra``.

    Args:
        system: System prompt forwarded to the service.
        content: User content forwarded to the service.
        model: Model name forwarded to the service.
        max_new_tokens: Generation limit forwarded to the service.
        Trusted: Reference text used for scoring.

    Returns:
        The result dict produced by ``EvalLLMCompra``.
    """
    payload = {
        "system": system,
        "content": content,
        "password": password,
        "model": model,
        # NOTE(review): the key is "max_new_token" (singular) - confirm that
        # this is the name the service expects.
        "max_new_token": max_new_tokens,
    }
    return EvalLLMCompra("/genTextCustom", payload, Trusted)
# Example usage. EvalVosk and EvalWhisper pick their endpoint internally
# ("/voice2txtlocal" and "/voice2txt"), so no endpoint argument is passed.
# EvalVosk(path="example/AwACAgEAAxkBAAIBw2YX8o2vGGCNtZCXk7mY1Bm5w__lAAJmBAACxe7ARI1fUWAGcz_RNAQ.ogg",
#          Trusted="Hoy compre dos medicinas Tereleji en Cruz Verde por un monto de 494 mil 400 pesos colombianos.")
# EvalWhisper(path="example/AwACAgEAAxkBAAIBw2YX8o2vGGCNtZCXk7mY1Bm5w__lAAJmBAACxe7ARI1fUWAGcz_RNAQ.ogg",
#             Trusted="Hoy compre dos medicinas Tereleji en Cruz Verde por un monto de 494 mil 400 pesos colombianos.")
def ocrfacturas(path, task_prompt):
    """Send an image path and task prompt to the ``/parsedimage3`` route.

    Args:
        path: Image path forwarded to the service.
        task_prompt: Task selector forwarded to the service.

    Returns:
        The raw body of the HTTP response (``response.content``).
    """
    payload = {
        "path": path,
        "task_prompt": task_prompt,
        "password": password,
    }
    return requests.get(urlText + '/parsedimage3', json=payload).content
def llmFacturas(path, task_prompt, system, content, max_tokens, model):
    """Send an image path plus LLM settings to the ``/parsedimage4`` route.

    Args:
        path: Image path forwarded to the service.
        task_prompt: Task selector forwarded to the service.
        system: System prompt forwarded to the service.
        content: User content forwarded to the service.
        max_tokens: Token limit forwarded to the service.
        model: Model name forwarded to the service.

    Returns:
        The raw body of the HTTP response (``response.content``).
    """
    payload = {
        "path": path,
        "task_prompt": task_prompt,
        "system": system,
        "content": content,
        "max_tokens": max_tokens,
        "model": model,
        "password": password,
    }
    return requests.get(urlText + '/parsedimage4', json=payload).content
def llmFacturas2(path, prompt, system, model):
    """Send an image path, prompt and model name to the ``/parsedimage2`` route.

    Args:
        path: Image path forwarded to the service.
        prompt: Prompt forwarded to the service.
        system: System prompt forwarded to the service.
        model: Model name forwarded to the service.

    Returns:
        The raw body of the HTTP response (``response.content``).
    """
    payload = {
        "path": path,
        "prompt": prompt,
        "system": system,
        "model": model,
        "password": password,
    }
    return requests.get(urlText + '/parsedimage2', json=payload).content
def EvalParsedImage(path="/home/mario/Repositorios/EvalDataSetHugging/example/Gmail/20240530_112812.jpg"):
    """Send an image path to the ``/parsedimage`` route.

    Args:
        path: Image path forwarded to the service; defaults to a sample image.

    Returns:
        The raw body of the HTTP response (``response.content``).
    """
    payload = {"path": path, "password": password}
    reply = requests.get(urlText + "/parsedimage", json=payload)
    return reply.content
def EvalParsedImage5(path="/home/mario/Repositorios/EvalDataSetHugging/example/Gmail/20240530_112812.jpg", option="teserac"):
    """Send an image path and an option string to the ``/parsedimage5`` route.

    Args:
        path: Image path forwarded to the service; defaults to a sample image.
        option: Value sent under the ``"option"`` key; defaults to
            ``"teserac"``.

    Returns:
        The raw body of the HTTP response (``response.content``).
    """
    payload = {
        "path": path,
        "password": password,
        "option": option,
    }
    reply = requests.get(urlText + "/parsedimage5", json=payload)
    return reply.content
def EvalFacturas(path, task_prompt, TrustedOCR, option=""):
    """Run one image-parsing backend and score its output against a reference.

    The backend is chosen by ``task_prompt``:

    * ``"parsed"`` -> ``EvalParsedImage(path)``
    * ``"More Detailed Caption"`` or ``"OCR"`` -> ``ocrfacturas(path, task_prompt)``
    * ``"scan"`` -> ``EvalParsedImage5(path, option)``

    Args:
        path: Image path forwarded to the selected backend.
        task_prompt: Backend selector, one of the values listed above.
        TrustedOCR: Reference output. If it is valid JSON it is decoded and
            the decoded value is used (and returned); otherwise it is used
            as given.
        option: Extra option, only used by the ``"scan"`` backend.

    Returns:
        A dict with the keys ``content``, ``trusted``, ``similarity``
        (``fuzz.ratio`` score), ``similaritypartial``
        (``fuzz.partial_ratio`` score), ``model``, ``time`` and ``jsonok``
        (1 when ``TrustedOCR`` decoded as JSON, 0 otherwise).

    Raises:
        ValueError: If ``task_prompt`` is not a supported value.
    """
    if task_prompt == "parsed":
        raw = EvalParsedImage(path)
    elif task_prompt in ("More Detailed Caption", "OCR"):
        raw = ocrfacturas(path, task_prompt)
    elif task_prompt == "scan":
        raw = EvalParsedImage5(path, option)
    else:
        # Previously this fell through and failed later on an unbound local.
        raise ValueError("unsupported task_prompt: {!r}".format(task_prompt))

    # Decode the backend answer once and pick out the fields that are needed.
    result = json.loads(raw)
    model = result["model"]
    content = result["content"]
    elapsed = result["time"]

    # The reference may be a JSON document or plain text; record which one.
    try:
        TrustedOCR = json.loads(TrustedOCR)
    except (TypeError, ValueError):
        # json.JSONDecodeError is a ValueError; TypeError covers non-string input.
        jsonok = 0
    else:
        jsonok = 1

    expected = str(TrustedOCR).strip().lower()
    obtained = str(content).strip().lower()
    return {
        "content": content,
        "trusted": TrustedOCR,
        "similarity": fuzz.ratio(expected, obtained),
        "similaritypartial": fuzz.partial_ratio(expected, obtained),
        "model": model,
        "time": elapsed,
        "jsonok": jsonok,
    }
def changemodel(model):
    """Translate a short Claude alias into its full model identifier.

    Args:
        model: Model name, possibly one of the aliases ``"Claude-sonnet"``,
            ``"Claude-opus"`` or ``"Claude-haiku"`` (case-sensitive).

    Returns:
        The full identifier for a known alias; any other value is returned
        unchanged.
    """
    aliases = {
        "Claude-sonnet": "claude-3-5-sonnet-20240620",
        "Claude-opus": "claude-3-opus-20240229",
        "Claude-haiku": "claude-3-haiku-20240307",
    }
    for alias, full_name in aliases.items():
        if model == alias:
            return full_name
    return model
def EvalllmFacturas(path, task_prompt, system, content, max_tokens, model, prompt, TrustedLLmjson):
    """Query an LLM about an image and pair its answer with the reference.

    The model name is first passed through ``changemodel``. When the resolved
    name contains ``"claude"`` and ``task_prompt`` is empty, ``llmFacturas2``
    is called with ``prompt``; otherwise ``llmFacturas`` is called with
    ``task_prompt``, ``content`` and ``max_tokens``.

    Args:
        path: Image path forwarded to the service.
        task_prompt: Task selector; an empty string enables the
            ``llmFacturas2`` route for Claude models.
        system: System prompt forwarded to the service.
        content: User content, used by ``llmFacturas`` only.
        max_tokens: Token limit, used by ``llmFacturas`` only.
        model: Model name or alias.
        prompt: Prompt, used by ``llmFacturas2`` only.
        TrustedLLmjson: Reference answer as a JSON string.

    Returns:
        A dict with ``content`` (raw response body) and ``trusted`` (the
        decoded reference).
    """
    resolved = changemodel(model)
    use_prompt_route = "claude" in resolved and task_prompt == ""
    if use_prompt_route:
        answer = llmFacturas2(path=path, prompt=prompt, system=system, model=resolved)
    else:
        answer = llmFacturas(
            path=path,
            task_prompt=task_prompt,
            system=system,
            content=content,
            max_tokens=max_tokens,
            model=resolved,
        )
    # The reference is decoded only after the request has completed.
    return {"content": answer, "trusted": json.loads(TrustedLLmjson)}
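# Stale example: EvalFacturas accepts (path, task_prompt, TrustedOCR, option), not the keyword arguments used below.
#EvalFacturas(path="example/Factura2.jpg",task_prompt="OCR",system="",content="Analiza el siguiente texto: %s",max_tokens=200,model="claude-sonnet")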
def EvalClassImage(path="/home/mario/Repositorios/EvalDataSetHugging/example/Gmail/20240530_112812.jpg",):
    """Send an image path to the ``/classificateimage`` route and print the reply.

    Args:
        path: Image path forwarded to the service; defaults to a sample image.

    Returns:
        The raw body of the HTTP response (``response.content``), which is
        also printed.
    """
    # The leading slash is required: urlText has no trailing slash, so without
    # it the URL collapses into "http://127.0.0.1:7869classificateimage".
    endpoint = "/classificateimage"
    jsonT = {"path": path, "password": password}
    response = requests.get(urlText + endpoint, json=jsonT)
    print(response.content)
    # Also hand the body back, as the other image helpers in this module do.
    return response.content
# TODO: the evaluation helpers below are unimplemented placeholders.
def EvalGeneratedText(prompt="", model="",):
    """Placeholder for generated-text evaluation; not implemented yet.

    Accepts ``prompt`` and ``model`` but ignores them and returns ``None``.
    """
    return None
def EvalGenerateVoice():
    """Placeholder for voice-generation evaluation; not implemented yet.

    Only declares two empty inner helpers and returns ``None``.
    """
    def GenerateVoice():
        """Unimplemented inner placeholder."""
        return None

    def Voice2txt():
        """Unimplemented inner placeholder."""
        return None

    return None