EvalDataSetHugging/main.py

249 lines
9.3 KiB
Python

import requests
import evaluate
import deepdiff
import json
import os
from fuzzywuzzy import fuzz
from deepdiff import DeepDiff
from deepdiff import Delta
import databases
import metrics
#print(evaluate.list_evaluation_modules())
# Working directory captured once at import time; used to build the default config path.
pwd = os.getcwd()
# Base URL of the audio (voice-to-text) service. NOTE: it ends with "/" while the
# endpoints used with it start with "/", so plain concatenation yields "//".
urlAud="http://127.0.0.1:7870/"
# Base URL of the text / LLM / image service (no trailing slash).
urlText="http://127.0.0.1:7869"
def extractConfig(nameModel="SystemData",
                  relPath=os.path.join(os.getcwd(), "conf/experiment_config.json"),
                  dataOut="keyantrophics"):
    """Return one value from a section of the JSON experiment config.

    Args:
        nameModel: Top-level key (section) of the JSON document.
        relPath: Path of the config file. A relative path is resolved against
            the current working directory; an absolute path is used as is.
        dataOut: Key to read inside the selected section.

    Returns:
        The value stored at ``config[nameModel][dataOut]``.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
        KeyError: If the section or the key is missing; the message names the
            missing entry and the file that was read.
    """
    # os.path.join drops the first component when relPath is absolute, so the
    # same call handles both relative and absolute paths.
    configPath = os.path.join(os.getcwd(), relPath)
    with open(configPath, "r", encoding="utf-8") as file:
        document = json.load(file)

    try:
        section = document[nameModel]
    except KeyError:
        raise KeyError(
            f"section {nameModel!r} not found in {configPath}"
        ) from None

    try:
        return section[dataOut]
    except KeyError:
        raise KeyError(
            f"key {dataOut!r} not found in section {nameModel!r} of {configPath}"
        ) from None
# Settings read at import time from the "SystemData" section of the config file.
# NOTE: importing this module raises if the file or any of these keys is missing.
mode_list=extractConfig(nameModel="SystemData",dataOut="mode_list")
# "keyantrophics" is the key name exactly as looked up in the config;
# presumably the Anthropic API key — confirm.
keyanthropic=extractConfig(nameModel="SystemData",dataOut="keyantrophics")
# Password included in every request payload built by the functions below.
password=extractConfig(nameModel="SystemData",dataOut="password")
def EvalVoice2Text(endpoint,datajson,Trusted,timeout=None):
    """Request a transcription from the audio service and score it.

    Sends ``datajson`` as the JSON body of a GET request to ``urlAud`` +
    ``endpoint`` and compares the returned "message" with ``Trusted`` using
    fuzzywuzzy's ``ratio`` and ``partial_ratio`` (both inputs are stripped
    and lower-cased first).

    Args:
        endpoint: Route on the audio service, with or without a leading "/".
        datajson: Request payload; must contain the keys "local" and "model".
        Trusted: Reference transcription (str).
        timeout: Optional timeout in seconds forwarded to ``requests.get``.
            ``None`` (the default) waits indefinitely, as before.

    Returns:
        dict with the keys "content", "trusted", "model", "time",
        "similarity", "similaritypartial" and "path".

    Raises:
        KeyError: If the response lacks "time"/"message" or the payload lacks
            "local"/"model".
    """
    # urlAud ends with "/" and the endpoints start with "/"; normalise both
    # sides so the request never goes to ".../​/voice2txt".
    apiUrl = urlAud.rstrip("/") + "/" + endpoint.lstrip("/")
    response = requests.get(apiUrl, json=datajson, timeout=timeout)
    reply = json.loads(response.content)
    elapsed = reply['time']
    transcription = reply['message']

    # Normalise once and reuse for both similarity scores.
    reference = Trusted.strip().lower()
    candidate = transcription.strip().lower()

    return {
        "content": transcription,
        "trusted": Trusted,
        "model": datajson["model"],
        "time": elapsed,
        "similarity": fuzz.ratio(reference, candidate),
        "similaritypartial": fuzz.partial_ratio(reference, candidate),
        "path": datajson["local"],
    }
def EvalWhisper(path,Trusted=""):
    """Evaluate the "whisper" model on a local audio file.

    Builds the payload for the "/voice2txt" route and delegates to
    EvalVoice2Text.

    Args:
        path: Value sent as the "local" field of the payload.
        Trusted: Reference transcription used for scoring.

    Returns:
        Whatever EvalVoice2Text returns for this payload.
    """
    payload = dict(url="", password=password, model="whisper", local=path)
    return EvalVoice2Text("/voice2txt", payload, Trusted)
def EvalVosk(path,Trusted=""):
    """Evaluate the Vosk model on a local audio file.

    Builds the payload for the "/voice2txtlocal" route and delegates to
    EvalVoice2Text.

    Args:
        path: Value sent as the "local" field of the payload.
        Trusted: Reference transcription used for scoring.

    Returns:
        Whatever EvalVoice2Text returns for this payload.
    """
    payload = dict(
        url="",
        password=password,
        model="models/vosk-model-small-es-0.42",
        local=path,
    )
    return EvalVoice2Text("/voice2txtlocal", payload, Trusted)
def EvalLLMCompra(endpoint,datajson,Trusted,timeout=None):
    """Query the text service and score its answer with the project metrics.

    Sends ``datajson`` as the JSON body of a GET request to ``urlText`` +
    ``endpoint`` and evaluates the reply with ``metrics.RelevanceMetric``,
    ``metrics.BiasMetric22``, ``metrics.ToxicMetric`` and
    ``metrics.correctnessMetric``; each result is read through its "score"
    and "reason" entries.

    Args:
        endpoint: Route on the text service, with or without a leading "/".
        datajson: Request payload; must contain "system", "content" and "model".
        Trusted: Reference answer handed to the correctness metric.
        timeout: Optional timeout in seconds forwarded to ``requests.get``.
            ``None`` (the default) waits indefinitely, as before.

    Returns:
        dict with the keys "content", "trusted", "model", "time", the four
        scores ("relevance", "bias", "toxic", "correctness"), their reasons
        (same names with an "_r" suffix) and "path".

    Raises:
        KeyError: If the response lacks "time"/"content" or the payload lacks
            one of the required keys.
    """
    apiUrl = urlText.rstrip("/") + "/" + endpoint.lstrip("/")
    response = requests.get(apiUrl, json=datajson, timeout=timeout)
    raw = response.content
    reply = json.loads(raw)
    # Read the required fields before running the metrics so a malformed
    # reply fails before any metric is computed.
    elapsed = reply['time']
    message = reply['content']

    # The metrics all score the same prompt; build it once.
    prompt = datajson["system"] + datajson["content"]
    # NOTE(review): the metrics receive the raw response body (the whole JSON
    # envelope), not only the generated text — confirm this is intended.
    relevance = metrics.RelevanceMetric(prompt, raw)
    bias = metrics.BiasMetric22(prompt, raw)
    toxic = metrics.ToxicMetric(prompt, raw)
    correctness = metrics.correctnessMetric(prompt, raw, Trusted)

    return {
        "content": message,
        "trusted": Trusted,
        "model": datajson["model"],
        "time": elapsed,
        "relevance": relevance["score"],
        "bias": bias["score"],
        "toxic": toxic["score"],
        "correctness": correctness["score"],
        "relevance_r": relevance["reason"],
        "bias_r": bias["reason"],
        "toxic_r": toxic["reason"],
        "correctness_r": correctness["reason"],
        # "path" carries the generated text; kept as is for existing consumers.
        "path": message,
    }
def EvalModelLLMCompra(system,content,model,max_new_tokens,Trusted):
    """Build the "/genTextCustom" payload and evaluate it with EvalLLMCompra.

    Args:
        system: Value sent as the "system" field.
        content: Value sent as the "content" field.
        model: Value sent as the "model" field.
        max_new_tokens: Value sent under the payload key "max_new_token".
        Trusted: Reference answer passed on to EvalLLMCompra.

    Returns:
        Whatever EvalLLMCompra returns for this payload.
    """
    payload = dict(
        system=system,
        content=content,
        password=password,
        model=model,
        max_new_token=max_new_tokens,  # key is singular in the payload
    )
    return EvalLLMCompra("/genTextCustom", payload, Trusted)
def EvalLLMGeneracionTexto(endpoint,datajson,Trusted,timeout=None):
    """Query the text service for generated text and score the answer.

    Sends ``datajson`` as the JSON body of a GET request to ``urlText`` +
    ``endpoint`` and evaluates the reply with ``metrics.RelevanceMetric``,
    ``metrics.BiasMetric22``, ``metrics.ToxicMetric`` and
    ``metrics.correctnessMetric``; each result is read through its "score"
    and "reason" entries.

    Args:
        endpoint: Route on the text service, with or without a leading "/".
        datajson: Request payload; must contain "system", "content" and "model".
        Trusted: Reference answer handed to the correctness metric.
        timeout: Optional timeout in seconds forwarded to ``requests.get``.
            ``None`` (the default) waits indefinitely, as before.

    Returns:
        dict with the keys "content", "trusted", "model", "time", the four
        scores ("relevance", "bias", "toxic", "correctness"), their reasons
        (same names with an "_r" suffix) and "path".

    Raises:
        KeyError: If the response lacks "time"/"content" or the payload lacks
            one of the required keys.
    """
    # NOTE(review): this body currently mirrors EvalLLMCompra; consider a
    # shared helper if the two evaluations are meant to stay identical.
    apiUrl = urlText.rstrip("/") + "/" + endpoint.lstrip("/")
    response = requests.get(apiUrl, json=datajson, timeout=timeout)
    raw = response.content
    reply = json.loads(raw)
    # Read the required fields before running the metrics so a malformed
    # reply fails before any metric is computed.
    elapsed = reply['time']
    message = reply['content']

    # The metrics all score the same prompt; build it once.
    prompt = datajson["system"] + datajson["content"]
    # NOTE(review): the metrics receive the raw response body (the whole JSON
    # envelope), not only the generated text — confirm this is intended.
    relevance = metrics.RelevanceMetric(prompt, raw)
    bias = metrics.BiasMetric22(prompt, raw)
    toxic = metrics.ToxicMetric(prompt, raw)
    correctness = metrics.correctnessMetric(prompt, raw, Trusted)

    return {
        "content": message,
        "trusted": Trusted,
        "model": datajson["model"],
        "time": elapsed,
        "relevance": relevance["score"],
        "bias": bias["score"],
        "toxic": toxic["score"],
        "correctness": correctness["score"],
        "relevance_r": relevance["reason"],
        "bias_r": bias["reason"],
        "toxic_r": toxic["reason"],
        "correctness_r": correctness["reason"],
        # "path" carries the generated text; kept as is for existing consumers.
        "path": message,
    }
def EvalModelLLMGeneracionTexto(system,content,model,max_new_tokens,Trusted):
    """Build the "/genTextCustom" payload and evaluate it with EvalLLMGeneracionTexto.

    Args:
        system: Value sent as the "system" field.
        content: Value sent as the "content" field.
        model: Value sent as the "model" field.
        max_new_tokens: Value sent under the payload key "max_new_token".
        Trusted: Reference answer passed on to EvalLLMGeneracionTexto.

    Returns:
        Whatever EvalLLMGeneracionTexto returns for this payload.
    """
    payload = dict(
        system=system,
        content=content,
        password=password,
        model=model,
        max_new_token=max_new_tokens,  # key is singular in the payload
    )
    return EvalLLMGeneracionTexto("/genTextCustom", payload, Trusted)
# Usage examples. The wrappers take only (path, Trusted); each one selects its
# own endpoint ("/voice2txtlocal" for EvalVosk, "/voice2txt" for EvalWhisper).
# EvalVosk(path="example/AwACAgEAAxkBAAIBw2YX8o2vGGCNtZCXk7mY1Bm5w__lAAJmBAACxe7ARI1fUWAGcz_RNAQ.ogg",
#          Trusted="Hoy compre dos medicinas Tereleji en Cruz Verde por un monto de 494 mil 400 pesos colombianos.")
# EvalWhisper(path="example/AwACAgEAAxkBAAIBw2YX8o2vGGCNtZCXk7mY1Bm5w__lAAJmBAACxe7ARI1fUWAGcz_RNAQ.ogg",
#             Trusted="Hoy compre dos medicinas Tereleji en Cruz Verde por un monto de 494 mil 400 pesos colombianos.")
def ocrfacturas(path,task_prompt):
    """Call the "/parsedimage3" route and return the raw response body.

    Args:
        path: Value sent as the "path" field (presumably an invoice image
            path — confirm against the service).
        task_prompt: Value sent as the "task_prompt" field.

    Returns:
        The response body as bytes (``response.content``).
    """
    payload = {"path": path, "task_prompt": task_prompt, "password": password}
    return requests.get(urlText + '/parsedimage3', json=payload).content
def llmFacturas(path,task_prompt,system,content,max_tokens,model):
    """Call the "/parsedimage4" route and return the raw response body.

    All arguments are forwarded unchanged in the JSON payload under keys of
    the same name, together with the configured password.

    Returns:
        The response body as bytes (``response.content``).
    """
    payload = {
        "path": path,
        "task_prompt": task_prompt,
        "system": system,
        "content": content,
        "max_tokens": max_tokens,
        "model": model,
        "password": password,
    }
    reply = requests.get(urlText + '/parsedimage4', json=payload)
    return reply.content
def llmFacturas2(path,prompt,system,model):
    """Call the "/parsedimage2" route and return the raw response body.

    All arguments are forwarded unchanged in the JSON payload under keys of
    the same name, together with the configured password.

    Returns:
        The response body as bytes (``response.content``).
    """
    payload = {
        "path": path,
        "prompt": prompt,
        "system": system,
        "model": model,
        "password": password,
    }
    reply = requests.get(urlText + '/parsedimage2', json=payload)
    return reply.content
def EvalParsedImage(path="/home/mario/Repositorios/EvalDataSetHugging/example/Gmail/20240530_112812.jpg"):
    """Call the "/parsedimage" route for ``path`` and return the raw response body.

    Args:
        path: Value sent as the "path" field; the default is a
            machine-specific sample file.

    Returns:
        The response body as bytes (``response.content``).
    """
    payload = dict(path=path, password=password)
    return requests.get(urlText + "/parsedimage", json=payload).content
def EvalParsedImage5(path="/home/mario/Repositorios/EvalDataSetHugging/example/Gmail/20240530_112812.jpg",option="teserac"):
    """Call the "/parsedimage5" route and return the raw response body.

    Args:
        path: Value sent as the "path" field; the default is a
            machine-specific sample file.
        option: Value sent as the "option" field.

    Returns:
        The response body as bytes (``response.content``).
    """
    payload = dict(path=path, password=password, option=option)
    return requests.get(urlText + "/parsedimage5", json=payload).content
def EvalFacturas(path,task_prompt,TrustedOCR,option=""):
    """Run one of the image-parsing routes and compare its output with a reference.

    The route is selected by ``task_prompt``:
      * "parsed"                        -> EvalParsedImage(path)
      * "More Detailed Caption" / "OCR" -> ocrfacturas(path, task_prompt)
      * "scan"                          -> EvalParsedImage5(path, option)

    Args:
        path: Path forwarded to the selected route.
        task_prompt: One of the mode strings listed above.
        TrustedOCR: Reference output. If it is a JSON string it is decoded
            before comparison; otherwise it is used as given.
        option: Extra option, only used by the "scan" mode.

    Returns:
        dict with the keys "content", "trusted", "similarity",
        "similaritypartial", "model", "time" and "jsonok" (1 when TrustedOCR
        was decoded as JSON, 0 otherwise).

    Raises:
        ValueError: If ``task_prompt`` is not a supported mode (previously
            this surfaced as an UnboundLocalError further down).
    """
    if task_prompt == "parsed":
        raw = EvalParsedImage(path)
    elif task_prompt in ("More Detailed Caption", "OCR"):
        raw = ocrfacturas(path, task_prompt)
    elif task_prompt == "scan":
        raw = EvalParsedImage5(path, option)
    else:
        raise ValueError(
            f"unsupported task_prompt {task_prompt!r}; expected 'parsed', "
            "'More Detailed Caption', 'OCR' or 'scan'"
        )

    # Decode the service reply once instead of once per field.
    reply = json.loads(raw)
    model = reply["model"]
    content = reply["content"]
    elapsed = reply["time"]

    # Best effort: the reference may be plain text rather than JSON.
    try:
        TrustedOCR = json.loads(TrustedOCR)
    except (TypeError, ValueError):
        jsonok = 0
    else:
        jsonok = 1

    reference = str(TrustedOCR).strip().lower()
    candidate = str(content).strip().lower()
    return {
        "content": content,
        "trusted": TrustedOCR,
        "similarity": fuzz.ratio(reference, candidate),
        "similaritypartial": fuzz.partial_ratio(reference, candidate),
        "model": model,
        "time": elapsed,
        "jsonok": jsonok,
    }
def changemodel(model):
    """Translate a short Claude alias into its dated model identifier.

    Matching is exact (case-sensitive). Any value that is not one of the
    known aliases is returned unchanged.
    """
    aliases = (
        ("Claude-sonnet", "claude-3-5-sonnet-20240620"),
        ("Claude-opus", "claude-3-opus-20240229"),
        ("Claude-haiku", "claude-3-haiku-20240307"),
    )
    for alias, identifier in aliases:
        if model == alias:
            return identifier
    return model
def EvalllmFacturas(path,task_prompt,system,content,max_tokens,model,prompt,TrustedLLmjson):
    """Run an invoice-parsing LLM route and pair its output with the reference.

    ``model`` is first mapped through changemodel. A model whose name contains
    "claude" combined with an empty ``task_prompt`` goes through llmFacturas2;
    every other combination goes through llmFacturas.

    Args:
        path: Path forwarded to the selected route.
        task_prompt: Task prompt; an empty string selects the llmFacturas2
            route for "claude" models.
        system: System prompt forwarded to the route.
        content: Content forwarded to llmFacturas.
        max_tokens: Token limit forwarded to llmFacturas.
        model: Model name or alias.
        prompt: Prompt forwarded to llmFacturas2.
        TrustedLLmjson: Reference output as a JSON string.

    Returns:
        dict with "content" (the raw body returned by the route) and
        "trusted" (the decoded reference).

    Raises:
        json.JSONDecodeError: If ``TrustedLLmjson`` is not valid JSON.
        TypeError: If ``TrustedLLmjson`` is not a str/bytes value.
    """
    # Decode the reference first so invalid input fails before the model
    # request is issued.
    trusted = json.loads(TrustedLLmjson)

    model = changemodel(model)
    if "claude" in model and task_prompt == "":
        answer = llmFacturas2(path=path, prompt=prompt, system=system, model=model)
    else:
        answer = llmFacturas(
            path=path,
            task_prompt=task_prompt,
            system=system,
            content=content,
            max_tokens=max_tokens,
            model=model,
        )
    return {"content": answer, "trusted": trusted}
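# NOTE: stale example — EvalFacturas takes (path, task_prompt, TrustedOCR, option="");
# the system/content/max_tokens/model arguments below are parameters of EvalllmFacturas.
#EvalFacturas(path="example/Factura2.jpg",task_prompt="OCR",system="",content="Analiza el siguiente texto: %s",max_tokens=200,model="claude-sonnet")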
def EvalClassImage(path="/home/mario/Repositorios/EvalDataSetHugging/example/Gmail/20240530_112812.jpg",):
    """Call the "classificateimage" route for ``path`` and print the raw response.

    Args:
        path: Value sent as the "path" field; the default is a
            machine-specific sample file.

    Returns:
        The response body as bytes (``response.content``). It is also printed,
        as before; earlier versions returned None.
    """
    # The endpoint needs its leading "/": without it the URL became
    # "http://127.0.0.1:7869classificateimage", which is not a valid address.
    apiUrl = urlText.rstrip("/") + "/classificateimage"
    payload = {"path": path, "password": password}
    response = requests.get(apiUrl, json=payload)
    print(response.content)
    return response.content
#To Do
def EvalGeneratedText(prompt="",model="",):
    """Placeholder for the generated-text evaluation; not implemented yet.

    Both arguments are currently ignored and the function returns None.
    """
    return None
# Placeholder for the voice-generation evaluation; nothing is implemented yet.
# NOTE(review): GenerateVoice and Voice2txt are empty stubs, presumably helpers
# nested inside EvalGenerateVoice — confirm the intended nesting.
def EvalGenerateVoice():
def GenerateVoice():
pass
def Voice2txt():
pass