249 lines
9.3 KiB
Python
249 lines
9.3 KiB
Python
import requests
|
|
import evaluate
|
|
import deepdiff
|
|
import json
|
|
import os
|
|
|
|
from fuzzywuzzy import fuzz
|
|
from deepdiff import DeepDiff
|
|
from deepdiff import Delta
|
|
import databases
|
|
import metrics
|
|
#print(evaluate.list_evaluation_modules())
|
|
# Working directory captured once at import time; the default config path of
# extractConfig is built from it.
pwd = os.getcwd()

# Base URLs of the local services: urlAud is used by the voice-to-text
# evaluators, urlText by the LLM / OCR / image evaluators.
urlAud = "http://127.0.0.1:7870/"
urlText = "http://127.0.0.1:7869"
|
|
|
|
|
|
def extractConfig(nameModel="SystemData",relPath=os.path.join(pwd,"conf/experiment_config.json"),dataOut="keyantrophics"):
    """Read a single value from the JSON experiment configuration.

    Args:
        nameModel: Top-level key of the configuration section to read.
        relPath: Path of the JSON file. It is joined onto the current working
            directory; ``os.path.join`` keeps an absolute path (such as the
            default) unchanged.
        dataOut: Key inside the selected section whose value is returned.

    Returns:
        The value stored under ``nameModel`` -> ``dataOut`` in the file.

    Raises:
        OSError: If the configuration file cannot be opened.
        KeyError: If ``nameModel`` or ``dataOut`` is missing (assuming both
            levels of the document are JSON objects).
    """
    config_file = os.path.join(os.getcwd(), relPath)
    with open(config_file, 'r', encoding='utf-8') as handle:
        sections = json.load(handle)
    return sections[nameModel][dataOut]
|
|
# Settings read once, at import time, from the "SystemData" section of the
# experiment configuration. `password` is sent with every service request
# built in this module.
mode_list, keyanthropic, password = (
    extractConfig(nameModel="SystemData", dataOut=entry)
    for entry in ("mode_list", "keyantrophics", "password")
)
|
|
|
|
|
|
def EvalVoice2Text(endpoint,datajson,Trusted):
    """Transcribe audio through the voice service and score the transcription.

    Args:
        endpoint: Route on the audio service, e.g. ``"/voice2txt"``. A leading
            slash is optional.
        datajson: JSON payload sent with the request. Its ``"local"`` and
            ``"model"`` entries are copied into the result.
        Trusted: Reference transcription the service output is compared with.

    Returns:
        dict with the keys ``content`` (the service's ``message``),
        ``trusted``, ``model``, ``time`` (the ``time`` field of the response),
        ``similarity`` and ``similaritypartial`` (``fuzz.ratio`` /
        ``fuzz.partial_ratio`` of the stripped, lower-cased texts) and
        ``path``.

    Raises:
        KeyError: If the response lacks ``time``/``message`` or the payload
            lacks ``local``/``model``.
        ValueError: If the response body is not valid JSON.
    """
    # urlAud ends with "/" and the endpoints start with "/"; normalise both
    # sides so the request URL never contains a doubled slash.
    apiUrl = urlAud.rstrip("/") + "/" + endpoint.lstrip("/")
    response = requests.get(apiUrl, json=datajson)
    answer = json.loads(response.content)

    time = answer['time']
    message = answer['message']

    # Normalise once; both similarity scores compare the same two strings.
    reference = Trusted.strip().lower()
    candidate = message.strip().lower()

    return {
        "content": message,
        "trusted": Trusted,
        "model": datajson["model"],
        "time": time,
        "similarity": fuzz.ratio(reference, candidate),
        "similaritypartial": fuzz.partial_ratio(reference, candidate),
        "path": datajson["local"],
    }
|
|
|
|
def EvalWhisper(path,Trusted=""):
    """Evaluate the "whisper" model on a local audio file.

    Args:
        path: Audio file location, sent to the service as ``"local"``.
        Trusted: Reference transcription (defaults to an empty string).

    Returns:
        The result dict produced by ``EvalVoice2Text`` for ``/voice2txt``.
    """
    payload = {
        "url": "",
        "password": password,
        "model": "whisper",
        "local": path,
    }
    return EvalVoice2Text("/voice2txt", payload, Trusted)
|
|
|
|
def EvalVosk(path,Trusted=""):
    """Evaluate the small Spanish Vosk model on a local audio file.

    Args:
        path: Audio file location, sent to the service as ``"local"``.
        Trusted: Reference transcription (defaults to an empty string).

    Returns:
        The result dict produced by ``EvalVoice2Text`` for ``/voice2txtlocal``.
    """
    payload = {
        "url": "",
        "password": password,
        "model": "models/vosk-model-small-es-0.42",
        "local": path,
    }
    return EvalVoice2Text("/voice2txtlocal", payload, Trusted)
|
|
|
|
|
|
def EvalLLMCompra(endpoint,datajson,Trusted):
    """Query the text service and grade its answer with the project metrics.

    Args:
        endpoint: Route appended to ``urlText``.
        datajson: JSON payload sent with the request; its ``"system"``,
            ``"content"`` and ``"model"`` entries are read here.
        Trusted: Reference answer handed to the correctness metric.

    Returns:
        dict with ``content``, ``trusted``, ``model``, ``time``, the four
        metric scores (``relevance``, ``bias``, ``toxic``, ``correctness``),
        their reasons (``*_r``) and ``path`` (same value as ``content``).
    """
    response = requests.get(urlText + endpoint, json=datajson)
    raw_body = response.content
    answer = json.loads(raw_body)
    time = answer['time']

    # Every metric is evaluated against the system text followed by the user text.
    prompt = datajson["system"] + datajson["content"]

    # NOTE(review): the metrics receive the raw response body (the whole JSON
    # payload as bytes), not only its "content" field - confirm this is intended.
    relevance = metrics.RelevanceMetric(prompt, raw_body)
    bias = metrics.BiasMetric22(prompt, raw_body)
    toxic = metrics.ToxicMetric(prompt, raw_body)
    correctness = metrics.correctnessMetric(prompt, raw_body, Trusted)
    # JSON metrics and fuzzy-similarity scoring are currently disabled here.

    model = datajson["model"]
    message = answer['content']

    return {
        "content": message,
        "trusted": Trusted,
        "model": model,
        "time": time,
        "relevance": relevance["score"],
        "bias": bias["score"],
        "toxic": toxic["score"],
        "correctness": correctness["score"],
        "relevance_r": relevance["reason"],
        "bias_r": bias["reason"],
        "toxic_r": toxic["reason"],
        "correctness_r": correctness["reason"],
        "path": message,
    }
|
|
|
|
def EvalModelLLMCompra(system,content,model,max_new_tokens,Trusted):
    """Build the ``/genTextCustom`` payload and evaluate it with EvalLLMCompra.

    Args:
        system: System text sent to the model.
        content: User text sent to the model.
        model: Model identifier sent to the service.
        max_new_tokens: Sent to the service under the key ``"max_new_token"``.
        Trusted: Reference answer forwarded to the evaluation.

    Returns:
        The result dict produced by ``EvalLLMCompra``.
    """
    payload = dict(
        system=system,
        content=content,
        password=password,
        model=model,
        max_new_token=max_new_tokens,
    )
    return EvalLLMCompra("/genTextCustom", payload, Trusted)
|
|
|
|
def EvalLLMGeneracionTexto(endpoint,datajson,Trusted):
    """Query the text service for generated text and grade it with the project metrics.

    Args:
        endpoint: Route appended to ``urlText``.
        datajson: JSON payload sent with the request; its ``"system"``,
            ``"content"`` and ``"model"`` entries are read here.
        Trusted: Reference answer handed to the correctness metric.

    Returns:
        dict with ``content``, ``trusted``, ``model``, ``time``, the four
        metric scores (``relevance``, ``bias``, ``toxic``, ``correctness``),
        their reasons (``*_r``) and ``path`` (same value as ``content``).
    """
    response = requests.get(urlText + endpoint, json=datajson)
    raw_body = response.content
    answer = json.loads(raw_body)
    time = answer['time']

    # Every metric is evaluated against the system text followed by the user text.
    prompt = datajson["system"] + datajson["content"]

    # NOTE(review): the metrics receive the raw response body (the whole JSON
    # payload as bytes), not only its "content" field - confirm this is intended.
    relevance = metrics.RelevanceMetric(prompt, raw_body)
    bias = metrics.BiasMetric22(prompt, raw_body)
    toxic = metrics.ToxicMetric(prompt, raw_body)
    correctness = metrics.correctnessMetric(prompt, raw_body, Trusted)
    # JSON metrics and fuzzy-similarity scoring are currently disabled here.

    model = datajson["model"]
    message = answer['content']

    return {
        "content": message,
        "trusted": Trusted,
        "model": model,
        "time": time,
        "relevance": relevance["score"],
        "bias": bias["score"],
        "toxic": toxic["score"],
        "correctness": correctness["score"],
        "relevance_r": relevance["reason"],
        "bias_r": bias["reason"],
        "toxic_r": toxic["reason"],
        "correctness_r": correctness["reason"],
        "path": message,
    }
|
|
|
|
def EvalModelLLMGeneracionTexto(system,content,model,max_new_tokens,Trusted):
    """Build the ``/genTextCustom`` payload and evaluate it with EvalLLMGeneracionTexto.

    Args:
        system: System text sent to the model.
        content: User text sent to the model.
        model: Model identifier sent to the service.
        max_new_tokens: Sent to the service under the key ``"max_new_token"``.
        Trusted: Reference answer forwarded to the evaluation.

    Returns:
        The result dict produced by ``EvalLLMGeneracionTexto``.
    """
    payload = dict(
        system=system,
        content=content,
        password=password,
        model=model,
        max_new_token=max_new_tokens,
    )
    return EvalLLMGeneracionTexto("/genTextCustom", payload, Trusted)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# EvalVosk(path="example/AwACAgEAAxkBAAIBw2YX8o2vGGCNtZCXk7mY1Bm5w__lAAJmBAACxe7ARI1fUWAGcz_RNAQ.ogg",
|
|
# Trusted="Hoy compre dos medicinas Tereleji en Cruz Verde por un monto de 494 mil 400 pesos colombianos.",
|
|
# endpoint="/voice2txtlocal")
|
|
# EvalWhisper(path="example/AwACAgEAAxkBAAIBw2YX8o2vGGCNtZCXk7mY1Bm5w__lAAJmBAACxe7ARI1fUWAGcz_RNAQ.ogg",
|
|
# Trusted="Hoy compre dos medicinas Tereleji en Cruz Verde por un monto de 494 mil 400 pesos colombianos.",
|
|
# endpoint="/voice2txt")
|
|
|
|
|
|
def ocrfacturas(path,task_prompt):
    """Call the ``/parsedimage3`` route of the text service for an image.

    Args:
        path: Image location sent to the service.
        task_prompt: Task selector sent to the service.

    Returns:
        The raw response body (bytes).
    """
    payload = {
        "path": path,
        "task_prompt": task_prompt,
        "password": password,
    }
    return requests.get(urlText + '/parsedimage3', json=payload).content
|
|
|
|
def llmFacturas(path,task_prompt,system,content,max_tokens,model):
    """Call the ``/parsedimage4`` route of the text service for an image.

    Args:
        path: Image location sent to the service.
        task_prompt: Task selector sent to the service.
        system: System text sent to the service.
        content: User text sent to the service.
        max_tokens: Token limit sent to the service.
        model: Model identifier sent to the service.

    Returns:
        The raw response body (bytes).
    """
    payload = {
        "path": path,
        "task_prompt": task_prompt,
        "system": system,
        "content": content,
        "max_tokens": max_tokens,
        "model": model,
        "password": password,
    }
    return requests.get(urlText + '/parsedimage4', json=payload).content
|
|
|
|
def llmFacturas2(path,prompt,system,model):
    """Call the ``/parsedimage2`` route of the text service for an image.

    Args:
        path: Image location sent to the service.
        prompt: Prompt text sent to the service.
        system: System text sent to the service.
        model: Model identifier sent to the service.

    Returns:
        The raw response body (bytes).
    """
    payload = {
        "path": path,
        "prompt": prompt,
        "system": system,
        "model": model,
        "password": password,
    }
    return requests.get(urlText + '/parsedimage2', json=payload).content
|
|
|
|
def EvalParsedImage(path="/home/mario/Repositorios/EvalDataSetHugging/example/Gmail/20240530_112812.jpg"):
    """Call the ``/parsedimage`` route of the text service for an image.

    Args:
        path: Image location sent to the service.

    Returns:
        The raw response body (bytes).
    """
    payload = {"path": path, "password": password}
    reply = requests.get(urlText + "/parsedimage", json=payload)
    return reply.content
|
|
|
|
def EvalParsedImage5(path="/home/mario/Repositorios/EvalDataSetHugging/example/Gmail/20240530_112812.jpg",option="teserac"):
    """Call the ``/parsedimage5`` route of the text service for an image.

    Args:
        path: Image location sent to the service.
        option: Value sent to the service under the key ``"option"``.

    Returns:
        The raw response body (bytes).
    """
    payload = {"path": path, "password": password, "option": option}
    reply = requests.get(urlText + "/parsedimage5", json=payload)
    return reply.content
|
|
|
|
def EvalFacturas(path,task_prompt,TrustedOCR,option=""):
    """Run one OCR/parsing route on an invoice image and score its output.

    Args:
        path: Image location sent to the service.
        task_prompt: Selects the route: ``"parsed"`` -> ``EvalParsedImage``,
            ``"More Detailed Caption"`` or ``"OCR"`` -> ``ocrfacturas``,
            ``"scan"`` -> ``EvalParsedImage5``.
        TrustedOCR: Reference output. It is decoded as JSON when possible,
            otherwise it is used as given.
        option: Forwarded to ``EvalParsedImage5`` for the ``"scan"`` route.

    Returns:
        dict with ``content``, ``trusted`` (decoded when it was valid JSON),
        ``similarity`` and ``similaritypartial`` (``fuzz.ratio`` /
        ``fuzz.partial_ratio`` of the stringified, stripped, lower-cased
        values), ``model``, ``time`` and ``jsonok`` (1 if ``TrustedOCR`` was
        valid JSON, else 0).

    Raises:
        ValueError: If ``task_prompt`` is not one of the supported values, or
            if the service response is not valid JSON.
        KeyError: If the response lacks ``model``, ``content`` or ``time``.
    """
    if task_prompt == "parsed":
        raw = EvalParsedImage(path)
    elif task_prompt in ("More Detailed Caption", "OCR"):
        raw = ocrfacturas(path, task_prompt)
    elif task_prompt == "scan":
        raw = EvalParsedImage5(path, option)
    else:
        # Previously this fell through and failed later with an unbound local.
        raise ValueError("Unsupported task_prompt: {!r}".format(task_prompt))

    # Decode the service response once instead of once per field.
    parsed = json.loads(raw)
    model = parsed["model"]
    content = parsed["content"]
    time = parsed["time"]

    # Best effort: the reference may be plain text rather than JSON.
    try:
        TrustedOCR = json.loads(TrustedOCR)
    except (TypeError, ValueError):
        jsonok = 0
    else:
        jsonok = 1

    reference = str(TrustedOCR).strip().lower()
    candidate = str(content).strip().lower()

    return {
        "content": content,
        "trusted": TrustedOCR,
        "similarity": fuzz.ratio(reference, candidate),
        "similaritypartial": fuzz.partial_ratio(reference, candidate),
        "model": model,
        "time": time,
        "jsonok": jsonok,
    }
|
|
def changemodel(model):
    """Translate a short Claude alias into its full versioned model id.

    Args:
        model: Model name. ``"Claude-sonnet"``, ``"Claude-opus"`` and
            ``"Claude-haiku"`` (case-sensitive) are the recognised aliases.

    Returns:
        The versioned id for a recognised alias, otherwise ``model`` unchanged.
    """
    aliases = (
        ("Claude-sonnet", "claude-3-5-sonnet-20240620"),
        ("Claude-opus", "claude-3-opus-20240229"),
        ("Claude-haiku", "claude-3-haiku-20240307"),
    )
    for alias, full_id in aliases:
        if model == alias:
            return full_id
    return model
|
|
|
|
def EvalllmFacturas(path,task_prompt,system,content,max_tokens,model,prompt,TrustedLLmjson):
    """Run an LLM invoice extraction and pair it with the decoded reference.

    The model alias is first expanded with ``changemodel``. When the resulting
    name contains ``"claude"`` and ``task_prompt`` is empty, ``llmFacturas2``
    is used; in every other case ``llmFacturas`` is used.

    Args:
        path: Image location sent to the service.
        task_prompt: Task selector; an empty string enables the
            ``llmFacturas2`` route for Claude models.
        system: System text sent to the service.
        content: User text (``llmFacturas`` route only).
        max_tokens: Token limit (``llmFacturas`` route only).
        model: Model name or alias.
        prompt: Prompt text (``llmFacturas2`` route only).
        TrustedLLmjson: Reference output as a JSON string.

    Returns:
        dict with ``content`` (raw service response) and ``trusted`` (the
        decoded reference).

    Raises:
        ValueError: If ``TrustedLLmjson`` is not valid JSON.
    """
    model = changemodel(model)
    use_prompt_route = "claude" in model and task_prompt == ""
    if use_prompt_route:
        raw_answer = llmFacturas2(path=path, prompt=prompt, system=system, model=model)
    else:
        raw_answer = llmFacturas(
            path=path,
            task_prompt=task_prompt,
            system=system,
            content=content,
            max_tokens=max_tokens,
            model=model,
        )
    # The reference is decoded only after the service call, as before.
    return {"content": raw_answer, "trusted": json.loads(TrustedLLmjson)}
|
|
|
|
|
|
|
|
|
|
|
|
#EvalFacturas(path="example/Factura2.jpg",task_prompt="OCR",system="",content="Analiza el siguiente texto: %s",max_tokens=200,model="claude-sonnet")
|
|
|
|
def EvalClassImage(path="/home/mario/Repositorios/EvalDataSetHugging/example/Gmail/20240530_112812.jpg",):
    """Call the ``classificateimage`` route of the text service for an image.

    Args:
        path: Image location sent to the service.

    Returns:
        The raw response body (bytes). It is also printed, as before.
    """
    endpoint = "classificateimage"
    # urlText has no trailing slash and the endpoint has no leading one, so
    # plain concatenation produced "http://127.0.0.1:7869classificateimage".
    apiUrl = urlText.rstrip("/") + "/" + endpoint
    response = requests.get(apiUrl, json={"path": path, "password": password})
    print(response.content)
    # Return the body as the sibling helpers do; earlier callers that ignored
    # the (None) result are unaffected.
    return response.content
|
|
|
|
#To Do
|
|
def EvalGeneratedText(prompt="",model="",):
    """Placeholder for generated-text evaluation; not implemented, returns None."""
    return None
|
|
|
|
def EvalGenerateVoice():
    """Placeholder for voice-generation evaluation; not implemented, returns None.

    NOTE(review): the source indentation was lost; both helper stubs are
    assumed to be nested inside this function - confirm.
    """

    def GenerateVoice():
        """Stub: not implemented."""
        return None

    def Voice2txt():
        """Stub: not implemented."""
        return None
|
|
|
|
|
|
|