"""Helpers that query local voice-to-text and image-parsing HTTP services.

Each ``Eval*`` function sends a request to one of the services configured in
``urlAud`` / ``urlText`` and, where a trusted reference is supplied, scores the
service output against it with fuzzy string matching.
"""
import json

import deepdiff
import evaluate
import requests
from deepdiff import DeepDiff
from deepdiff import Delta
from fuzzywuzzy import fuzz

import databases

# print(evaluate.list_evaluation_modules())

urlAud = "http://127.0.0.1:7870/"
urlText = "http://127.0.0.1:7869"
# NOTE(review): this credential is hard-coded in source control; consider
# loading it from the environment or a secrets store instead.
password = "1223Aer*"

# Short model aliases accepted by ``changemodel`` and the full identifiers
# they expand to.
_MODEL_ALIASES = {
    "Claude-sonnet": "claude-3-5-sonnet-20240620",
    "Claude-opus": "claude-3-opus-20240229",
    "Claude-haiku": "claude-3-haiku-20240307",
}


def _build_url(base, endpoint):
    """Join *base* and *endpoint* with exactly one slash between them.

    The base URLs and endpoint strings in this module are not consistent
    about leading/trailing slashes, so plain concatenation can yield either
    ``host//endpoint`` or ``hostendpoint``.
    """
    return base.rstrip("/") + "/" + endpoint.lstrip("/")


def EvalVoice2Text(endpoint, datajson, Trusted):
    """Transcribe audio through the voice service and score the result.

    Args:
        endpoint: Endpoint path on the ``urlAud`` service.
        datajson: JSON body sent with the request. Its ``"local"`` and
            ``"model"`` entries are copied into the result.
        Trusted: Reference transcription to compare against.

    Returns:
        A dict with the keys ``content`` (the service's ``message``),
        ``trusted``, ``model``, ``time``, ``similarity``
        (``fuzz.ratio``), ``similaritypartial`` (``fuzz.partial_ratio``)
        and ``path``.

    Raises:
        KeyError: If the response lacks ``time``/``message`` or *datajson*
            lacks ``local``/``model``.
        ValueError: If the response body is not valid JSON.
    """
    response = requests.get(_build_url(urlAud, endpoint), json=datajson)
    print(datajson)
    result = json.loads(response.content)
    print(result)

    elapsed = result["time"]
    message = result["message"]

    # Compare case-insensitively and ignore surrounding whitespace.
    reference = Trusted.strip().lower()
    transcript = message.strip().lower()

    return {
        "content": message,
        "trusted": Trusted,
        "model": datajson["model"],
        "time": elapsed,
        "similarity": fuzz.ratio(reference, transcript),
        "similaritypartial": fuzz.partial_ratio(reference, transcript),
        "path": datajson["local"],
    }


def EvalWhisper(path, Trusted=""):
    """Evaluate the ``/voice2txt`` endpoint with the ``whisper`` model.

    Args:
        path: Value sent as the ``"local"`` field of the request.
        Trusted: Reference transcription; defaults to an empty string.

    Returns:
        The result dict produced by ``EvalVoice2Text``.
    """
    datajson = {
        "url": "",
        "password": password,
        "model": "whisper",
        "local": path,
    }
    return EvalVoice2Text("/voice2txt", datajson, Trusted)


# Example:
# EvalWhisper(path="example/AwACAgEAAxkBAAIBw2YX8o2vGGCNtZCXk7mY1Bm5w__lAAJmBAACxe7ARI1fUWAGcz_RNAQ.ogg",
#             Trusted="Hoy compre dos medicinas Tereleji en Cruz Verde por un monto de 494 mil 400 pesos colombianos.")


def EvalVosk(path, Trusted=""):
    """Evaluate the ``/voice2txtlocal`` endpoint with the small Spanish Vosk model.

    Args:
        path: Value sent as the ``"local"`` field of the request.
        Trusted: Reference transcription; defaults to an empty string.

    Returns:
        The result dict produced by ``EvalVoice2Text``.
    """
    datajson = {
        "url": "",
        "password": password,
        "model": "models/vosk-model-small-es-0.42",
        "local": path,
    }
    return EvalVoice2Text("/voice2txtlocal", datajson, Trusted)


# Example:
# EvalVosk(path="example/AwACAgEAAxkBAAIBw2YX8o2vGGCNtZCXk7mY1Bm5w__lAAJmBAACxe7ARI1fUWAGcz_RNAQ.ogg",
#          Trusted="Hoy compre dos medicinas Tereleji en Cruz Verde por un monto de 494 mil 400 pesos colombianos.")


def ocrfacturas(path, task_prompt):
    """Call ``/parsedimage3`` on the text service and return the raw body.

    Args:
        path: Image path sent as ``"path"``.
        task_prompt: Task name sent as ``"task_prompt"``.

    Returns:
        The undecoded response body (``response.content``).
    """
    datajson = {"path": path, "task_prompt": task_prompt, "password": password}
    response = requests.get(_build_url(urlText, "/parsedimage3"), json=datajson)
    return response.content


def llmFacturas(path, task_prompt, system, content, max_tokens, model):
    """Call ``/parsedimage4`` on the text service and return the raw body.

    All arguments are forwarded unchanged in the JSON request body under
    keys of the same name, together with the module ``password``.

    Returns:
        The undecoded response body (``response.content``).
    """
    datajson = {
        "path": path,
        "task_prompt": task_prompt,
        "system": system,
        "content": content,
        "max_tokens": max_tokens,
        "model": model,
        "password": password,
    }
    response = requests.get(_build_url(urlText, "/parsedimage4"), json=datajson)
    return response.content


def llmFacturas2(path, prompt, system, model):
    """Call ``/parsedimage2`` on the text service and return the raw body.

    All arguments are forwarded unchanged in the JSON request body under
    keys of the same name, together with the module ``password``.

    Returns:
        The undecoded response body (``response.content``).
    """
    datajson = {
        "path": path,
        "prompt": prompt,
        "system": system,
        "model": model,
        "password": password,
    }
    response = requests.get(_build_url(urlText, "/parsedimage2"), json=datajson)
    return response.content


def EvalParsedImage(path="/home/mario/Repositorios/EvalDataSetHugging/example/Gmail/20240530_112812.jpg"):
    """Call ``/parsedimage`` on the text service and return the raw body.

    Args:
        path: Image path sent as ``"path"``.

    Returns:
        The undecoded response body (``response.content``).
    """
    payload = {"path": path, "password": password}
    response = requests.get(_build_url(urlText, "/parsedimage"), json=payload)
    return response.content


def EvalParsedImage5(path="/home/mario/Repositorios/EvalDataSetHugging/example/Gmail/20240530_112812.jpg", option="teserac"):
    """Call ``/parsedimage5`` on the text service and return the raw body.

    Args:
        path: Image path sent as ``"path"``.
        option: Value sent as ``"option"``.

    Returns:
        The undecoded response body (``response.content``).
    """
    payload = {"path": path, "password": password, "option": option}
    response = requests.get(_build_url(urlText, "/parsedimage5"), json=payload)
    return response.content


def EvalFacturas(path, task_prompt, TrustedOCR, option=""):
    """Run one of the image-parsing endpoints and score it against a reference.

    Args:
        path: Image path forwarded to the selected endpoint.
        task_prompt: Selects the endpoint: ``"parsed"`` uses
            ``EvalParsedImage``; ``"More Detailed Caption"`` or ``"OCR"``
            uses ``ocrfacturas``; ``"scan"`` uses ``EvalParsedImage5``.
        TrustedOCR: Reference output. If it parses as JSON the parsed value
            is used; otherwise it is used as given.
        option: Forwarded to ``EvalParsedImage5`` when *task_prompt* is
            ``"scan"``.

    Returns:
        A dict with the keys ``content``, ``trusted``, ``similarity``
        (``fuzz.ratio``), ``similaritypartial`` (``fuzz.partial_ratio``),
        ``model``, ``time`` and ``jsonok`` (1 if *TrustedOCR* parsed as
        JSON, else 0).

    Raises:
        ValueError: If *task_prompt* is not one of the supported values, or
            the service response is not valid JSON.
        KeyError: If the response lacks ``model``, ``content`` or ``time``.
    """
    if task_prompt == "parsed":
        raw = EvalParsedImage(path)
    elif task_prompt in ("More Detailed Caption", "OCR"):
        raw = ocrfacturas(path, task_prompt)
    elif task_prompt == "scan":
        raw = EvalParsedImage5(path, option)
    else:
        # Previously this fell through and failed later with an
        # UnboundLocalError that did not name the offending value.
        raise ValueError("Unsupported task_prompt: %r" % (task_prompt,))

    parsed = json.loads(raw)
    model = parsed["model"]
    content = parsed["content"]
    elapsed = parsed["time"]

    # The reference may be a JSON document or plain text; fall back to the
    # original value when it is not JSON.
    try:
        TrustedOCR = json.loads(TrustedOCR)
    except (TypeError, ValueError):
        jsonok = 0
    else:
        jsonok = 1

    # Compare the string forms case-insensitively, ignoring outer whitespace.
    reference = str(TrustedOCR).strip().lower()
    candidate = str(content).strip().lower()

    return {
        "content": content,
        "trusted": TrustedOCR,
        "similarity": fuzz.ratio(reference, candidate),
        "similaritypartial": fuzz.partial_ratio(reference, candidate),
        "model": model,
        "time": elapsed,
        "jsonok": jsonok,
    }


def changemodel(model):
    """Expand a short Claude alias to its full model identifier.

    ``"Claude-sonnet"``, ``"Claude-opus"`` and ``"Claude-haiku"`` are
    replaced (the match is case-sensitive); any other value is returned
    unchanged.
    """
    return _MODEL_ALIASES.get(model, model)


def EvalllmFacturas(path, task_prompt, system, content, max_tokens, model, prompt, TrustedLLmjson):
    """Query an LLM endpoint for an image and pair the answer with a reference.

    The model alias is expanded with ``changemodel``. When the resulting
    name contains ``"claude"`` and *task_prompt* is empty, ``llmFacturas2``
    is used (with *prompt*); otherwise ``llmFacturas`` is used (with
    *task_prompt*, *content* and *max_tokens*).

    Args:
        path: Image path forwarded to the endpoint.
        task_prompt: Task name; an empty string selects the prompt-based
            endpoint for Claude models.
        system: System prompt forwarded to the endpoint.
        content: Forwarded to ``llmFacturas`` only.
        max_tokens: Forwarded to ``llmFacturas`` only.
        model: Model name or alias.
        prompt: Forwarded to ``llmFacturas2`` only.
        TrustedLLmjson: Reference answer as a JSON string.

    Returns:
        A dict with ``content`` (raw response body) and ``trusted`` (the
        parsed reference).

    Raises:
        ValueError: If *TrustedLLmjson* is not valid JSON.
    """
    model = changemodel(model)
    if "claude" in model and task_prompt == "":
        answer = llmFacturas2(path=path, prompt=prompt, system=system, model=model)
    else:
        answer = llmFacturas(
            path=path,
            task_prompt=task_prompt,
            system=system,
            content=content,
            max_tokens=max_tokens,
            model=model,
        )
    TrustedLLmjson = json.loads(TrustedLLmjson)
    return {"content": answer, "trusted": TrustedLLmjson}


# NOTE: stale example - these keyword arguments match llmFacturas, not the
# current EvalFacturas signature.
# EvalFacturas(path="example/Factura2.jpg",task_prompt="OCR",system="",content="Analiza el siguiente texto: %s",max_tokens=200,model="claude-sonnet")


def EvalClassImage(path="/home/mario/Repositorios/EvalDataSetHugging/example/Gmail/20240530_112812.jpg",):
    """Call ``/classificateimage`` on the text service and print the raw body.

    Args:
        path: Image path sent as ``"path"``.

    Returns:
        The undecoded response body (``response.content``), which is also
        printed.
    """
    payload = {"path": path, "password": password}
    # The endpoint literal used to lack its leading slash, which glued it
    # directly onto the port number of ``urlText``.
    response = requests.get(_build_url(urlText, "/classificateimage"), json=payload)
    print(response.content)
    return response.content


# TODO: the evaluators below are placeholders and are not implemented yet.
def EvalGeneratedText(prompt="", model="",):
    """Placeholder for evaluating generated text; currently does nothing."""
    pass


def EvalGenerateVoice():
    """Placeholder for evaluating generated voice; currently does nothing."""
    # NOTE(review): the original layout was lost; both stubs are assumed to
    # be nested inside this function - confirm against the original file.

    def GenerateVoice():
        pass

    def Voice2txt():
        pass