EvalDataSetHugging/main.py

import requests
import evaluate
import deepdiff
import json
from fuzzywuzzy import fuzz
from deepdiff import DeepDiff
from deepdiff import Delta
import databases
#print(evaluate.list_evaluation_modules())
# Base URLs of the local HTTP services called by the functions below:
# urlAud serves the voice-to-text routes, urlText the text and image routes.
# NOTE(review): urlAud ends with "/" while urlText does not — confirm that
# every endpoint appended to them yields the intended URL.
urlAud = "http://127.0.0.1:7870/"
urlText = "http://127.0.0.1:7869"

# Value sent under the "password" key of every request payload.
# NOTE(review): hard-coded credential — consider loading it from configuration.
password = "1223Aer*"
def EvalVoice2Text(endpoint,datajson,Trusted):
    """Request a transcription from the voice service and score it.

    Sends ``datajson`` as the JSON body of a GET request to ``endpoint`` on
    ``urlAud`` and compares the returned "message" with ``Trusted`` using
    ``fuzz.ratio`` and ``fuzz.partial_ratio``. Both strings are stripped and
    lower-cased before the comparison.

    Args:
        endpoint: Route on the voice service, with or without a leading "/".
        datajson: Request payload; must contain the keys "local" and "model".
        Trusted: Reference transcription to compare against.

    Returns:
        A dict with the keys "content", "trusted", "model", "time",
        "similarity", "similaritypartial" and "path".

    Raises:
        KeyError: If the response lacks "time"/"message" or the payload
            lacks "local"/"model".
    """
    # urlAud ends with "/" and the endpoints start with "/"; join them with
    # exactly one separator so the request does not go to "//<endpoint>".
    apiUrl = urlAud.rstrip("/") + "/" + endpoint.lstrip("/")
    response = requests.get(apiUrl, json=datajson)
    reply = json.loads(response.content)
    elapsed = reply['time']

    # Normalise each side once; both similarity measures use the same inputs.
    expected = Trusted.strip().lower()
    message = reply['message']
    transcribed = message.strip().lower()
    similarity = fuzz.ratio(expected, transcribed)
    similarityPartial = fuzz.partial_ratio(expected, transcribed)

    return {"content": message,
            "trusted": Trusted,
            "model": datajson["model"],
            "time": elapsed,
            "similarity": similarity,
            "similaritypartial": similarityPartial,
            "path": datajson["local"],
            }

def EvalWhisper(path,Trusted=""):
    """Evaluate the "whisper" model on the audio file at ``path``.

    Builds the payload for the "/voice2txt" route and delegates to
    EvalVoice2Text, returning its result unchanged.
    """
    payload = dict(url="", password=password, model="whisper", local=path)
    return EvalVoice2Text("/voice2txt", payload, Trusted)

def EvalVosk(path,Trusted=""):
    """Evaluate the Vosk model on the audio file at ``path``.

    Builds the payload for the "/voice2txtlocal" route and delegates to
    EvalVoice2Text, returning its result unchanged.
    """
    payload = dict(
        url="",
        password=password,
        model="models/vosk-model-small-es-0.42",
        local=path,
    )
    return EvalVoice2Text("/voice2txtlocal", payload, Trusted)


def EvalLLMCompra(endpoint,datajson,Trusted):
    """Query the text service and score its answer against a reference.

    Sends ``datajson`` as the JSON body of a GET request to
    ``urlText + endpoint``, prints the decoded response, and compares its
    "content" field with ``Trusted`` (both stripped and lower-cased) using
    ``fuzz.ratio`` and ``fuzz.partial_ratio``.

    Returns a dict with the keys "content", "trusted", "model", "time",
    "similarity", "similaritypartial" and "path". Note that "path" carries
    the response content as well; this payload has no file path.
    """
    response = requests.get(urlText + endpoint, json=datajson)
    reply = json.loads(response.content)
    elapsed = reply['time']
    print(reply)

    expected = Trusted.strip().lower()
    answer = reply['content']
    produced = answer.strip().lower()
    ratio = fuzz.ratio(expected, produced)
    partial = fuzz.partial_ratio(expected, produced)

    result = {"content": answer, "trusted": Trusted}
    result["model"] = datajson["model"]
    result["time"] = elapsed
    result["similarity"] = ratio
    result["similaritypartial"] = partial
    result["path"] = answer
    return result

def EvalModelLLMCompra(system,content,model,max_new_tokens,Trusted):
    """Evaluate ``model`` on a system/content prompt via "/genTextCustom".

    Assembles the request payload and delegates to EvalLLMCompra, returning
    its result unchanged.
    """
    payload = {
        "system": system,
        "content": content,
        "password": password,
        "model": model,
        # The key is spelled "max_new_token" (singular) on purpose: it is
        # reproduced exactly as the request has always sent it.
        "max_new_token": max_new_tokens,
    }
    return EvalLLMCompra("/genTextCustom", payload, Trusted)


# EvalVosk(path="example/AwACAgEAAxkBAAIBw2YX8o2vGGCNtZCXk7mY1Bm5w__lAAJmBAACxe7ARI1fUWAGcz_RNAQ.ogg",
#             Trusted="Hoy compre dos medicinas Tereleji en Cruz Verde por un monto de 494 mil 400 pesos colombianos.",
#             endpoint="/voice2txtlocal")
# EvalWhisper(path="example/AwACAgEAAxkBAAIBw2YX8o2vGGCNtZCXk7mY1Bm5w__lAAJmBAACxe7ARI1fUWAGcz_RNAQ.ogg",
#             Trusted="Hoy compre dos medicinas Tereleji en Cruz Verde por un monto de 494 mil 400 pesos colombianos.",
#             endpoint="/voice2txt")


def ocrfacturas(path,task_prompt):
    """Call the "/parsedimage3" route of the text service for an image.

    Sends ``path`` and ``task_prompt`` (plus the password) as the JSON body
    of a GET request and returns the raw response body as bytes.
    """
    payload = dict(path=path, task_prompt=task_prompt, password=password)
    return requests.get(urlText + '/parsedimage3', json=payload).content

def llmFacturas(path,task_prompt,system,content,max_tokens,model):
    """Call the "/parsedimage4" route of the text service for an image.

    All arguments are forwarded verbatim (together with the password) as the
    JSON body of a GET request; the raw response body is returned as bytes.
    """
    payload = {
        "path": path,
        "task_prompt": task_prompt,
        "system": system,
        "content": content,
        "max_tokens": max_tokens,
        "model": model,
        "password": password,
    }
    reply = requests.get(urlText + '/parsedimage4', json=payload)
    return reply.content

def llmFacturas2(path,prompt,system,model):
    """Call the "/parsedimage2" route of the text service for an image.

    Forwards ``path``, ``prompt``, ``system`` and ``model`` (together with
    the password) as the JSON body of a GET request and returns the raw
    response body as bytes.
    """
    payload = dict(
        path=path,
        prompt=prompt,
        system=system,
        model=model,
        password=password,
    )
    reply = requests.get(urlText + '/parsedimage2', json=payload)
    return reply.content

def EvalParsedImage(path="/home/mario/Repositorios/EvalDataSetHugging/example/Gmail/20240530_112812.jpg"):
    """Call the "/parsedimage" route of the text service for ``path``.

    Returns the raw response body as bytes.
    """
    payload = dict(path=path, password=password)
    return requests.get(urlText + "/parsedimage", json=payload).content

def EvalParsedImage5(path="/home/mario/Repositorios/EvalDataSetHugging/example/Gmail/20240530_112812.jpg",option="teserac"):
    """Call the "/parsedimage5" route of the text service for ``path``.

    ``option`` is forwarded verbatim in the payload. Returns the raw
    response body as bytes.
    """
    payload = dict(path=path, password=password, option=option)
    return requests.get(urlText + "/parsedimage5", json=payload).content

def EvalFacturas(path,task_prompt,TrustedOCR,option=""):
    """Run one image-reading route on ``path`` and score it against a reference.

    The route is selected by ``task_prompt``:

    * "parsed" -> EvalParsedImage(path)
    * "More Detailed Caption" or "OCR" -> ocrfacturas(path, task_prompt)
    * "scan" -> EvalParsedImage5(path, option)

    Args:
        path: Image path forwarded to the service.
        task_prompt: One of the values listed above.
        TrustedOCR: Reference output. If it is a valid JSON document it is
            decoded before being compared and returned; otherwise it is
            used as given.
        option: Only used when ``task_prompt`` is "scan".

    Returns:
        A dict with the keys "content", "trusted", "similarity",
        "similaritypartial", "model", "time" and "jsonok" (1 when
        ``TrustedOCR`` was decoded as JSON, 0 otherwise).

    Raises:
        ValueError: If ``task_prompt`` is not one of the supported values.
    """
    # Reject unknown task prompts up front; previously the result variable
    # was simply left unbound and the function failed further down.
    if task_prompt == "parsed":
        OCR = EvalParsedImage(path)
    elif task_prompt in ("More Detailed Caption", "OCR"):
        OCR = ocrfacturas(path, task_prompt)
    elif task_prompt == "scan":
        OCR = EvalParsedImage5(path, option)
    else:
        raise ValueError("unsupported task_prompt: %r" % (task_prompt,))

    # Decode the service response once instead of once per field.
    reply = json.loads(OCR)
    model = reply["model"]
    content = reply["content"]
    elapsed = reply["time"]

    # The reference may be JSON or plain text. Only decoding failures are
    # treated as "plain text"; KeyboardInterrupt and the like propagate.
    try:
        TrustedOCR = json.loads(TrustedOCR)
    except (TypeError, ValueError):
        jsonok = 0
    else:
        jsonok = 1

    expected = str(TrustedOCR).strip().lower()
    produced = str(content).strip().lower()
    similarity = fuzz.ratio(expected, produced)
    similarityPartial = fuzz.partial_ratio(expected, produced)
    return {"content": content,
            "trusted": TrustedOCR,
            "similarity": similarity,
            "similaritypartial": similarityPartial,
            "model": model,
            "time": elapsed,
            "jsonok": jsonok,
            }
def changemodel(model):
    """Translate a short Claude alias into its full model identifier.

    Any value that is not one of the known aliases is returned unchanged.
    The comparison is exact (case-sensitive).
    """
    aliases = (
        ("Claude-sonnet", "claude-3-5-sonnet-20240620"),
        ("Claude-opus", "claude-3-opus-20240229"),
        ("Claude-haiku", "claude-3-haiku-20240307"),
    )
    for alias, full_name in aliases:
        if model == alias:
            return full_name
    return model

def EvalllmFacturas(path,task_prompt,system,content,max_tokens,model,prompt,TrustedLLmjson):
    """Run an LLM route on an image and pair the output with a reference.

    ``model`` is first resolved through changemodel. When the resolved name
    contains "claude" and ``task_prompt`` is empty, llmFacturas2 is called
    with ``prompt``; in every other case llmFacturas is called with
    ``task_prompt``, ``content`` and ``max_tokens``.

    Returns a dict with "content" (the raw service response) and "trusted"
    (``TrustedLLmjson`` decoded from JSON).
    """
    resolved = changemodel(model)
    is_claude_without_task = resolved.count("claude") > 0 and task_prompt == ""
    if is_claude_without_task:
        raw_reply = llmFacturas2(
            path=path,
            prompt=prompt,
            system=system,
            model=resolved,
        )
    else:
        raw_reply = llmFacturas(
            path=path,
            task_prompt=task_prompt,
            system=system,
            content=content,
            max_tokens=max_tokens,
            model=resolved,
        )
    reference = json.loads(TrustedLLmjson)
    return {"content": raw_reply, "trusted": reference}


#EvalFacturas(path="example/Factura2.jpg",task_prompt="OCR",system="",content="Analiza el siguiente texto: %s",max_tokens=200,model="claude-sonnet")

def EvalClassImage(path="/home/mario/Repositorios/EvalDataSetHugging/example/Gmail/20240530_112812.jpg",):
    """Call the image-classification route for ``path`` and print the reply.

    Sends ``path`` and the password as the JSON body of a GET request to the
    "/classificateimage" route on ``urlText`` and prints the raw response
    body. Nothing is returned.
    """
    # urlText has no trailing slash, so the route needs its leading "/";
    # without it the request URL ran the port and the route together.
    endpoint = "/classificateimage"
    jsonT = {"path": path, "password": password}
    response = requests.get(urlText + endpoint, json=jsonT)
    print(response.content)

#To Do
def EvalGeneratedText(prompt="",model="",):
    """Placeholder for generated-text evaluation; not implemented yet.

    Accepts ``prompt`` and ``model`` but currently does nothing and
    returns None.
    """

def EvalGenerateVoice():
    """Placeholder for generated-voice evaluation; not implemented yet.

    Only declares two empty inner helpers; neither is called, and the
    function returns None.
    """
    def GenerateVoice():
        """Stub: not implemented."""
    def Voice2txt():
        """Stub: not implemented."""