Init repo

This commit is contained in:
Mario Gil 2024-07-26 08:08:02 -05:00
parent d39268cf49
commit 4b52d4aa91
4 changed files with 916 additions and 0 deletions

10
.gitignore vendored Normal file
View File

@ -0,0 +1,10 @@
env/*
databases/storage.db
4b751a4425c2884286a92fde2de6427f_trusted.table
4b751a4425c2884286a92fde2de6427f_analitic.table
4b751a4425c2884286a92fde2de6427f_analitic_voice.table
4b751a4425c2884286a92fde2de6427f_analitic_llm.table
4b751a4425c2884286a92fde2de6427f_analitic_ocr.table
.vscode/*
__pycache__/*

706
apis.py Normal file
View File

@ -0,0 +1,706 @@
import fastapi
from fastapi import FastAPI, Request
from fastapi.responses import HTMLResponse
from pydantic import BaseModel
import time
from fastapi.staticfiles import StaticFiles
from fastapi import FastAPI, Query, File, UploadFile
#from fastapi.middleware.cors import CORSMiddleware
from starlette.middleware.cors import CORSMiddleware
import main
import os
from databases import db
import audioread
import pandas as pd
import statistics
# Working directory captured at import time; the HTML form endpoints prepend it
# to build the file paths offered in their <option> lists.
pwd = os.getcwd()
# Directories whose contents are listed by the HTML form endpoints.
pathAud="example/audio"
pathFact="example/factura"
app = FastAPI()
#app.mount("/statics", StaticFiles(directory="statics"), name="statics")
# CORS is fully open: every origin, method and header is accepted and
# credentials are allowed.
# NOTE(review): presumably fine for a local evaluation tool -- confirm before
# exposing this service outside a trusted network.
app.add_middleware(
CORSMiddleware,
allow_origins=["*"],
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
)
class Response(BaseModel):
    """Request body of the ``/EvalVoice`` endpoint.

    Attributes:
        path: Path of the audio file to transcribe.
        Trusted: Reference transcription. When empty, the endpoint looks the
            value up in the ``trusted`` table.
        model: Speech-to-text backend. ``"whisper"`` selects Whisper; any
            other value selects Vosk.
    """
    path: str = Query("", description="Path of the audio file to transcribe")
    Trusted: str = Query("", description="Trusted (reference) transcription of the audio")
    model : str = Query("whisper", description="Speech-to-text model: whisper or vosk")
# Request body of the /EvalFact endpoint (OCR evaluation of an invoice image).
class Response1(BaseModel):
    # Path of the image file to process.
    path: str = Query("", description="path file")
    # Task name that selects the OCR backend.
    task_prompt: str = Query("", description="task of model")
    # Model identifier.
    model: str = Query("", description="model")
    # Reference (trusted) OCR text to compare the result against.
    TrustedOCR: str = Query("", description="truted OCR model")
    # Option for the OCR model.
    option: str = Query("", description="OCR model option")
# Request body of the /EvalLLMFact endpoint; every field is forwarded to
# main.EvalllmFacturas.
class Response2(BaseModel):
    # Path of the invoice image to process.
    path: str = Query("", description="path file")
    # OCR task name; an empty value is accepted.
    task_prompt: str = Query("", description="task of model")
    # System prompt for the LLM.
    system: str = Query("", description="prompt system LLM model with ocr and image claude")
    # Content prompt; the default is the literal placeholder "%s".
    content: str = Query("%s", description="prompt content LLM model with ocr")
    # Token limit for the LLM answer.
    max_tokens: int = Query(1024, description="maxtoken LLM OCR model")
    # Model alias or full model name.
    model: str = Query("Claude-sonnet", description="model")
    # Prompt used when the image itself is sent to Claude.
    prompt: str = Query("", description="prompt in claude with image")
    # Reference (trusted) result; main.EvalllmFacturas parses it with json.loads.
    TrustedLLmjson: str = Query("", description="truted OCR model")
class Response3(BaseModel):
    """Request body of the ``/addTrusted`` endpoint.

    Attributes:
        path: Path of the file the trusted value belongs to.
        Trusted: Reference (trusted) content stored for that file.
        mode: Evaluation mode the trusted value applies to. The endpoint
            measures the audio duration only when this is ``"voice"``.
            The default stays ``"whisper"`` for backward compatibility.
    """
    path: str = Query("", description="Path of the file the trusted value belongs to")
    Trusted: str = Query("", description="Trusted (reference) content for the file")
    mode : str = Query("whisper", description="Evaluation mode; 'voice' also stores the audio duration")
@app.get("/addTrusted")
@app.post("/addTrusted")
def addTrusted(response:Response3):
    """Insert or update the trusted (reference) value of a file.

    The record is keyed by ``(path, mode)``. The file size is stored in MiB
    and, for ``mode == "voice"``, the audio duration reported by
    ``audioread`` is stored as well (0 for every other mode).

    Args:
        response: Request body carrying ``path``, ``Trusted`` and ``mode``.

    Returns:
        A short message saying whether the record was added or updated.

    Raises:
        OSError: If ``path`` cannot be stat'ed.
    """
    path=response.path
    Trusted=response.Trusted
    mode=response.mode
    size=os.stat(path).st_size / (1024 * 1024)
    if mode=="voice":
        with audioread.audio_open(path) as f:
            duration = f.duration
    else:
        duration = 0
    # pyDAL conditions must be combined with `&`. Python's `and` evaluates to
    # its right operand only, which silently dropped the path condition and
    # made the update touch every row of the given mode.
    query=(db.trusted.path == path) & (db.trusted.mode == mode)
    if db(query).count()==0:
        db.trusted.insert(path=path,trusted=Trusted,mode=mode,size=size,duration =duration )
        db.commit()
        return "Add %s in mode %s"%(path,mode)
    db(query).update(trusted=Trusted,size=size,duration =duration )
    db.commit()
    return "Update %s in mode %s"%(path,mode)
def list2tablehtml(listdata,model):
    """Render evaluation rows of one model as an HTML table.

    Args:
        listdata: Iterable of mappings with the keys ``path``, ``time``,
            ``similarity`` and ``similaritypartial``.
        model: Model name shown in the table heading.

    Returns:
        An HTML fragment: an ``<h2>`` heading followed by a ``<table>``.
    """
    # Local import: the module does not import `html`, and the name is
    # commonly used for local variables in this file.
    from html import escape

    columns=("path","time","similarity","similaritypartial")
    # Values come from stored request data, so they are escaped before being
    # placed into markup.
    header="""<h2>Table of {0}</h2>
<table style="width:100%">
<tr>
<th>path</th>
<th>time</th>
<th>similarity</th>
<th>similaritypartial</th>
</tr>""".format(escape(str(model)))
    row_template=""" <tr>
<td>%s</td>
<td>%s</td>
<td>%s</td>
<td>%s</td>
</tr>
"""
    body="".join(
        row_template%tuple(escape(str(item[key])) for key in columns)
        for item in listdata
    )
    return header+body+"""</table>
"""
def tableVoice(model):
    """Summarise the stored voice evaluations of one model.

    Reads every ``analitic_voice`` row of ``model``, averages ``time``,
    ``similarity`` and ``similaritypartial`` with a pandas pivot table, and
    averages the audio duration recorded in ``trusted`` for the same paths.

    Args:
        model: Model name as stored in ``analitic_voice.model``.

    Returns:
        A dict with the keys ``duration``, ``time``, ``similarity``,
        ``similaritypartial``, ``card`` (HTML flip card) and ``data``
        (HTML table of the individual rows).
    """
    rows_list = db(db.analitic_voice.model==model).select().as_list()
    data=pd.DataFrame(rows_list)
    durationL=[]
    for item in rows_list:
        trusted_row=db(db.trusted.path == item["path"] ).select().last()
        # A path may have been evaluated without a trusted record; skip it
        # instead of failing on `None.duration`.
        if trusted_row is not None and trusted_row.duration is not None:
            durationL.append(trusted_row.duration)
    # One pivot table serves the three averages (it used to be rebuilt for
    # each of them).
    summary=pd.pivot_table(data,values=['time','similarity', 'similaritypartial'],index="model")
    mean_time=summary['time'].values[0]
    similarity=summary['similarity'].values[0]
    similaritypartial=summary['similaritypartial'].values[0]
    if durationL:
        duration=statistics.mean(durationL)
        efectivetime=mean_time/duration
    else:
        # No duration known: statistics.mean would raise on an empty list.
        duration=0
        efectivetime=float("nan")
    card="""<div class="flip-card">
<div class="flip-card-inner">
<div class="flip-card-front">
<p style="width:300px;height:300px;">{0} </p>
</div>
<div class="flip-card-back">
<h1>time of process (sg)</h1>
<p>{1}</p>
<h1>similarity</h1>
<p>{2}</p>
<h1>similaritypartial</h1>
<p>{3}</p>
<h1>time of audio(sg)</h1>
<p>{4}</p>
<h1>time in process</h1>
<p>{5}</p>
</div>
</div>
</div>""".format(model,mean_time,similarity,similaritypartial,duration,efectivetime)
    return {"duration":duration,"time":mean_time,"similarity":similarity,"similaritypartial":similaritypartial,"card":card,"data":list2tablehtml(rows_list,model)}
@app.get("/getmetricsvoice")
def getMetricsVoice():
    """Serve an HTML report with the metrics of every evaluated voice model.

    For each distinct model in ``analitic_voice`` the page shows the flip
    card and the detail table produced by ``tableVoice``.

    Returns:
        An ``HTMLResponse`` with status code 200.
    """
    models=[row.model for row in db().select(db.analitic_voice.model, distinct=True)]
    cards=""
    dataAll=""
    for model in models:
        Sal=tableVoice(model)
        cards=cards+Sal["card"]
        dataAll=dataAll+Sal["data"]
    # The head is kept out of str.format because its CSS is full of braces.
    # The selector was `. container` (space after the dot), which is invalid
    # CSS, so the flex layout of the cards was never applied.
    htmlhead="""<!DOCTYPE html>
<html lang="es">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Evaluacion de modelos voice2txt</title>
<style>
.container{
display:flex;
}
/* The flip card container - set the width and height to whatever you want. We have added the border property to demonstrate that the flip itself goes out of the box on hover (remove perspective if you don't want the 3D effect */
.flip-card {
background-color: transparent;
width: 500px;
height: 500px;
border: 1px solid #f1f1f1;
perspective: 1000px; /* Remove this if you don't want the 3D effect */
}
/* This container is needed to position the front and back side */
.flip-card-inner {
position: relative;
width: 100%;
height: 100%;
text-align: center;
transition: transform 0.8s;
transform-style: preserve-3d;
}
/* Do an horizontal flip when you move the mouse over the flip box container */
.flip-card:hover .flip-card-inner {
transform: rotateY(180deg);
}
/* Position the front and back side */
.flip-card-front, .flip-card-back {
position: absolute;
width: 100%;
height: 100%;
-webkit-backface-visibility: hidden; /* Safari */
backface-visibility: hidden;
}
/* Style the front side (fallback if image is missing) */
.flip-card-front {
background-color: #bbb;
color: black;
}
/* Style the back side */
.flip-card-back {
background-color: dodgerblue;
color: white;
transform: rotateY(180deg);
}
</style>
</head>"""
    htmlbody="""<body>
<h1>Estadisticas modelos de voice</h1>
<div class=container>
{0}
</div>
{1}
</body>
</html>
""".format(cards,dataAll)
    html=htmlhead+htmlbody
    return HTMLResponse(content=html, status_code=200)
@app.get("/EvalVoice")
@app.post("/EvalVoice")
def EvalVoice(response:Response):
    """Evaluate a speech-to-text model on one audio file and store the result.

    When the request carries no trusted transcription, the one stored in the
    ``trusted`` table for ``(path, "voice")`` is used if it exists. The
    result is inserted into ``analitic_voice``, or its similarity scores are
    refreshed when a row for the same path and model already exists.

    Args:
        response: Request body carrying ``path``, ``Trusted`` and ``model``.

    Returns:
        The evaluation dict returned by ``main.EvalWhisper`` or
        ``main.EvalVosk``.
    """
    path=response.path
    Trusted=response.Trusted
    model=response.model
    if Trusted=="":
        # pyDAL conditions are combined with `&`; `and` kept only the mode
        # condition and could return the trusted text of another file.
        row=db((db.trusted.path == path) & (db.trusted.mode == "voice")).select().first()
        # No stored reference: keep the empty string, as before.
        if row is not None:
            Trusted=row.trusted
    if model=="whisper":
        Sal=main.EvalWhisper(path,Trusted)
    else:
        Sal=main.EvalVosk(path,Trusted)
    stored=(db.analitic_voice.path == Sal["path"]) & (db.analitic_voice.model == Sal["model"])
    if db(stored).count()==0:
        db.analitic_voice.insert(**Sal)
    else:
        db(stored).update(similarity= Sal["similarity"],similaritypartial= Sal["similaritypartial"])
    db.commit()
    return Sal
@app.get("/EvalFact")
@app.post("/EvalFact")
def EvalFact(response:Response1):
    """Evaluate an OCR backend on one invoice image and store the result.

    When the request carries no trusted text, the one stored in the
    ``trusted`` table for ``(path, "OCR")`` is used if it exists. The result
    is inserted into ``analitic_ocr``, or its scores are refreshed when a row
    for the same path and model already exists.

    Args:
        response: Request body carrying ``path``, ``task_prompt``,
            ``TrustedOCR`` and ``option``.

    Returns:
        The evaluation dict returned by ``main.EvalFacturas`` plus ``path``.
    """
    path=response.path
    task_prompt=response.task_prompt
    # The OCR option used to be read from `model`, so the `option` field sent
    # by the HTML form was ignored. `model` remains a fallback for clients
    # that relied on the old behaviour.
    option=response.option or response.model
    Trusted=response.TrustedOCR
    if Trusted=="":
        # The lookup used to run only for an empty task_prompt and its result
        # was never passed on; it now feeds the evaluation. `&` (not `and`)
        # is required to combine pyDAL conditions.
        row=db((db.trusted.path == path) & (db.trusted.mode == "OCR")).select().first()
        if row is not None and row.trusted is not None:
            Trusted=row.trusted
    Sal=main.EvalFacturas(path,task_prompt,Trusted,option)
    Sal["path"]=path
    stored=(db.analitic_ocr.path == Sal["path"]) & (db.analitic_ocr.model == Sal["model"])
    if db(stored).count()==0:
        db.analitic_ocr.insert(**Sal)
    else:
        db(stored).update(similarity= Sal["similarity"],similaritypartial= Sal["similaritypartial"],jsonok=Sal["jsonok"])
    db.commit()
    return Sal
@app.get("/EvalLLMFact")
@app.post("/EvalLLMFact")
def EvalLLMFact(response:Response2):
    """Run the LLM invoice evaluation with the fields of the request body.

    Args:
        response: Request body; every field is forwarded unchanged.

    Returns:
        Whatever ``main.EvalllmFacturas`` returns.
    """
    return main.EvalllmFacturas(
        response.path,
        response.task_prompt,
        response.system,
        response.content,
        response.max_tokens,
        response.model,
        response.prompt,
        response.TrustedLLmjson,
    )
@app.get("/evalvoicehtml")
def EvalVoicehtml():
    """Serve the HTML form that posts a voice evaluation to ``/EvalVoice``.

    The file selector offers every entry of the ``pathAud`` directory,
    numbered from 1, with the working directory prepended to its path.

    Returns:
        An ``HTMLResponse`` with status code 200.
    """
    option_lines=[]
    for number, filename in enumerate(os.listdir(pathAud), start=1):
        option_lines.append("""<option value="%s">Opción %s, %s</option>
"""%(pwd+"/"+pathAud+"/"+filename,number,filename))
    options="".join(option_lines)
    page="""<!DOCTYPE html>
<html lang="es">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Evaluacion de modelos voice2txt</title>
<style>
body {
font-family: Arial, sans-serif;
margin: 20px;
}
input, button {
margin: 10px 0;
padding: 5px;
}
#respuesta {
margin-top: 20px;
padding: 10px;
border: 1px solid #ccc;
background-color: #f9f9f9;
}
</style>
</head>
<body>
<h1>Petición POST a API</h1>
<select id="texto1">
%s
</select>
<br>
<input type="text" id="texto2" placeholder="Trusted">
<br>
<select id="texto3">
<option value="whisper">whisper</option>
<option value="vosk">vosk</option>
</select>
<br>
<button onclick="enviarPeticion()">Enviar petición</button>
<div id="respuesta"></div>
<script>
function enviarPeticion() {
const texto1 = document.getElementById('texto1').value;
const texto2 = document.getElementById('texto2').value;
const texto3 = document.getElementById('texto3').value;
const datos = {
path: texto1,
Trusted: texto2,
model: texto3
};
fetch('/EvalVoice', {
method: 'POST',
headers: {
'Content-Type': 'application/json'
},
body: JSON.stringify(datos)
})
.then(response => response.json())
.then(data => {
document.getElementById('respuesta').innerHTML = JSON.stringify(data, null, 2);
})
.catch(error => {
document.getElementById('respuesta').innerHTML = 'Error: ' + error;
});
}
</script>
</body>
</html>
"""
    return HTMLResponse(content=page%options, status_code=200)
@app.get("/evalocrfactura")
def EvalOCRFactura():
    """Serve the HTML form that posts an OCR evaluation to ``/EvalFact``.

    The file selector offers every entry of the ``pathFact`` directory,
    numbered from 1, with the working directory prepended to its path.

    Returns:
        An ``HTMLResponse`` with status code 200.
    """
    option_lines=[]
    for number, filename in enumerate(os.listdir(pathFact), start=1):
        option_lines.append("""<option value="%s">Opción %s, %s</option>
"""%(pwd+"/"+pathFact+"/"+filename,number,filename))
    options="".join(option_lines)
    page="""<!DOCTYPE html>
<html lang="es">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Evaluacion de modelos OCR</title>
<style>
body {
font-family: Arial, sans-serif;
margin: 20px;
}
input, button {
margin: 10px 0;
padding: 5px;
}
#respuesta {
margin-top: 20px;
padding: 10px;
border: 1px solid #ccc;
background-color: #f9f9f9;
}
</style>
</head>
<body>
<h1>Petición POST a API</h1>
<select id="texto1">
%s
</select>
<br>
<select id="texto2">
<option value="More Detailed Caption">More Detailed Caption</option>
<option value="OCR">OCR</option>
<option value="parsed">parsed</option>
<option value="scan">scan</option>
</select>
<br>
<input type="text" id="texto3" placeholder="TrustedOCR">
<br>
<input type="text" id="texto4" placeholder="option">
<br>
<button onclick="enviarPeticion()">Enviar petición</button>
<div id="respuesta"></div>
<script>
function enviarPeticion() {
const texto1 = document.getElementById('texto1').value;
const texto2 = document.getElementById('texto2').value;
const texto3 = document.getElementById('texto3').value;
const texto4 = document.getElementById('texto4').value;
const datos = {
path: texto1,
task_prompt: texto2,
TrustedOCR: texto3,
option: texto4
};
fetch('/EvalFact', {
method: 'POST',
headers: {
'Content-Type': 'application/json'
},
body: JSON.stringify(datos)
})
.then(response => response.json())
.then(data => {
document.getElementById('respuesta').innerHTML = JSON.stringify(data, null, 2);
})
.catch(error => {
document.getElementById('respuesta').innerHTML = 'Error: ' + error;
});
}
</script>
</body>
</html>
"""
    return HTMLResponse(content=page%options, status_code=200)
def list2tablehtmlOCR(listdata,model):
    """Render OCR evaluation rows of one model as an HTML table.

    Args:
        listdata: Iterable of mappings with the keys ``path``, ``time``,
            ``similarity`` and ``similaritypartial``.
        model: Model name shown in the table heading.

    Returns:
        An HTML fragment: an ``<h2>`` heading followed by a ``<table>``.
    """
    # Local import: the module does not import `html`, and the name is
    # commonly used for local variables in this file.
    from html import escape

    columns=("path","time","similarity","similaritypartial")
    # Values come from stored request data, so they are escaped before being
    # placed into markup.
    header="""<h2>Table of {0}</h2>
<table style="width:100%">
<tr>
<th>path</th>
<th>time</th>
<th>similarity</th>
<th>similaritypartial</th>
</tr>""".format(escape(str(model)))
    row_template=""" <tr>
<td>%s</td>
<td>%s</td>
<td>%s</td>
<td>%s</td>
</tr>
"""
    body="".join(
        row_template%tuple(escape(str(item[key])) for key in columns)
        for item in listdata
    )
    return header+body+"""</table>
"""
def tableOCR(model):
    """Summarise the stored OCR evaluations of one model.

    Reads every ``analitic_ocr`` row of ``model`` and averages ``time``,
    ``similarity`` and ``similaritypartial`` with a pandas pivot table.

    Args:
        model: Model name as stored in ``analitic_ocr.model``.

    Returns:
        A dict with the keys ``time``, ``similarity``, ``similaritypartial``,
        ``card`` (HTML flip card) and ``data`` (HTML table of the rows).
    """
    rows_list = db(db.analitic_ocr.model==model).select().as_list()
    data=pd.DataFrame(rows_list)
    # One pivot table serves the three averages (it used to be rebuilt for
    # each of them).
    summary=pd.pivot_table(data,values=['time','similarity', 'similaritypartial'],index="model")
    mean_time=summary['time'].values[0]
    similarity=summary['similarity'].values[0]
    similaritypartial=summary['similaritypartial'].values[0]
    card="""<div class="flip-card">
<div class="flip-card-inner">
<div class="flip-card-front">
<p style="width:300px;height:300px;">{0} </p>
</div>
<div class="flip-card-back">
<h1>time of process (sg)</h1>
<p>{1}</p>
<h1>similarity</h1>
<p>{2}</p>
<h1>similaritypartial</h1>
<p>{3}</p>
</div>
</div>
</div>""".format(model,mean_time,similarity,similaritypartial)
    return {"time":mean_time,"similarity":similarity,"similaritypartial":similaritypartial,"card":card,"data":list2tablehtmlOCR(rows_list,model)}
@app.get("/getmetricsocr")
def getMetricsOCR():
    """Serve an HTML report with the metrics of every evaluated OCR model.

    For each distinct model in ``analitic_ocr`` the page shows the flip card
    and the detail table produced by ``tableOCR``.

    Returns:
        An ``HTMLResponse`` with status code 200.
    """
    models=[row.model for row in db().select(db.analitic_ocr.model, distinct=True)]
    cards=""
    dataAll=""
    for model in models:
        Sal=tableOCR(model)
        cards=cards+Sal["card"]
        dataAll=dataAll+Sal["data"]
    # The head is kept out of str.format because its CSS is full of braces.
    # Two fixes: the title said "voice2txt" on the OCR page, and the selector
    # `. container` (space after the dot) was invalid CSS, so the flex layout
    # of the cards was never applied.
    htmlhead="""<!DOCTYPE html>
<html lang="es">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Evaluacion de modelos OCR</title>
<style>
.container{
display:flex;
}
/* The flip card container - set the width and height to whatever you want. We have added the border property to demonstrate that the flip itself goes out of the box on hover (remove perspective if you don't want the 3D effect */
.flip-card {
background-color: transparent;
width: 500px;
height: 500px;
border: 1px solid #f1f1f1;
perspective: 1000px; /* Remove this if you don't want the 3D effect */
}
/* This container is needed to position the front and back side */
.flip-card-inner {
position: relative;
width: 100%;
height: 100%;
text-align: center;
transition: transform 0.8s;
transform-style: preserve-3d;
}
/* Do an horizontal flip when you move the mouse over the flip box container */
.flip-card:hover .flip-card-inner {
transform: rotateY(180deg);
}
/* Position the front and back side */
.flip-card-front, .flip-card-back {
position: absolute;
width: 100%;
height: 100%;
-webkit-backface-visibility: hidden; /* Safari */
backface-visibility: hidden;
}
/* Style the front side (fallback if image is missing) */
.flip-card-front {
background-color: #bbb;
color: black;
}
/* Style the back side */
.flip-card-back {
background-color: dodgerblue;
color: white;
transform: rotateY(180deg);
}
</style>
</head>"""
    htmlbody="""<body>
<h1>Estadisticas modelos de OCR</h1>
<div class=container>
{0}
</div>
{1}
</body>
</html>
""".format(cards,dataAll)
    html=htmlhead+htmlbody
    return HTMLResponse(content=html, status_code=200)
@app.get("/evalllmfacturas")
def EvalllmFacturas():
    """Serve the HTML form that posts an LLM evaluation to ``/EvalLLMFact``.

    The file selector offers every entry of the ``pathFact`` directory,
    numbered from 1, with the working directory prepended to its path.

    Returns:
        An ``HTMLResponse`` with status code 200.
    """
    option_lines=[]
    for number, filename in enumerate(os.listdir(pathFact), start=1):
        option_lines.append("""<option value="%s">Opción %s, %s</option>
"""%(pwd+"/"+pathFact+"/"+filename,number,filename))
    options="".join(option_lines)
    # The prefilled TrustedLLmjson value used to be {'A':''}. Single quotes
    # are not valid JSON, so submitting the untouched form made json.loads
    # fail in the backend. The attribute is now single-quoted so it can hold
    # valid JSON.
    html="""<!DOCTYPE html>
<html lang="es">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Evaluacion modelos LLM</title>
<style>
body {
font-family: Arial, sans-serif;
margin: 20px;
}
input, button {
margin: 10px 0;
padding: 5px;
}
#respuesta {
margin-top: 20px;
padding: 10px;
border: 1px solid #ccc;
background-color: #f9f9f9;
}
</style>
</head>
<body>
<h1>Petición POST a API</h1>
<select id="texto1">
%s
</select>
<br>
<select id="texto2">
<option value="">N.A.</option>
<option value="More Detailed Caption">More Detailed Caption</option>
<option value="OCR">OCR</option>
<option value="parsed">parsed</option>
<option value="scan">scan</option>
</select>
<br>
<input type="text" id="texto3" placeholder="system" value="Eres un chatbot amable">
<br>
<input type="text" id="texto4" placeholder="content" value="%s">
<br>
<input type="number" id="texto5" placeholder="max_tokens" value=1024>
<br>
<input type="text" id="texto6" placeholder="model" value="Claude-sonnet">
<br>
<input type="text" id="texto7" placeholder="prompt" value="Analiza la factura">
<br>
<input type="text" id="texto8" placeholder="TrustedLLmjson" value='{"A":""}'>
<br>
<button onclick="enviarPeticion()">Enviar petición</button>
<div id="respuesta"></div>
<script>
function enviarPeticion() {
const texto1 = document.getElementById('texto1').value;
const texto2 = document.getElementById('texto2').value;
const texto3 = document.getElementById('texto3').value;
const texto4 = document.getElementById('texto4').value;
const texto5 = document.getElementById('texto5').value;
const texto6 = document.getElementById('texto6').value;
const texto7 = document.getElementById('texto7').value;
const texto8 = document.getElementById('texto8').value;
const datos = {
path: texto1,
task_prompt: texto2,
system: texto3,
content:texto4,
max_tokens:texto5,
model:texto6,
prompt:texto7,
TrustedLLmjson:texto8,
};
fetch('/EvalLLMFact', {
method: 'POST',
headers: {
'Content-Type': 'application/json'
},
body: JSON.stringify(datos)
})
.then(response => response.json())
.then(data => {
document.getElementById('respuesta').innerHTML = JSON.stringify(data, null, 2);
})
.catch(error => {
document.getElementById('respuesta').innerHTML = 'Error: ' + error;
});
}
</script>
</body>
</html>
"""%(options,"%s")
    # The second substitution re-inserts a literal "%s" as the default value
    # of the `content` input.
    return HTMLResponse(content=html, status_code=200)

43
databases.py Normal file
View File

@ -0,0 +1,43 @@
from pydal import DAL, Field
# Shared pyDAL connection; the tables below are registered on it at import time.
db = DAL("sqlite://databases/storage.db")
# Reference ("trusted") values: one record per file path and evaluation mode,
# together with the file size and, where measured, the audio duration.
db.define_table(
"trusted",
Field("path"),
Field("mode"),
Field("trusted"),
Field("duration",type="double"),
Field("size",type="double")
)
# Results of speech-to-text evaluations: model output, reference text,
# processing time and the two similarity scores.
db.define_table(
"analitic_voice",
Field("content"),
Field("trusted"),
Field("model"),
Field("time", type="double"),
Field("path"),
Field("similarity", type="double"),
Field("similaritypartial", type="double")
)
# Results of OCR evaluations; same columns as analitic_voice plus `jsonok`,
# an integer flag.
db.define_table(
"analitic_ocr",
Field("content"),
Field("trusted"),
Field("model"),
Field("time", type="double"),
Field("path"),
Field("similarity", type="double"),
Field("similaritypartial", type="double"),
Field("jsonok" ,type="integer")
)
# Same columns as analitic_voice.
# NOTE(review): presumably meant for LLM evaluation results -- no code in this
# commit reads or writes this table; confirm.
db.define_table(
"analitic_llm",
Field("content"),
Field("trusted"),
Field("model"),
Field("time", type="double"),
Field("path"),
Field("similarity", type="double"),
Field("similaritypartial", type="double")
)

157
main.py Normal file
View File

@ -0,0 +1,157 @@
import requests
import evaluate
import deepdiff
import json
from fuzzywuzzy import fuzz
from deepdiff import DeepDiff
from deepdiff import Delta
import databases
#print(evaluate.list_evaluation_modules())
# Base URLs of the local services called by the helpers below: urlAud for
# voice-to-text, urlText for the OCR / LLM / image endpoints.
# Note that urlAud ends with "/" while urlText does not.
urlAud="http://127.0.0.1:7870/"
urlText="http://127.0.0.1:7869"
# Password sent in the JSON body of every request to those services.
# NOTE(review): this credential is committed in source control -- consider
# loading it from the environment or a secrets store, and rotating it.
password="1223Aer*"
def EvalVoice2Text(endpoint,datajson,Trusted):
    """Transcribe an audio file through the voice service and score the result.

    Args:
        endpoint: Endpoint path of the voice service, e.g. ``"/voice2txt"``.
        datajson: JSON payload for the service. Its ``"local"`` and
            ``"model"`` entries are copied into the result as ``path`` and
            ``model``.
        Trusted: Reference transcription to compare against.

    Returns:
        A dict with the keys ``content``, ``trusted``, ``model``, ``time``,
        ``similarity``, ``similaritypartial`` and ``path``.
    """
    # urlAud ends with "/" and the endpoints start with "/"; join them with
    # exactly one slash instead of requesting ".../​/voice2txt".
    apiUrl=urlAud.rstrip("/")+"/"+endpoint.lstrip("/")
    response = requests.get(apiUrl, json=datajson)
    # The payload carries the service password: mask it in the debug output.
    print({key:("***" if key=="password" else value) for key,value in datajson.items()})
    A=json.loads(response.content)
    print(A)
    message=A['message']
    # Compare case-insensitively and ignoring surrounding whitespace.
    expected=Trusted.strip().lower()
    obtained=message.strip().lower()
    return {"content":message,
            "trusted":Trusted,
            "model":datajson["model"],
            "time":A['time'],
            "similarity":fuzz.ratio(expected,obtained),
            "similaritypartial":fuzz.partial_ratio(expected,obtained),
            "path":datajson["local"]
            }
def EvalWhisper(path,Trusted=""):
    """Evaluate the Whisper model on ``path`` via the ``/voice2txt`` endpoint.

    Args:
        path: Audio file path, sent to the service as ``"local"``.
        Trusted: Reference transcription.

    Returns:
        The evaluation dict produced by ``EvalVoice2Text``.
    """
    payload={"url":"","password":password ,"model":"whisper","local":path}
    return EvalVoice2Text("/voice2txt",payload,Trusted)
# Example (the endpoint is fixed inside EvalWhisper and is not an argument):
# EvalWhisper(path="example/AwACAgEAAxkBAAIBw2YX8o2vGGCNtZCXk7mY1Bm5w__lAAJmBAACxe7ARI1fUWAGcz_RNAQ.ogg",
# Trusted="Hoy compre dos medicinas Tereleji en Cruz Verde por un monto de 494 mil 400 pesos colombianos.")
def EvalVosk(path,Trusted=""):
    """Evaluate the Vosk model on ``path`` via the ``/voice2txtlocal`` endpoint.

    Args:
        path: Audio file path, sent to the service as ``"local"``.
        Trusted: Reference transcription.

    Returns:
        The evaluation dict produced by ``EvalVoice2Text``.
    """
    payload={"url":"","password":password ,"model":"models/vosk-model-small-es-0.42","local":path}
    return EvalVoice2Text("/voice2txtlocal",payload,Trusted)
# Example (the endpoint is fixed inside EvalVosk and is not an argument):
# EvalVosk(path="example/AwACAgEAAxkBAAIBw2YX8o2vGGCNtZCXk7mY1Bm5w__lAAJmBAACxe7ARI1fUWAGcz_RNAQ.ogg",
# Trusted="Hoy compre dos medicinas Tereleji en Cruz Verde por un monto de 494 mil 400 pesos colombianos.")
def ocrfacturas(path,task_prompt):
    """Request ``/parsedimage3`` on the text service for an image.

    Args:
        path: Image path sent to the service.
        task_prompt: Task name sent to the service.

    Returns:
        The raw response body (bytes).
    """
    payload={"path":path,"task_prompt":task_prompt,"password":password}
    return requests.get(urlText+'/parsedimage3', json=payload).content
def llmFacturas(path,task_prompt,system,content,max_tokens,model):
    """Request ``/parsedimage4`` on the text service for an image.

    All arguments are sent unchanged in the JSON payload, together with the
    service password.

    Returns:
        The raw response body (bytes).
    """
    payload={
        "path":path,
        "task_prompt":task_prompt,
        "system":system,
        "content":content,
        "max_tokens":max_tokens,
        "model":model,
        "password":password,
    }
    return requests.get(urlText+'/parsedimage4', json=payload).content
def llmFacturas2(path,prompt,system,model):
    """Request ``/parsedimage2`` on the text service for an image.

    All arguments are sent unchanged in the JSON payload, together with the
    service password.

    Returns:
        The raw response body (bytes).
    """
    payload={
        "path":path,
        "prompt":prompt,
        "system":system,
        "model":model,
        "password":password,
    }
    return requests.get(urlText+'/parsedimage2', json=payload).content
def EvalParsedImage(path="/home/mario/Repositorios/EvalDataSetHugging/example/Gmail/20240530_112812.jpg"):
    """Request ``/parsedimage`` on the text service for an image.

    Args:
        path: Image path sent to the service.

    Returns:
        The raw response body (bytes).
    """
    payload={"path":path,"password":password}
    return requests.get(urlText+"/parsedimage",json=payload).content
def EvalParsedImage5(path="/home/mario/Repositorios/EvalDataSetHugging/example/Gmail/20240530_112812.jpg",option="teserac"):
    """Request ``/parsedimage5`` on the text service for an image.

    Args:
        path: Image path sent to the service.
        option: Option value sent to the service.

    Returns:
        The raw response body (bytes).
    """
    payload={"path":path,"password":password,"option":option}
    return requests.get(urlText+"/parsedimage5",json=payload).content
def EvalFacturas(path,task_prompt,TrustedOCR,option=""):
    """Run one OCR backend on an invoice image and score it against a reference.

    Args:
        path: Image path sent to the service.
        task_prompt: Backend selector: ``"parsed"``,
            ``"More Detailed Caption"``, ``"OCR"`` or ``"scan"``.
        TrustedOCR: Reference text. It is parsed as JSON when possible;
            otherwise the raw value is used.
        option: Extra option, used only by the ``"scan"`` backend.

    Returns:
        A dict with the keys ``content``, ``trusted``, ``similarity``,
        ``similaritypartial``, ``model``, ``time`` and ``jsonok`` (1 when
        ``TrustedOCR`` was valid JSON, otherwise 0).

    Raises:
        ValueError: If ``task_prompt`` is not a supported value. This used
            to surface as an ``UnboundLocalError`` on the ``OCR`` variable.
    """
    if task_prompt=="parsed":
        OCR=EvalParsedImage(path)
    elif task_prompt in ("More Detailed Caption","OCR"):
        OCR=ocrfacturas(path,task_prompt)
    elif task_prompt=="scan":
        OCR=EvalParsedImage5(path,option)
    else:
        raise ValueError("Unsupported task_prompt: %r"%(task_prompt,))
    # Decode the service response once instead of once per field.
    result=json.loads(OCR)
    model=result["model"]
    content=result["content"]
    time=result["time"]
    try:
        TrustedOCR=json.loads(TrustedOCR)
        jsonok=1
    except (TypeError, ValueError):
        # Not JSON (json.JSONDecodeError is a ValueError): compare the raw value.
        jsonok=0
    expected=str(TrustedOCR).strip().lower()
    obtained=str(content).strip().lower()
    return {"content":content,
            "trusted":TrustedOCR,
            "similarity":fuzz.ratio(expected,obtained),
            "similaritypartial":fuzz.partial_ratio(expected,obtained),
            "model":model,
            "time":time,
            "jsonok":jsonok
            }
def changemodel(model):
    """Translate a short Claude alias into its full model identifier.

    Args:
        model: Alias (``"Claude-sonnet"``, ``"Claude-opus"``,
            ``"Claude-haiku"``) or any other model name.

    Returns:
        The full identifier for a known alias; otherwise ``model`` unchanged.
    """
    aliases={
        "Claude-sonnet":"claude-3-5-sonnet-20240620",
        "Claude-opus":"claude-3-opus-20240229",
        "Claude-haiku":"claude-3-haiku-20240307",
    }
    return aliases.get(model,model)
def EvalllmFacturas(path,task_prompt,system,content,max_tokens,model,prompt,TrustedLLmjson):
    """Run an LLM on an invoice image and return its answer with the reference.

    A Claude model with an empty ``task_prompt`` goes through
    ``llmFacturas2``; every other combination goes through ``llmFacturas``.

    Args:
        path: Image path sent to the service.
        task_prompt: OCR task name; may be empty.
        system: System prompt.
        content: Content prompt (``llmFacturas`` route only).
        max_tokens: Token limit (``llmFacturas`` route only).
        model: Model alias or full name; aliases are expanded by
            ``changemodel``.
        prompt: Prompt used on the ``llmFacturas2`` route.
        TrustedLLmjson: Reference result as a JSON string.

    Returns:
        A dict with ``content`` (raw service response) and ``trusted``
        (the parsed reference).

    Raises:
        ValueError: If ``TrustedLLmjson`` is not valid JSON
            (``json.JSONDecodeError``).
    """
    # Parse the reference first: an invalid value now fails before the slow
    # LLM request is made, not after it.
    TrustedLLmjson=json.loads(TrustedLLmjson)
    model=changemodel(model)
    if "claude" in model and task_prompt=="":
        LLmjson=llmFacturas2(path=path,prompt=prompt,system=system,model=model)
    else:
        LLmjson=llmFacturas(path=path,task_prompt=task_prompt,system=system,content=content,max_tokens=max_tokens,model=model)
    return {"content":LLmjson,"trusted":TrustedLLmjson}
#EvalFacturas(path="example/Factura2.jpg",task_prompt="OCR",system="",content="Analiza el siguiente texto: %s",max_tokens=200,model="claude-sonnet")
def EvalClassImage(path="/home/mario/Repositorios/EvalDataSetHugging/example/Gmail/20240530_112812.jpg",):
    """Request ``/classificateimage`` on the text service for an image.

    Args:
        path: Image path sent to the service.

    Returns:
        The raw response body (bytes). It is also printed, as before.
    """
    # urlText has no trailing slash, so the endpoint needs a leading one;
    # without it the URL was "http://127.0.0.1:7869classificateimage".
    endpoint="/classificateimage"
    jsonT={"path":path,"password":password}
    response=requests.get(urlText+endpoint,json=jsonT)
    print(response.content)
    # Returned as well, like the sibling helpers (previously None).
    return response.content
#To Do
def EvalGeneratedText(prompt="",model="",):
    """Placeholder: not implemented yet; does nothing and returns ``None``."""
    pass
def EvalGenerateVoice():
    """Placeholder: not implemented yet; only declares two empty helpers."""
    # NOTE(review): the indentation of this file was lost in the commit view;
    # both helpers are assumed to be nested here -- confirm.
    def GenerateVoice():
        pass
    def Voice2txt():
        pass