EvalDataSetHugging/apis.py

757 lines
23 KiB
Python

import fastapi
from fastapi import FastAPI, Request
from fastapi.responses import HTMLResponse,JSONResponse
from pydantic import BaseModel
import time
from fastapi.staticfiles import StaticFiles
from fastapi import FastAPI, Query, File, UploadFile,HTTPException
#from fastapi.middleware.cors import CORSMiddleware
from starlette.middleware.cors import CORSMiddleware
import main
import os
from databases import db
import audioread
import pandas as pd
import statistics
import hashlib
from datetime import datetime
import json
# Working directory captured once at import time; handlers in this module
# prepend it (joined with "/") to the relative directories below.
pwd = os.getcwd()
# Relative directory listed by the voice evaluation form.
pathAud="example/audio"
# Relative directory listed by the invoice (OCR / LLM) evaluation forms.
pathFact="example/factura"
# Relative directory used by addTrusted for text samples.
pathText="example/texto"
def extractConfig(nameModel="SystemData",relPath=os.path.join(pwd,"conf/experiment_config.json"),dataOut="keyantrophics"):
    """Return one entry of one section of the JSON experiment configuration.

    Args:
        nameModel: top-level key (section) of the JSON document.
        relPath: configuration file path; joined onto the current working
            directory (an absolute path is therefore used unchanged).
        dataOut: key to read inside the selected section.

    Returns:
        The value stored at ``config[nameModel][dataOut]``.

    Raises:
        OSError: if the file cannot be opened.
        KeyError: if the section or the entry is missing.
    """
    config_file = os.path.join(os.getcwd(), relPath)
    with open(config_file, 'r', encoding='utf-8') as handle:
        section = json.load(handle)[nameModel]
    return section[dataOut]
# Mapping read from the "SystemData" section of the experiment config at import
# time. addTrusted validates the request mode against its keys and compares its
# values with "texto" / "factura" / "audio".
mode_list=extractConfig(nameModel="SystemData",dataOut="mode_list")
# The FastAPI application every route in this module is registered on.
app = FastAPI()
#app.mount("/statics", StaticFiles(directory="statics"), name="statics")
# NOTE(review): CORS is fully open here (any origin, method and header, with
# credentials allowed) - confirm this is intended outside local development.
app.add_middleware(
CORSMiddleware,
allow_origins=["*"],
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
)
class Response(BaseModel):
    """Request body of the ``/EvalVoice`` endpoint.

    The previous docstring and field descriptions were copied from an
    unrelated project; the descriptions are published in the OpenAPI schema,
    so they now describe what the fields actually carry.
    """
    # NOTE(review): the fields use fastapi.Query as their default inside a
    # pydantic model; pydantic.Field is the conventional choice - confirm
    # before switching.
    # Audio file path; EvalVoice looks up the trusted record by this value.
    path: str = Query("", description="Path of the audio file to evaluate")
    # EvalVoice runs Whisper when this is "whisper" and Vosk for any other value.
    model: str = Query("whisper", description="Speech-to-text model: 'whisper' or 'vosk'")
class Response1(BaseModel):
    """Request body of the ``/EvalFact`` (invoice OCR evaluation) endpoint."""
    # NOTE(review): the fields use fastapi.Query as their default inside a
    # pydantic model; pydantic.Field is the conventional choice - confirm
    # before switching.
    # Path of the file handed to the OCR evaluation.
    path: str = Query("", description="path file")
    # Task prompt forwarded to the OCR evaluation.
    task_prompt: str = Query("", description="task of model")
    model: str = Query("", description="model")
    # Reference text the OCR output is compared against (description typo fixed).
    TrustedOCR: str = Query("", description="trusted OCR text")
    option: str = Query("", description="OCR model option")
class Response2(BaseModel):
    """Request body of the ``/EvalLLMFact`` (invoice LLM evaluation) endpoint.

    Every field is forwarded positionally to ``main.EvalllmFacturas``.
    """
    # NOTE(review): the fields use fastapi.Query as their default inside a
    # pydantic model; pydantic.Field is the conventional choice - confirm
    # before switching.
    path: str = Query("", description="path file")
    task_prompt: str = Query("", description="task of model")
    system: str = Query("", description="prompt system LLM model with ocr and image claude")
    # Default is a literal "%s" placeholder; presumably filled in downstream - verify in main.
    content: str = Query("%s", description="prompt content LLM model with ocr")
    max_tokens: int = Query(1024, description="maxtoken LLM OCR model")
    model: str = Query("Claude-sonnet", description="model")
    prompt: str = Query("", description="prompt in claude with image")
    # Description previously read "truted OCR model" (copy-paste from the OCR body).
    TrustedLLmjson: str = Query("", description="trusted LLM JSON output")
class Response3(BaseModel):
    """Request body of the ``/addTrusted`` endpoint.

    The previous docstring and field descriptions were copied from an
    unrelated project; the descriptions are published in the OpenAPI schema,
    so they now describe what the fields actually carry.
    """
    # NOTE(review): the fields use fastapi.Query as their default inside a
    # pydantic model; pydantic.Field is the conventional choice - confirm
    # before switching.
    # File path for file-based modes, or the raw text itself for text modes.
    path: str = Query("", description="File path, or the text itself for text modes")
    # Reference value stored for the sample.
    Trusted: str = Query("", description="Trusted (reference) output for the sample")
    # NOTE(review): the default "whisper" looks like a model name, not a mode;
    # addTrusted answers 404 unless it is a key of mode_list - confirm.
    mode : str = Query("whisper", description="Evaluation mode (a key of mode_list)")
#Funcionales
@app.get("/addTrusted")
@app.post("/addTrusted")
def addTrusted(response:Response3):
    """Create or update the trusted (reference) record of one sample.

    Args:
        response (Response3): request body with three fields:
            path: path of the file on disk for file-based modes, or the text
                itself when the mode maps to "texto".
            Trusted: the trusted (reference) information for that sample.
            mode: must be a key of ``mode_list`` (documented values:
                llm_compra, llm_factura, llm_generaciontexto, llm_rag, ocr,
                voice).

    Returns:
        A confirmation string ("Add ..." on insert, "Update ..." on update),
        or a 404 ``JSONResponse`` when the mode is unknown or, for the
        llm_factura / ocr / voice modes, the file does not exist.
    """
    path = response.path
    Trusted = response.Trusted
    mode = response.mode
    last_modified = datetime.now()

    if mode not in mode_list:
        return JSONResponse(
            status_code=404,
            content={"content": "mode no found" }
        )
    if mode in ("llm_factura", "ocr", "voice") and not os.path.isfile(path):
        return JSONResponse(
            status_code=404,
            content={"content": "file no found" }
        )

    # Defaults first, so every metric is defined even when the configured kind
    # is none of the three handled below (previously a NameError).
    length = 0
    size = 0
    duration = 0
    kind = mode_list[mode]
    if kind == "texto":
        # For text modes `path` carries the text itself: persist it under a
        # content-hash file name and store that file's path instead.
        hash1 = hashlib.sha256(path.encode()).hexdigest()+".txt"
        # utf-8 matches the bytes that were hashed; `with` closes the handle
        # even if the write fails.
        with open("example/texto/"+hash1, "w", encoding="utf-8") as f:
            f.write(path)
        # The separator between directory and file name was missing, so the
        # stored path never pointed at the file written above.
        path = pwd+"/"+pathText+"/"+hash1
        length = len(Trusted)
    elif kind == "factura":
        size = os.stat(path).st_size / (1024 * 1024)  # bytes -> MiB
    elif kind == "audio":
        with audioread.audio_open(path) as f:
            duration = f.duration

    same_sample = (db.trusted.path == path) & (db.trusted.mode == mode)
    if db(same_sample).count() == 0:
        db.trusted.insert(path=path,trusted=Trusted,mode=mode,size=size,duration=duration,last_modified=last_modified,length=length )
        db.commit()
        return "Add %s in mode %s"%(path,mode)

    item = db(same_sample).select().last()
    # Tolerate rows whose counter was never initialised.
    modification_count = (item.modification_count or 0) + 1
    db(same_sample).update(trusted=Trusted,size=size,duration=duration,length=length,last_modified=last_modified,modification_count=modification_count)
    db.commit()
    return "Update %s in mode %s"%(path,mode)
@app.get("/EvalVoice")
@app.post("/EvalVoice")
def EvalVoice(response:Response):
    """Evaluate a speech-to-text model on one audio file and store the result.

    The trusted transcript is read from the ``trusted`` table (mode "voice").
    The evaluation result is inserted into ``analitic_voice``, or updates the
    existing row for the same path and model.

    Args:
        response (Response): ``path`` of the audio file and ``model``
            ("whisper" runs ``main.EvalWhisper``; anything else runs
            ``main.EvalVosk``).

    Returns:
        The result dict produced by the evaluator, with ``last_modified``
        added, or a 404 ``JSONResponse`` when no trusted record exists.
    """
    path = response.path
    model = response.model

    trusted_query = (db.trusted.path == path) & (db.trusted.mode == "voice")
    if db(trusted_query).count() == 0:
        return JSONResponse(
            status_code=404,
            content={"content": "Trusted no found" }
        )
    Trusted = db(trusted_query).select().last().trusted

    if model == "whisper":
        Sal = main.EvalWhisper(path, Trusted)
    else:
        Sal = main.EvalVosk(path, Trusted)
    Sal["last_modified"] = datetime.now()

    # pyDAL conditions must be combined with `&`: Python's `and` evaluates to
    # its right operand only, which silently dropped the path filter and made
    # the update below hit every row of the model.
    same_result = (db.analitic_voice.path == Sal["path"]) & (db.analitic_voice.model == Sal["model"])
    if db(same_result).count() == 0:
        db.analitic_voice.insert(**Sal)
    else:
        db(same_result).update(similarity=Sal["similarity"],similaritypartial=Sal["similaritypartial"],last_modified=Sal["last_modified"])
    db.commit()
    return Sal
def list2tablehtml(listdata,model):
    """Render evaluation rows as an HTML table headed by the model name.

    Args:
        listdata: iterable of mappings with the keys ``path``, ``time``,
            ``similarity`` and ``similaritypartial``.
        model: name shown in the table heading.

    Returns:
        An HTML fragment: an ``<h2>`` heading followed by a ``<table>`` with
        one row per item.
    """
    # Local import keeps this block self-contained.
    from html import escape

    # Values such as `path` originate from API requests, so they are escaped
    # before being placed into markup.
    heading = """<h2>Table of {0}</h2>
<table style="width:100%">
<tr>
<th>path</th>
<th>time</th>
<th>similarity</th>
<th>similaritypartial</th>
</tr>""".format(escape(str(model)))
    row_template = """ <tr>
<td>%s</td>
<td>%s</td>
<td>%s</td>
<td>%s</td>
</tr>
"""
    columns = ("path", "time", "similarity", "similaritypartial")
    # A single join replaces repeated string concatenation.
    body = "".join(
        row_template % tuple(escape(str(item[column])) for column in columns)
        for item in listdata
    )
    return heading + body + """</table>
"""
def tableVoice(model):
    """Aggregate the stored voice evaluations of one model for the metrics page.

    Args:
        model: model name used to filter the ``analitic_voice`` table.

    Returns:
        dict with the keys ``duration`` (mean duration of the trusted rows
        matched by path), ``time``, ``similarity``, ``similaritypartial``
        (pivot-table aggregates of the model's rows), ``card`` (HTML flip
        card) and ``data`` (HTML table of the individual rows).
    """
    rows_list = db(db.analitic_voice.model == model).select().as_list()
    data = pd.DataFrame(rows_list)

    # A result whose trusted row was deleted (or has no duration) used to
    # crash on `None.duration`; such rows are skipped instead.
    durationL = []
    for item in rows_list:
        trusted_row = db(db.trusted.path == item["path"]).select().last()
        if trusted_row is not None and trusted_row.duration is not None:
            durationL.append(trusted_row.duration)
    duration = statistics.mean(durationL) if durationL else 0

    # One pivot instead of three identical ones. The locals are renamed so the
    # imported `time` module is no longer shadowed.
    means = pd.pivot_table(data,values=['time','similarity', 'similaritypartial'],index="model")
    mean_time = means['time'].values[0]
    similarity = means['similarity'].values[0]
    similaritypartial = means['similaritypartial'].values[0]
    # Processing time per second of audio; undefined when no duration is known.
    efectivetime = mean_time/duration if duration else float("nan")

    card = """<div class="flip-card">
<div class="flip-card-inner">
<div class="flip-card-front">
<p style="width:300px;height:300px;">{0} </p>
</div>
<div class="flip-card-back">
<h1>time of process (sg)</h1>
<p>{1}</p>
<h1>similarity</h1>
<p>{2}</p>
<h1>similaritypartial</h1>
<p>{3}</p>
<h1>time of audio(sg)</h1>
<p>{4}</p>
<h1>time in process</h1>
<p>{5}</p>
</div>
</div>
</div>""".format(model,mean_time,similarity,similaritypartial,duration,efectivetime)
    return {"duration":duration,"time":mean_time,"similarity":similarity,"similaritypartial":similaritypartial,"card":card,"data":list2tablehtml(rows_list,model)}
@app.get("/getmetricsvoice")
def getMetricsVoice():
    """Serve the HTML metrics page for every voice model that has results.

    For each distinct model in ``analitic_voice`` the page shows a flip card
    with aggregate metrics followed by a table of the individual rows.

    Returns:
        HTMLResponse with status 200.
    """
    models = [row.model for row in db().select(db.analitic_voice.model, distinct=True)]
    summaries = [tableVoice(model) for model in models]
    cards = "".join(summary["card"] for summary in summaries)
    dataAll = "".join(summary["data"] for summary in summaries)
    # Kept as a plain (unformatted) string because the CSS is full of braces.
    # The selector read ". container", which is invalid CSS, so the flex
    # layout never applied.
    htmlhead = """<!DOCTYPE html>
<html lang="es">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Evaluacion de modelos voice2txt</title>
<style>
.container{
display:flex;
}
/* The flip card container - set the width and height to whatever you want. We have added the border property to demonstrate that the flip itself goes out of the box on hover (remove perspective if you don't want the 3D effect */
.flip-card {
background-color: transparent;
width: 500px;
height: 500px;
border: 1px solid #f1f1f1;
perspective: 1000px; /* Remove this if you don't want the 3D effect */
}
/* This container is needed to position the front and back side */
.flip-card-inner {
position: relative;
width: 100%;
height: 100%;
text-align: center;
transition: transform 0.8s;
transform-style: preserve-3d;
}
/* Do an horizontal flip when you move the mouse over the flip box container */
.flip-card:hover .flip-card-inner {
transform: rotateY(180deg);
}
/* Position the front and back side */
.flip-card-front, .flip-card-back {
position: absolute;
width: 100%;
height: 100%;
-webkit-backface-visibility: hidden; /* Safari */
backface-visibility: hidden;
}
/* Style the front side (fallback if image is missing) */
.flip-card-front {
background-color: #bbb;
color: black;
}
/* Style the back side */
.flip-card-back {
background-color: dodgerblue;
color: white;
transform: rotateY(180deg);
}
</style>
</head>"""
    # The class attribute used typographic quotes, so the element's class was
    # not "container"; plain ASCII quotes make the selector above match.
    htmlbody = """<body>
<h1>Estadisticas modelos de voice</h1>
<div class="container">
{0}
</div>
{1}
</body>
</html>
""".format(cards,dataAll)
    html = htmlhead+htmlbody
    return HTMLResponse(content=html, status_code=200)
@app.get("/EvalFact")
@app.post("/EvalFact")
def EvalFact(response:Response1):
    """Evaluate an OCR model on one invoice file and store the result.

    When neither a task prompt nor a trusted text is supplied, the trusted
    text is looked up in the ``trusted`` table. The evaluation result is
    inserted into ``analitic_ocr``, or updates the existing row for the same
    path and model.

    Args:
        response (Response1): request body (path, task_prompt, model,
            TrustedOCR, option).

    Returns:
        The result dict produced by ``main.EvalFacturas`` with ``path`` added.
    """
    path = response.path
    task_prompt = response.task_prompt
    # `model` keeps priority for existing callers; `option` is the field the
    # bundled HTML form actually sends, and it used to be ignored.
    option = response.model or response.option
    Trusted = response.TrustedOCR

    if task_prompt == "" and Trusted == "":
        # Conditions are combined with `&` (Python's `and` keeps only the
        # right-hand one). Both spellings of the mode are accepted because
        # addTrusted uses "ocr" while this lookup used "OCR".
        row = db((db.trusted.path == path) & db.trusted.mode.belongs(("ocr", "OCR"))).select().first()
        if row is not None and row.trusted:
            Trusted = row.trusted

    # Pass the resolved trusted text; the looked-up value used to be discarded
    # because the raw request field was passed here.
    Sal = main.EvalFacturas(path, task_prompt, Trusted, option)
    Sal["path"] = path

    same_result = (db.analitic_ocr.path == Sal["path"]) & (db.analitic_ocr.model == Sal["model"])
    if db(same_result).count() == 0:
        db.analitic_ocr.insert(**Sal)
    else:
        db(same_result).update(similarity=Sal["similarity"],similaritypartial=Sal["similaritypartial"],jsonok=Sal["jsonok"])
    db.commit()
    return Sal
@app.get("/EvalLLMFact")
@app.post("/EvalLLMFact")
def EvalLLMFact(response:Response2):
    """Run the LLM invoice evaluation and return its result unchanged.

    Args:
        response (Response2): request body; each field is forwarded
            positionally to ``main.EvalllmFacturas``.

    Returns:
        Whatever ``main.EvalllmFacturas`` returns.
    """
    return main.EvalllmFacturas(
        response.path,
        response.task_prompt,
        response.system,
        response.content,
        response.max_tokens,
        response.model,
        response.prompt,
        response.TrustedLLmjson,
    )
@app.get("/evalvoicehtml")
def EvalVoicehtml():
    """Serve the HTML form that posts an audio file and a model to /EvalVoice.

    The first drop-down offers every entry of the ``pathAud`` directory
    (value: its path under ``pwd``); the second offers whisper and vosk.

    Returns:
        HTMLResponse with status 200.
    """
    option_template = """<option value="%s">Opción %s, %s</option>
"""
    # One <option> per directory entry, numbered from 1 in listing order.
    options = "".join(
        option_template % (str(pwd+"/"+pathAud+"/"+entry), str(number), str(entry))
        for number, entry in enumerate(os.listdir(pathAud), start=1)
    )
    page = """<!DOCTYPE html>
<html lang="es">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Evaluacion de modelos voice2txt</title>
<style>
body {
font-family: Arial, sans-serif;
margin: 20px;
}
input, button {
margin: 10px 0;
padding: 5px;
}
#respuesta {
margin-top: 20px;
padding: 10px;
border: 1px solid #ccc;
background-color: #f9f9f9;
}
</style>
</head>
<body>
<h1>Petición POST a API</h1>
<select id="texto1">
%s
</select>
<br>
<select id="texto2">
<option value="whisper">whisper</option>
<option value="vosk">vosk</option>
</select>
<br>
<button onclick="enviarPeticion()">Enviar petición</button>
<div id="respuesta"></div>
<script>
function enviarPeticion() {
const texto1 = document.getElementById('texto1').value;
const texto2 = document.getElementById('texto2').value;
const datos = {
path: texto1,
model: texto2
};
fetch('/EvalVoice', {
method: 'POST',
headers: {
'Content-Type': 'application/json'
},
body: JSON.stringify(datos)
})
.then(response => response.json())
.then(data => {
document.getElementById('respuesta').innerHTML = JSON.stringify(data, null, 2);
})
.catch(error => {
document.getElementById('respuesta').innerHTML = 'Error: ' + error;
});
}
</script>
</body>
</html>
""" % options
    return HTMLResponse(content=page, status_code=200)
@app.get("/evalocrfactura")
def EvalOCRFactura():
    """Serve the HTML form that posts an invoice OCR evaluation to /EvalFact.

    The first drop-down offers every entry of the ``pathFact`` directory
    (value: its path under ``pwd``); the remaining controls supply the task
    prompt, the trusted OCR text and the option.

    Returns:
        HTMLResponse with status 200.
    """
    option_template = """<option value="%s">Opción %s, %s</option>
"""
    # One <option> per directory entry, numbered from 1 in listing order.
    options = "".join(
        option_template % (str(pwd+"/"+pathFact+"/"+entry), str(number), str(entry))
        for number, entry in enumerate(os.listdir(pathFact), start=1)
    )
    page = """<!DOCTYPE html>
<html lang="es">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Evaluacion de modelos OCR</title>
<style>
body {
font-family: Arial, sans-serif;
margin: 20px;
}
input, button {
margin: 10px 0;
padding: 5px;
}
#respuesta {
margin-top: 20px;
padding: 10px;
border: 1px solid #ccc;
background-color: #f9f9f9;
}
</style>
</head>
<body>
<h1>Petición POST a API</h1>
<select id="texto1">
%s
</select>
<br>
<select id="texto2">
<option value="More Detailed Caption">More Detailed Caption</option>
<option value="OCR">OCR</option>
<option value="parsed">parsed</option>
<option value="scan">scan</option>
</select>
<br>
<input type="text" id="texto3" placeholder="TrustedOCR">
<br>
<input type="text" id="texto4" placeholder="option">
<br>
<button onclick="enviarPeticion()">Enviar petición</button>
<div id="respuesta"></div>
<script>
function enviarPeticion() {
const texto1 = document.getElementById('texto1').value;
const texto2 = document.getElementById('texto2').value;
const texto3 = document.getElementById('texto3').value;
const texto4 = document.getElementById('texto4').value;
const datos = {
path: texto1,
task_prompt: texto2,
TrustedOCR: texto3,
option: texto4
};
fetch('/EvalFact', {
method: 'POST',
headers: {
'Content-Type': 'application/json'
},
body: JSON.stringify(datos)
})
.then(response => response.json())
.then(data => {
document.getElementById('respuesta').innerHTML = JSON.stringify(data, null, 2);
})
.catch(error => {
document.getElementById('respuesta').innerHTML = 'Error: ' + error;
});
}
</script>
</body>
</html>
""" % options
    return HTMLResponse(content=page, status_code=200)
def list2tablehtmlOCR(listdata,model):
    """Render OCR evaluation rows as an HTML table headed by the model name.

    Args:
        listdata: iterable of mappings with the keys ``path``, ``time``,
            ``similarity`` and ``similaritypartial``.
        model: name shown in the table heading.

    Returns:
        An HTML fragment: an ``<h2>`` heading followed by a ``<table>`` with
        one row per item.
    """
    # Local import keeps this block self-contained.
    from html import escape

    # Values such as `path` originate from API requests, so they are escaped
    # before being placed into markup.
    heading = """<h2>Table of {0}</h2>
<table style="width:100%">
<tr>
<th>path</th>
<th>time</th>
<th>similarity</th>
<th>similaritypartial</th>
</tr>""".format(escape(str(model)))
    row_template = """ <tr>
<td>%s</td>
<td>%s</td>
<td>%s</td>
<td>%s</td>
</tr>
"""
    columns = ("path", "time", "similarity", "similaritypartial")
    # A single join replaces repeated string concatenation.
    body = "".join(
        row_template % tuple(escape(str(item[column])) for column in columns)
        for item in listdata
    )
    return heading + body + """</table>
"""
def tableOCR(model):
    """Aggregate the stored OCR evaluations of one model for the metrics page.

    Args:
        model: model name used to filter the ``analitic_ocr`` table.

    Returns:
        dict with the keys ``time``, ``similarity``, ``similaritypartial``
        (pivot-table aggregates of the model's rows), ``card`` (HTML flip
        card) and ``data`` (HTML table of the individual rows).
    """
    rows_list = db(db.analitic_ocr.model == model).select().as_list()
    data = pd.DataFrame(rows_list)
    # One pivot instead of three identical ones. The local is named
    # `mean_time` so the imported `time` module is no longer shadowed.
    means = pd.pivot_table(data,values=['time','similarity', 'similaritypartial'],index="model")
    mean_time = means['time'].values[0]
    similarity = means['similarity'].values[0]
    similaritypartial = means['similaritypartial'].values[0]
    card = """<div class="flip-card">
<div class="flip-card-inner">
<div class="flip-card-front">
<p style="width:300px;height:300px;">{0} </p>
</div>
<div class="flip-card-back">
<h1>time of process (sg)</h1>
<p>{1}</p>
<h1>similarity</h1>
<p>{2}</p>
<h1>similaritypartial</h1>
<p>{3}</p>
</div>
</div>
</div>""".format(model,mean_time,similarity,similaritypartial)
    return {"time":mean_time,"similarity":similarity,"similaritypartial":similaritypartial,"card":card,"data":list2tablehtmlOCR(rows_list,model)}
@app.get("/getmetricsocr")
def getMetricsOCR():
    """Serve the HTML metrics page for every OCR model that has results.

    For each distinct model in ``analitic_ocr`` the page shows a flip card
    with aggregate metrics followed by a table of the individual rows.

    Returns:
        HTMLResponse with status 200.
    """
    models = [row.model for row in db().select(db.analitic_ocr.model, distinct=True)]
    summaries = [tableOCR(model) for model in models]
    cards = "".join(summary["card"] for summary in summaries)
    dataAll = "".join(summary["data"] for summary in summaries)
    # Kept as a plain (unformatted) string because the CSS is full of braces.
    # The <title> was copied from the voice page, and the selector read
    # ". container", which is invalid CSS, so the flex layout never applied.
    htmlhead = """<!DOCTYPE html>
<html lang="es">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Evaluacion de modelos OCR</title>
<style>
.container{
display:flex;
}
/* The flip card container - set the width and height to whatever you want. We have added the border property to demonstrate that the flip itself goes out of the box on hover (remove perspective if you don't want the 3D effect */
.flip-card {
background-color: transparent;
width: 500px;
height: 500px;
border: 1px solid #f1f1f1;
perspective: 1000px; /* Remove this if you don't want the 3D effect */
}
/* This container is needed to position the front and back side */
.flip-card-inner {
position: relative;
width: 100%;
height: 100%;
text-align: center;
transition: transform 0.8s;
transform-style: preserve-3d;
}
/* Do an horizontal flip when you move the mouse over the flip box container */
.flip-card:hover .flip-card-inner {
transform: rotateY(180deg);
}
/* Position the front and back side */
.flip-card-front, .flip-card-back {
position: absolute;
width: 100%;
height: 100%;
-webkit-backface-visibility: hidden; /* Safari */
backface-visibility: hidden;
}
/* Style the front side (fallback if image is missing) */
.flip-card-front {
background-color: #bbb;
color: black;
}
/* Style the back side */
.flip-card-back {
background-color: dodgerblue;
color: white;
transform: rotateY(180deg);
}
</style>
</head>"""
    # The class attribute used typographic quotes, so the element's class was
    # not "container"; plain ASCII quotes make the selector above match.
    htmlbody = """<body>
<h1>Estadisticas modelos de OCR</h1>
<div class="container">
{0}
</div>
{1}
</body>
</html>
""".format(cards,dataAll)
    html = htmlhead+htmlbody
    return HTMLResponse(content=html, status_code=200)
@app.get("/evalllmfacturas")
def EvalllmFacturas():
    """Serve the HTML form that posts an invoice LLM evaluation to /EvalLLMFact.

    The first drop-down offers every entry of the ``pathFact`` directory
    (value: its path under ``pwd``); the remaining controls supply the other
    request fields with pre-filled defaults.

    Returns:
        HTMLResponse with status 200.
    """
    option_template = """<option value="%s">Opción %s, %s</option>
"""
    # One <option> per directory entry, numbered from 1 in listing order.
    options = "".join(
        option_template % (str(pwd+"/"+pathFact+"/"+entry), str(number), str(entry))
        for number, entry in enumerate(os.listdir(pathFact), start=1)
    )
    # The template has two %s slots: the option list, and the default of the
    # "content" input, which must itself stay a literal "%s".
    page = """<!DOCTYPE html>
<html lang="es">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Evaluacion modelos LLM</title>
<style>
body {
font-family: Arial, sans-serif;
margin: 20px;
}
input, button {
margin: 10px 0;
padding: 5px;
}
#respuesta {
margin-top: 20px;
padding: 10px;
border: 1px solid #ccc;
background-color: #f9f9f9;
}
</style>
</head>
<body>
<h1>Petición POST a API</h1>
<select id="texto1">
%s
</select>
<br>
<select id="texto2">
<option value="">N.A.</option>
<option value="More Detailed Caption">More Detailed Caption</option>
<option value="OCR">OCR</option>
<option value="parsed">parsed</option>
<option value="scan">scan</option>
</select>
<br>
<input type="text" id="texto3" placeholder="system" value="Eres un chatbot amable">
<br>
<input type="text" id="texto4" placeholder="content" value="%s">
<br>
<input type="number" id="texto5" placeholder="max_tokens" value=1024>
<br>
<input type="text" id="texto6" placeholder="model" value="Claude-sonnet">
<br>
<input type="text" id="texto7" placeholder="prompt" value="Analiza la factura">
<br>
<input type="text" id="texto8" placeholder="TrustedLLmjson" value="{'A':''}">
<br>
<button onclick="enviarPeticion()">Enviar petición</button>
<div id="respuesta"></div>
<script>
function enviarPeticion() {
const texto1 = document.getElementById('texto1').value;
const texto2 = document.getElementById('texto2').value;
const texto3 = document.getElementById('texto3').value;
const texto4 = document.getElementById('texto4').value;
const texto5 = document.getElementById('texto5').value;
const texto6 = document.getElementById('texto6').value;
const texto7 = document.getElementById('texto7').value;
const texto8 = document.getElementById('texto8').value;
const datos = {
path: texto1,
task_prompt: texto2,
system: texto3,
content:texto4,
max_tokens:texto5,
model:texto6,
prompt:texto7,
TrustedLLmjson:texto8,
};
fetch('/EvalLLMFact', {
method: 'POST',
headers: {
'Content-Type': 'application/json'
},
body: JSON.stringify(datos)
})
.then(response => response.json())
.then(data => {
document.getElementById('respuesta').innerHTML = JSON.stringify(data, null, 2);
})
.catch(error => {
document.getElementById('respuesta').innerHTML = 'Error: ' + error;
});
}
</script>
</body>
</html>
""" % (options, "%s")
    return HTMLResponse(content=page, status_code=200)