import fastapi
from fastapi import FastAPI, Request
from fastapi.responses import HTMLResponse
from pydantic import BaseModel
import time
from fastapi.staticfiles import StaticFiles
from fastapi import FastAPI, Query, File, UploadFile
#from fastapi.middleware.cors import CORSMiddleware
from starlette.middleware.cors import CORSMiddleware
import main
import os
from databases import db
import audioread
import pandas as pd
import statistics
# Working directory captured once at import time; the HTML listing endpoints
# prepend it to the example directories to build absolute file paths.
pwd = os.getcwd()
# Directories (relative to the working directory) whose files are listed by
# the HTML endpoints further down.
pathAud="example/audio"
pathFact="example/factura"
app = FastAPI()
#app.mount("/statics", StaticFiles(directory="statics"), name="statics")
# NOTE(review): wildcard origins combined with allow_credentials=True is a
# very permissive CORS policy — confirm this is intended outside development.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
class Response(BaseModel):
    """Request body of the ``/EvalVoice`` endpoint.

    The previous docstring and field descriptions were copied from an
    unrelated model; they now describe how ``EvalVoice`` uses each field.
    """

    path: str = Query("", description="Path of the audio file to evaluate")
    Trusted: str = Query(
        "",
        description="Trusted reference text; looked up in the trusted table when empty",
    )
    model: str = Query(
        "whisper",
        description="Model to evaluate: 'whisper'; any other value selects Vosk",
    )
class Response1(BaseModel):
    """Request body of the ``/EvalFact`` endpoint."""
    # Path of the file to evaluate; also used as the key of the stored result.
    path: str = Query("", description="path file")
    # Forwarded to main.EvalFacturas; the trusted-text lookup only happens
    # when this is empty.
    task_prompt: str = Query("", description="task of model")
    # Read by EvalFact into its local `option` and forwarded to
    # main.EvalFacturas.
    model: str = Query("", description="model")
    # Trusted reference passed to main.EvalFacturas.
    # NOTE(review): "truted" looks like a typo for "trusted"; the string is
    # left untouched here because it is a runtime value.
    TrustedOCR: str = Query("", description="truted OCR model")
    # NOTE(review): EvalFact in this file never reads this field (it takes its
    # `option` from `model`) — confirm whether that is intended.
    option: str = Query("", description="OCR model option")
class Response2(BaseModel):
    """Request body of the ``/EvalLLMFact`` endpoint.

    EvalLLMFact forwards every field, unchanged and in declaration order,
    to ``main.EvalllmFacturas``.
    """
    path: str = Query("", description="path file")
    task_prompt: str = Query("", description="task of model")
    system: str = Query("", description="prompt system LLM model with ocr and image claude")
    # The default is the literal "%s"; presumably a slot the callee fills in —
    # TODO confirm against main.EvalllmFacturas.
    content: str = Query("%s", description="prompt content LLM model with ocr")
    max_tokens: int = Query(1024, description="maxtoken LLM OCR model")
    model: str = Query("Claude-sonnet", description="model")
    prompt: str = Query("", description="prompt in claude with image")
    # NOTE(review): "truted" looks like a typo for "trusted"; the string is
    # left untouched here because it is a runtime value.
    TrustedLLmjson: str = Query("", description="truted OCR model")
class Response3(BaseModel):
    """Request body of the ``/addTrusted`` endpoint.

    The previous docstring and field descriptions were copied from an
    unrelated model; they now describe how ``addTrusted`` uses each field.
    """

    path: str = Query("", description="Path of the file the trusted value belongs to")
    Trusted: str = Query("", description="Trusted reference value to store for the file")
    # NOTE(review): the default stays "whisper" for compatibility, although the
    # handlers in this file only use the modes "voice" and "OCR" — confirm.
    mode: str = Query(
        "whisper",
        description="Kind of entry, e.g. 'voice' or 'OCR'; audio duration is measured for 'voice'",
    )
@app.get("/addTrusted")
@app.post("/addTrusted")
def addTrusted(response:Response3):
    """Insert or update the trusted reference stored for a file.

    The file size (in MiB) is always recorded; the audio duration is measured
    only when ``mode`` is ``"voice"`` and stored as 0 otherwise.

    Args:
        response: Request body carrying ``path``, ``Trusted`` and ``mode``.

    Returns:
        A message telling whether the (path, mode) entry was added or updated.

    Raises:
        OSError: If ``path`` cannot be stat'ed.
    """
    path = response.path
    Trusted = response.Trusted
    mode = response.mode
    size = os.stat(path).st_size / (1024 * 1024)
    if mode == "voice":
        with audioread.audio_open(path) as f:
            duration = f.duration
    else:
        duration = 0
    # DAL conditions must be combined with `&`. Python's `and` evaluates to its
    # right operand only, so the previous code matched on `mode` alone and an
    # update overwrote every row stored for that mode.
    entry = db((db.trusted.path == path) & (db.trusted.mode == mode))
    if entry.count() == 0:
        db.trusted.insert(
            path=path, trusted=Trusted, mode=mode, size=size, duration=duration
        )
        db.commit()
        return "Add %s in mode %s" % (path, mode)
    entry.update(trusted=Trusted, size=size, duration=duration)
    db.commit()
    return "Update %s in mode %s" % (path, mode)
def list2tablehtml(listdata,model):
    """Return the table markup for ``model``.

    The result is the header template formatted with ``model`` followed by one
    fixed row fragment per element of ``listdata``; the elements themselves
    are not read.
    """
    header = """
Table of {0}
path
time
similarity
similaritypartial
""".format(model)
    row_fragment = """
"""
    return header + "".join(row_fragment for _ in listdata)
def tableVoice(model):
    """Aggregate the stored voice evaluations of one model.

    Args:
        model: Value of ``analitic_voice.model`` to summarise.

    Returns:
        A dict with the mean audio ``duration``, the pivoted ``time``,
        ``similarity`` and ``similaritypartial`` values, a formatted ``card``
        and the per-row table markup under ``data``.
    """
    rows_list = db(db.analitic_voice.model == model).select().as_list()
    data = pd.DataFrame(rows_list)
    # One lookup per evaluated file: the duration of its latest trusted entry.
    # NOTE(review): `.last()` yields None when no trusted row exists for a
    # path, which makes the attribute access fail — confirm every evaluated
    # path is registered first.
    durations = [
        db(db.trusted.path == row["path"]).select().last().duration
        for row in rows_list
    ]
    duration = statistics.mean(durations)
    # Build the pivot once instead of three times; the local is no longer
    # called `time`, so it does not shadow the imported module.
    summary = pd.pivot_table(
        data, values=['time', 'similarity', 'similaritypartial'], index="model"
    )
    mean_time = summary['time'].values[0]
    similarity = summary['similarity'].values[0]
    similaritypartial = summary['similaritypartial'].values[0]
    efectivetime = mean_time / duration
    card = """
{0}
time of process (sg)
{1}
similarity
{2}
similaritypartial
{3}
time of audio(sg)
{4}
time in process
{5}
""".format(model, mean_time, similarity, similaritypartial, duration, efectivetime)
    return {
        "duration": duration,
        "time": mean_time,
        "similarity": similarity,
        "similaritypartial": similaritypartial,
        "card": card,
        "data": list2tablehtml(rows_list, model),
    }
@app.get("/getmetricsvoice")
def getMetricsVoice():
    """Return an HTML page with the cards and tables of every voice model.

    The distinct models stored in ``analitic_voice`` are summarised one by one
    with ``tableVoice`` and their fragments concatenated in that order.
    """
    model_names = [
        row.model
        for row in db().select(db.analitic_voice.model, distinct=True)
    ]
    summaries = [tableVoice(name) for name in model_names]
    cards = "".join(summary["card"] for summary in summaries)
    dataAll = "".join(summary["data"] for summary in summaries)
    head = """
Evaluacion de modelos voice2txt
"""
    body = """
Estadisticas modelos de voice
{0}
{1}
""".format(cards, dataAll)
    return HTMLResponse(content=head + body, status_code=200)
@app.get("/EvalVoice")
@app.post("/EvalVoice")
def EvalVoice(response:Response):
    """Evaluate a voice model on one file and persist the result.

    When no trusted text is supplied, the one stored for ``path`` in mode
    ``"voice"`` is used if it exists. The evaluation runs through
    ``main.EvalWhisper`` when ``model`` is ``"whisper"`` and through
    ``main.EvalVosk`` otherwise. The result is inserted into
    ``analitic_voice``; if a row for the same path and model already exists,
    only its ``similarity`` and ``similaritypartial`` are updated.

    Args:
        response: Request body carrying ``path``, ``Trusted`` and ``model``.

    Returns:
        The mapping produced by the evaluation function.
    """
    path = response.path
    Trusted = response.Trusted
    model = response.model
    if Trusted == "":
        # DAL conditions must be combined with `&`; Python's `and` keeps only
        # the right operand, which used to match any "voice" row.
        row = db(
            (db.trusted.path == path) & (db.trusted.mode == "voice")
        ).select().first()
        # `first()` yields None when nothing matches; keep the empty text then.
        if row is not None:
            Trusted = row.trusted
    if model == "whisper":
        Sal = main.EvalWhisper(path, Trusted)
    else:
        Sal = main.EvalVosk(path, Trusted)
    existing = db(
        (db.analitic_voice.path == Sal["path"])
        & (db.analitic_voice.model == Sal["model"])
    )
    if existing.count() == 0:
        db.analitic_voice.insert(**Sal)
    else:
        existing.update(
            similarity=Sal["similarity"],
            similaritypartial=Sal["similaritypartial"],
        )
    db.commit()
    return Sal
@app.get("/EvalFact")
@app.post("/EvalFact")
def EvalFact(response:Response1):
    """Evaluate an OCR model on one file and persist the result.

    When ``task_prompt`` and ``TrustedOCR`` are both empty, the trusted text
    stored for ``path`` in mode ``"OCR"`` is used if it exists. The result of
    ``main.EvalFacturas`` is inserted into ``analitic_ocr``; if a row for the
    same path and model already exists, only its ``similarity``,
    ``similaritypartial`` and ``jsonok`` are updated.

    Args:
        response: Request body carrying ``path``, ``task_prompt``, ``model``
            and ``TrustedOCR``.

    Returns:
        The mapping produced by ``main.EvalFacturas`` with ``path`` added.
    """
    path = response.path
    task_prompt = response.task_prompt
    # NOTE(review): the evaluator's "option" is taken from `model`; the
    # request's own `option` field is never read — confirm this is intended.
    option = response.model
    Trusted = response.TrustedOCR
    if task_prompt == "" and Trusted == "":
        # DAL conditions must be combined with `&`; Python's `and` keeps only
        # the right operand, which used to match any "OCR" row.
        row = db(
            (db.trusted.path == path) & (db.trusted.mode == "OCR")
        ).select().first()
        # `first()` yields None when nothing matches; keep the empty text then.
        if row is not None:
            Trusted = row.trusted
    # Pass the resolved trusted text: the previous code forwarded the raw
    # request value, so the database lookup above had no effect.
    Sal = main.EvalFacturas(path, task_prompt, Trusted, option)
    Sal["path"] = path
    existing = db(
        (db.analitic_ocr.path == Sal["path"])
        & (db.analitic_ocr.model == Sal["model"])
    )
    if existing.count() == 0:
        db.analitic_ocr.insert(**Sal)
    else:
        existing.update(
            similarity=Sal["similarity"],
            similaritypartial=Sal["similaritypartial"],
            jsonok=Sal["jsonok"],
        )
    db.commit()
    return Sal
@app.get("/EvalLLMFact")
@app.post("/EvalLLMFact")
def EvalLLMFact(response:Response2):
    """Forward the request fields to ``main.EvalllmFacturas`` and return its result."""
    return main.EvalllmFacturas(
        response.path,
        response.task_prompt,
        response.system,
        response.content,
        response.max_tokens,
        response.model,
        response.prompt,
        response.TrustedLLmjson,
    )
@app.get("/evalvoicehtml")
def EvalVoicehtml():
    """Build an HTML page from the files found in ``pathAud``.

    NOTE(review): in this view both templates are %-formatted with arguments
    but contain no ``%s`` placeholders, which raises ``TypeError`` at runtime.
    The markup looks like it was lost — confirm against the real templates.
    """
    dir_list = os.listdir(pathAud)
    Sal=""
    t=1
    for i in dir_list:
        # One fragment per file, formatted with its absolute path, a 1-based
        # counter and the bare file name.
        temp="""
"""%(str(pwd+"/"+pathAud+"/"+i),str(t),str(i))
        Sal=Sal+temp
        t=t+1
    # The accumulated fragments are substituted into the page template.
    html="""
Evaluacion de modelos voice2txt
Petición POST a API
"""%(Sal)
    return HTMLResponse(content=html, status_code=200)
@app.get("/evalocrfactura")
def EvalOCRFactura():
    """Build content from the files found in ``pathFact``.

    NOTE(review): this block looks truncated or fused with the tail of another
    function. ``model``, ``similarity``, ``similaritypartial``, ``card``,
    ``rows_list`` and ``list2tablehtmlOCR`` are not defined anywhere in the
    visible part of the file, ``time`` resolves to the imported module, and
    the per-file template has no ``%s`` placeholders. Restore the missing
    section before relying on this endpoint.
    """
    dir_list = os.listdir(pathFact)
    Sal=""
    t=1
    for i in dir_list:
        # One fragment per file, formatted with its absolute path, a 1-based
        # counter and the bare file name.
        temp="""
"""%(str(pwd+"/"+pathFact+"/"+i),str(t),str(i))
        Sal=Sal+temp
        t=t+1
    html="""
Evaluacion de modelos OCR
""".format(model,time,similarity,similaritypartial)
    return {"time":time,"similarity":similarity,"similaritypartial":similaritypartial,"card":card,"data":list2tablehtmlOCR(rows_list,model)}
@app.get("/getmetricsocr")
def getMetricsOCR():
    """Return an HTML page with the cards and tables of every OCR model.

    The distinct models stored in ``analitic_ocr`` are summarised one by one
    with ``tableOCR`` and their fragments concatenated in that order.
    """
    model_names = [
        row.model
        for row in db().select(db.analitic_ocr.model, distinct=True)
    ]
    summaries = [tableOCR(name) for name in model_names]
    cards = "".join(summary["card"] for summary in summaries)
    dataAll = "".join(summary["data"] for summary in summaries)
    # The heading previously read "voice2txt", copied from the voice metrics
    # page; this page reports OCR models.
    head = """
Evaluacion de modelos OCR
"""
    body = """
Estadisticas modelos de OCR
{0}
{1}
""".format(cards, dataAll)
    return HTMLResponse(content=head + body, status_code=200)
@app.get("/evalllmfacturas")
def EvalllmFacturas():
dir_list = os.listdir(pathFact)
Sal=""
t=1
for i in dir_list:
temp="""
"""%(str(pwd+"/"+pathFact+"/"+i),str(t),str(i))
Sal=Sal+temp
t=t+1
html="""
Evaluacion modelos LLM