import fastapi
from fastapi import FastAPI, Request, Query, File, UploadFile, HTTPException
from fastapi.responses import HTMLResponse, JSONResponse
from pydantic import BaseModel
import time
from fastapi.staticfiles import StaticFiles
#from fastapi.middleware.cors import CORSMiddleware
from starlette.middleware.cors import CORSMiddleware
import main
import os
from databases import db
import audioread
import pandas as pd
import statistics
import hashlib
from datetime import datetime
import json

pwd = os.getcwd()
pathAud = "example/audio"
pathFact = "example/factura"
pathText = "example/texto"


def extractConfig(nameModel="SystemData", relPath=os.path.join(pwd, "conf/experiment_config.json"), dataOut="keyantrophics"):
    """Read one entry of a model block from the experiment configuration file."""
    configPath = os.path.join(os.getcwd(), relPath)
    with open(configPath, 'r', encoding='utf-8') as file:
        config = json.load(file)[nameModel]
    Output = config[dataOut]
    return Output


mode_list = extractConfig(nameModel="SystemData", dataOut="mode_list")

app = FastAPI()
#app.mount("/statics", StaticFiles(directory="statics"), name="statics")
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)


class Response(BaseModel):
    """Request body for the voice evaluation endpoint."""
    path: str = Query("", description="path of the audio file on the system")
    model: str = Query("whisper", description="voice model to evaluate (whisper or vosk)")


class Response1(BaseModel):
    """Request body for the OCR (invoice) evaluation endpoint."""
    path: str = Query("", description="path of the invoice file")
    task_prompt: str = Query("", description="task prompt for the model")
    model: str = Query("", description="OCR model to evaluate")
    TrustedOCR: str = Query("", description="trusted (reference) OCR text")
    option: str = Query("", description="OCR model option")


class Response2(BaseModel):
    """Request body for the LLM invoice-extraction evaluation endpoint."""
    path: str = Query("", description="path of the invoice file")
    task_prompt: str = Query("", description="task prompt for the model")
    system: str = Query("", description="system prompt for the LLM model with OCR and image (Claude)")
    content: str = Query("%s", description="content prompt for the LLM model with OCR")
    max_tokens: int = Query(1024, description="max tokens for the LLM OCR model")
    model: str = Query("Claude-sonnet", description="model")
    prompt: str = Query("", description="prompt for Claude with image")
    TrustedLLmjson: str = Query("", description="trusted (reference) LLM JSON")


class Response3(BaseModel):
    """Request body used to register trusted (reference) data."""
    path: str = Query("", description="path of the file on the system, or the raw text when the mode works on text")
    Trusted: str = Query("", description="trusted (reference) information for the item")
    mode: str = Query("whisper", description="one of the modes defined in mode_list")


#Funcionales
@app.get("/addTrusted")
@app.post("/addTrusted")
def addTrusted(response: Response3):
    """API to register trusted (reference) data.

    Args:
        response (Response3): three fields:
            path: path of the file on the system, or the raw text when the mode works on text.
            Trusted: trusted (reference) information for the item.
            mode: llm_compra, llm_factura, llm_generaciontexto, llm_rag, ocr or voice.

    Returns:
        A confirmation string, or a 404 JSONResponse when the mode or file is not found.
    """
    path = response.path
    Trusted = response.Trusted
    mode = response.mode
    last_modified = datetime.now()
    if mode not in mode_list.keys():
        return JSONResponse(
            status_code=404,
            content={"content": "mode not found"}
        )
    if mode == "llm_factura" or mode == "ocr" or mode == "voice":
        if not os.path.isfile(path):
            return JSONResponse(
                status_code=404,
                content={"content": "file not found"}
            )
    size = duration = length = 0
    if mode_list[mode] == "texto":
        # store the raw text under a content-derived name and keep its path
        hash1 = hashlib.sha256(path.encode()).hexdigest() + ".txt"
        with open(os.path.join(pathText, hash1), "w") as f:
            f.write(path)
        path = os.path.join(pwd, pathText, hash1)
        length = len(Trusted)
    elif mode_list[mode] == "factura":
        file_stats = os.stat(path)
        size = file_stats.st_size / (1024 * 1024)
    elif mode_list[mode] == "audio":
        with audioread.audio_open(path) as f:
            duration = f.duration
    if db((db.trusted.path == path) & (db.trusted.mode == mode)).count() == 0:
        db.trusted.insert(path=path, trusted=Trusted, mode=mode, size=size,
                          duration=duration, last_modified=last_modified, length=length)
        db.commit()
        return "Add %s in mode %s" % (path, mode)
    else:
        item = db((db.trusted.path == path) & (db.trusted.mode == mode)).select().last()
        modification_count = item.modification_count + 1
        db((db.trusted.path == path) & (db.trusted.mode == mode)).update(
            trusted=Trusted, size=size, duration=duration, length=length,
            last_modified=last_modified, modification_count=modification_count)
        db.commit()
        return "Update %s in mode %s" % (path, mode)


@app.get("/EvalVoice")
@app.post("/EvalVoice")
def EvalVoice(response: Response):
    path = response.path
    model = response.model
    if db((db.trusted.path == path) & (db.trusted.mode == "voice")).count() == 0:
        return JSONResponse(
            status_code=404,
            content={"content": "Trusted not found"}
        )
    Trusted = db((db.trusted.path == path) & (db.trusted.mode == "voice")).select().last().trusted
    print(Trusted)
    if model == "whisper":
        Sal = main.EvalWhisper(path, Trusted)
    else:
        Sal = main.EvalVosk(path, Trusted)
    Sal["last_modified"] = datetime.now()
    query = (db.analitic_voice.path == Sal["path"]) & (db.analitic_voice.model == Sal["model"])
    if db(query).count() == 0:
        db.analitic_voice.insert(**Sal)
        db.commit()
    else:
        db(query).update(similarity=Sal["similarity"],
                         similaritypartial=Sal["similaritypartial"],
                         last_modified=Sal["last_modified"])
        db.commit()
    return Sal
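
# Illustrative client sketch (not called anywhere in this module). It only shows the
# JSON bodies that /addTrusted and /EvalVoice expect; the base URL, the `requests`
# dependency and the sample file name are assumptions, not part of this service.
def _example_voice_eval_requests(base_url="http://localhost:8000"):
    import requests  # assumed to be available on the client side
    sample = os.path.join(pwd, pathAud, "sample.wav")  # hypothetical audio file
    # register the reference transcription for the file
    requests.post(base_url + "/addTrusted",
                  json={"path": sample, "Trusted": "expected transcription", "mode": "voice"})
    # evaluate the whisper model against the stored reference
    return requests.post(base_url + "/EvalVoice",
                         json={"path": sample, "model": "whisper"}).json()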

def list2tablehtml(listdata, model):
    html = """<h2>Table of {0}</h2>
    <table>
    <tr><th>path</th><th>time</th><th>similarity</th><th>similaritypartial</th></tr>
    """.format(model)
    for i in listdata:
        html = html + """<tr><td>%s</td><td>%s</td><td>%s</td><td>%s</td></tr>
        """ % (i["path"], i["time"], i["similarity"], i["similaritypartial"])
    html = html + """</table>
    """
    return html

def tableVoice(model):
    """Aggregate the stored voice metrics for one model and build its HTML card."""
    rows = db(db.analitic_voice.model == model).select()
    rows_list = rows.as_list()
    data = pd.DataFrame(rows_list)
    durationL = list()
    for i in rows_list:
        durationL.append(db(db.trusted.path == i["path"]).select().last().duration)
    duration = statistics.mean(durationL)
    pivot = pd.pivot_table(data, values=['time', 'similarity', 'similaritypartial'], index="model")
    time = pivot['time'].values[0]
    similarity = pivot['similarity'].values[0]
    similaritypartial = pivot['similaritypartial'].values[0]
    efectivetime = time / duration
    card = """<div>
    <h3>{0}</h3>
    <p>time of process (s): {1}</p>
    <p>similarity: {2}</p>
    <p>similaritypartial: {3}</p>
    <p>time of audio (s): {4}</p>
    <p>process time / audio time: {5}</p>
    </div>
    """.format(model, time, similarity, similaritypartial, duration, efectivetime)
    return {"duration": duration, "time": time, "similarity": similarity,
            "similaritypartial": similaritypartial, "card": card,
            "data": list2tablehtml(rows_list, model)}

@app.get("/getmetricsvoice")
def getMetricsVoice():
    models = list()
    for row in db().select(db.analitic_voice.model, distinct=True):
        models.append(row.model)
    cards = ""
    dataAll = ""
    for model in models:
        Sal = tableVoice(model)
        cards = cards + Sal["card"]
        dataAll = dataAll + Sal["data"]
    htmlhead = """<head><title>Evaluation of voice2txt models</title></head>"""
    htmlbody = """<body>
    <h1>Voice model statistics</h1>
    {0}
    {1}
    </body>""".format(cards, dataAll)
    html = htmlhead + htmlbody
    return HTMLResponse(content=html, status_code=200)


@app.get("/EvalFact")
@app.post("/EvalFact")
def EvalFact(response: Response1):
    path = response.path
    task_prompt = response.task_prompt
    option = response.model
    TrustedOCR = response.TrustedOCR
    Trusted = TrustedOCR
    if task_prompt == "":
        if Trusted == "":
            # fall back to the trusted OCR text stored by /addTrusted
            row = db((db.trusted.path == path) & (db.trusted.mode == "OCR")).select().first()
            if row is not None:
                Trusted = row.trusted
    Sal = main.EvalFacturas(path, task_prompt, Trusted, option)
    Sal["path"] = path
    query = (db.analitic_ocr.path == Sal["path"]) & (db.analitic_ocr.model == Sal["model"])
    if db(query).count() == 0:
        db.analitic_ocr.insert(**Sal)
        db.commit()
    else:
        db(query).update(similarity=Sal["similarity"],
                         similaritypartial=Sal["similaritypartial"],
                         jsonok=Sal["jsonok"])
        db.commit()
    return Sal


@app.get("/EvalLLMFact")
@app.post("/EvalLLMFact")
def EvalLLMFact(response: Response2):
    path = response.path
    task_prompt = response.task_prompt
    system = response.system
    content = response.content
    max_tokens = response.max_tokens
    model = response.model
    prompt = response.prompt
    TrustedLLmjson = response.TrustedLLmjson
    Sal = main.EvalllmFacturas(path, task_prompt, system, content, max_tokens, model, prompt, TrustedLLmjson)
    return Sal
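
# Illustrative client sketch for the invoice endpoints (not called in this module).
# The base URL, the `requests` dependency and the sample file name are assumptions;
# the field names mirror Response1 and Response2 above.
def _example_invoice_eval_requests(base_url="http://localhost:8000"):
    import requests  # assumed to be available on the client side
    invoice = os.path.join(pwd, pathFact, "invoice.png")  # hypothetical invoice image
    ocr = requests.post(base_url + "/EvalFact",
                        json={"path": invoice, "task_prompt": "", "model": "",
                              "TrustedOCR": "", "option": ""}).json()
    llm = requests.post(base_url + "/EvalLLMFact",
                        json={"path": invoice, "task_prompt": "", "system": "",
                              "content": "%s", "max_tokens": 1024,
                              "model": "Claude-sonnet", "prompt": "",
                              "TrustedLLmjson": ""}).json()
    return ocr, llm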

@app.get("/evalvoicehtml")
def EvalVoicehtml():
    dir_list = os.listdir(pathAud)
    Sal = ""
    t = 1
    for i in dir_list:
        # one radio button per available audio file, with the absolute path as value
        temp = """<input type="radio" name="path" value="%s" id="%s"> %s <br>
        """ % (str(pwd + "/" + pathAud + "/" + i), str(t), str(i))
        Sal = Sal + temp
        t = t + 1
    html = """<head><title>Evaluation of voice2txt models</title></head>
    <body>
    <h1>POST request to the API</h1>
    <form action="/EvalVoice" method="post">
    %s
    <button type="submit">Send</button>
    </form>
    </body>
    """ % (Sal)
    return HTMLResponse(content=html, status_code=200)

@app.get("/evalocrfactura")
def EvalOCRFactura():
    dir_list = os.listdir(pathFact)
    Sal = ""
    t = 1
    for i in dir_list:
        # one radio button per available invoice file, with the absolute path as value
        temp = """<input type="radio" name="path" value="%s" id="%s"> %s <br>
        """ % (str(pwd + "/" + pathFact + "/" + i), str(t), str(i))
        Sal = Sal + temp
        t = t + 1
    html = """<head><title>Evaluation of OCR models</title></head>
    <body>
    <h1>POST request to the API</h1>
    <form action="/EvalFact" method="post">
    %s
    <button type="submit">Send</button>
    </form>
    </body>
    """ % (Sal)
    return HTMLResponse(content=html, status_code=200)

def list2tablehtmlOCR(listdata, model):
    html = """<h2>Table of {0}</h2>
    <table>
    <tr><th>path</th><th>time</th><th>similarity</th><th>similaritypartial</th></tr>
    """.format(model)
    for i in listdata:
        html = html + """<tr><td>%s</td><td>%s</td><td>%s</td><td>%s</td></tr>
        """ % (i["path"], i["time"], i["similarity"], i["similaritypartial"])
    html = html + """</table>
    """
    return html

def tableOCR(model):
    """Aggregate the stored OCR metrics for one model and build its HTML card."""
    rows = db(db.analitic_ocr.model == model).select()
    rows_list = rows.as_list()
    data = pd.DataFrame(rows_list)
    pivot = pd.pivot_table(data, values=['time', 'similarity', 'similaritypartial'], index="model")
    time = pivot['time'].values[0]
    similarity = pivot['similarity'].values[0]
    similaritypartial = pivot['similaritypartial'].values[0]
    card = """<div>
    <h3>{0}</h3>
    <p>time of process (s): {1}</p>
    <p>similarity: {2}</p>
    <p>similaritypartial: {3}</p>
    </div>
    """.format(model, time, similarity, similaritypartial)
    return {"time": time, "similarity": similarity, "similaritypartial": similaritypartial,
            "card": card, "data": list2tablehtmlOCR(rows_list, model)}

@app.get("/getmetricsocr")
def getMetricsOCR():
    models = list()
    for row in db().select(db.analitic_ocr.model, distinct=True):
        models.append(row.model)
    cards = ""
    dataAll = ""
    for model in models:
        Sal = tableOCR(model)
        cards = cards + Sal["card"]
        dataAll = dataAll + Sal["data"]
    htmlhead = """<head><title>Evaluation of OCR models</title></head>"""
    htmlbody = """<body>
    <h1>OCR model statistics</h1>
    {0}
    {1}
    </body>""".format(cards, dataAll)
    html = htmlhead + htmlbody
    return HTMLResponse(content=html, status_code=200)
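
# Minimal sketch of the aggregation done in tableVoice and tableOCR above:
# pd.pivot_table with its default aggfunc averages every metric per model.
# The toy rows below are illustrative values, not real measurements.
def _example_metric_pivot():
    toy = pd.DataFrame([
        {"model": "whisper", "time": 2.0, "similarity": 0.9, "similaritypartial": 0.95},
        {"model": "whisper", "time": 4.0, "similarity": 0.7, "similaritypartial": 0.85},
    ])
    pivot = pd.pivot_table(toy, values=["time", "similarity", "similaritypartial"], index="model")
    # pivot.loc["whisper", "time"] == 3.0, the mean of 2.0 and 4.0
    return pivot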

@app.get("/evalllmfacturas")
def EvalllmFacturas():
    dir_list = os.listdir(pathFact)
    Sal = ""
    t = 1
    for i in dir_list:
        # one radio button per available invoice file, with the absolute path as value
        temp = """<input type="radio" name="path" value="%s" id="%s"> %s <br>
        """ % (str(pwd + "/" + pathFact + "/" + i), str(t), str(i))
        Sal = Sal + temp
        t = t + 1
    html = """<head><title>Evaluation of LLM models</title></head>
    <body>
    <h1>POST request to the API</h1>
    <form action="/EvalLLMFact" method="post">
    %s
    <input type="text" name="content" value="%s">
    <button type="submit">Send</button>
    </form>
    </body>
    """ % (Sal, "%s")
    return HTMLResponse(content=html, status_code=200)