EvalDataSetHugging/apis.py

import fastapi
from fastapi import FastAPI, Request
from fastapi.responses import HTMLResponse,JSONResponse
from pydantic import BaseModel
import time
from fastapi.staticfiles import StaticFiles
from fastapi import FastAPI, Query, File, UploadFile,HTTPException
#from fastapi.middleware.cors import CORSMiddleware
from starlette.middleware.cors import CORSMiddleware
import main
import os
from databases import db
import audioread
import pandas as pd
import statistics
import hashlib
from datetime import datetime
import json
# Working directory captured once at import time; used below to build absolute paths.
pwd = os.getcwd()
# Example-input directories, written relative to the working directory.
pathAud="example/audio"
pathFact="example/factura"
pathText="example/texto"

def extractConfig(nameModel="SystemData",relPath=os.path.join(pwd,"conf/experiment_config.json"),dataOut="keyantrophics"):
    """Read one value from a section of the JSON experiment configuration.

    Args:
        nameModel: Top-level key of the JSON document selecting the section.
        relPath: Path of the configuration file; it is joined onto the
            current working directory.
        dataOut: Key inside the selected section whose value is returned.

    Returns:
        The value stored under ``document[nameModel][dataOut]``.
    """
    config_file = os.path.join(os.getcwd(), relPath)
    with open(config_file, 'r', encoding='utf-8') as handle:
        section = json.load(handle)[nameModel]
    return section[dataOut]
# Mode table read from the configuration at import time; addTrusted validates
# the requested mode against its keys and branches on its values.
mode_list=extractConfig(nameModel="SystemData",dataOut="mode_list")


# Application object; the route decorators below register their handlers on it.
app = FastAPI()
#app.mount("/statics", StaticFiles(directory="statics"), name="statics")
# CORS is fully open: any origin, method and header is accepted.
# NOTE(review): wildcard origins together with allow_credentials=True is very
# permissive — confirm this is intended outside local development.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

class Response(BaseModel):
    """Request body for the voice evaluation endpoint (``/EvalVoice``)."""
    # Path of the audio file; EvalVoice also uses it to look up the trusted text.
    path: str = Query("", description="Path of the audio file to evaluate")
    # Any value other than "whisper" makes EvalVoice use the Vosk evaluator.
    model : str = Query("whisper", description="Speech-to-text model: whisper or vosk")
class Response1(BaseModel):
    # Request body read by the /EvalFact endpoint.

    # File to evaluate.
    path: str = Query(
        "",
        description="path file",
    )
    # Task handed to the evaluator.
    task_prompt: str = Query(
        "",
        description="task of model",
    )
    model: str = Query(
        "",
        description="model",
    )
    # Reference text supplied by the caller; may be left empty.
    TrustedOCR: str = Query(
        "",
        description="truted OCR model",
    )
    option: str = Query(
        "",
        description="OCR model option",
    )
class Response2(BaseModel):
    # Request body read by the /EvalLLMFact endpoint; every field is forwarded
    # positionally to main.EvalllmFacturas.

    path: str = Query(
        "",
        description="path file",
    )
    task_prompt: str = Query(
        "",
        description="task of model",
    )
    system: str = Query(
        "",
        description="prompt system LLM model with ocr and image claude",
    )
    # Defaults to a bare "%s" placeholder.
    content: str = Query(
        "%s",
        description="prompt content LLM  model with ocr",
    )
    max_tokens: int = Query(
        1024,
        description="maxtoken LLM OCR model",
    )
    model: str = Query(
        "Claude-sonnet",
        description="model",
    )
    prompt: str = Query(
        "",
        description="prompt in claude with image",
    )
    TrustedLLmjson: str = Query(
        "",
        description="truted OCR model",
    )

class Response3(BaseModel):
    """Request body for ``/addTrusted``: an input plus its trusted value."""
    # File path on this system, or the raw text itself for text modes.
    path: str = Query("", description="Path of the input file, or the text itself for text modes")
    # Reference value stored for the input.
    Trusted: str = Query("", description="Trusted (reference) value for the input")
    # Must be a key of the configured mode table; addTrusted answers 404 otherwise.
    # NOTE(review): the default "whisper" is not among the modes named in
    # addTrusted's docstring — confirm it is a valid key.
    mode : str = Query("whisper", description="Evaluation mode, e.g. ocr or voice")

# Functional endpoints
@app.get("/addTrusted")
@app.post("/addTrusted")
def addTrusted(response:Response3):
    """Store or refresh the trusted (reference) value for an input.

    Args:
        response (Response3): request body with three fields:
            path: path of a file on this system, or the text itself when the
                mode maps to "texto".
            Trusted: reference value to store for that input.
            mode: a key of ``mode_list`` (e.g. llm_compra, llm_factura,
                llm_generaciontexto, llm_rag, ocr, voice).

    Returns:
        "Add ..." when a new row was inserted, "Update ..." when an existing
        row was refreshed, or a 404 ``JSONResponse`` when the mode is unknown
        or the referenced file does not exist.
    """
    path=response.path
    Trusted=response.Trusted
    mode=response.mode
    last_modified=datetime.now()
    if mode not in mode_list:
        return JSONResponse(
            status_code=404,
            content={"content": "mode no found" }
        )
    kind = mode_list[mode]
    # File-backed kinds are checked too, so a missing file yields the 404
    # below instead of an unhandled error from os.stat / audioread.
    needs_file = mode in ("llm_factura", "ocr", "voice") or kind in ("factura", "audio")
    if needs_file and not os.path.isfile(path):
        return JSONResponse(
            status_code=404,
            content={"content": "file no found" }
        )

    # Defaults keep the three metrics bound whatever kind the config maps to.
    size = 0
    duration = 0
    length = 0
    if kind == "texto":
        # The text is persisted under a content-derived name and the row is
        # keyed by that file's absolute path.
        file_name = hashlib.sha256(path.encode()).hexdigest() + ".txt"
        with open(os.path.join(pathText, file_name), "w", encoding="utf-8") as f:
            f.write(path)
        # Joining with a separator makes the stored path name the file that
        # was just written.
        path = os.path.join(pwd, pathText, file_name)
        length = len(Trusted)
    elif kind == "factura":
        size = os.stat(path).st_size / (1024 * 1024)  # bytes -> MiB
    elif kind == "audio":
        with audioread.audio_open(path) as f:
            duration = f.duration

    same_entry = (db.trusted.path == path) & (db.trusted.mode == mode)
    if db(same_entry).count() == 0:
        db.trusted.insert(path=path,trusted=Trusted,mode=mode,size=size,duration=duration,last_modified=last_modified,length=length )
        db.commit()
        return "Add %s in mode %s"%(path,mode)

    item = db(same_entry).select().last()
    # Treat an unset counter as zero so the first update cannot fail.
    modification_count = (item.modification_count or 0) + 1
    db(same_entry).update(trusted=Trusted,size=size,duration =duration,length=length,last_modified=last_modified,modification_count= modification_count)
    db.commit()
    return "Update %s in mode %s"%(path,mode)

@app.get("/EvalVoice")
@app.post("/EvalVoice")
def EvalVoice(response:Response):
    """Evaluate a speech-to-text model on one audio file and store the result.

    Args:
        response (Response): ``path`` of the audio file and ``model`` name;
            "whisper" selects ``main.EvalWhisper``, anything else
            ``main.EvalVosk``.

    Returns:
        The evaluator's result dict with a ``last_modified`` timestamp added,
        or a 404 ``JSONResponse`` when no trusted "voice" row exists for the
        path.
    """
    path=response.path
    model=response.model
    trusted_query = (db.trusted.path == path) & (db.trusted.mode == "voice")
    if db(trusted_query).count()==0:
        return JSONResponse(
            status_code=404,
            content={"content": "Trusted no found" }
        )

    Trusted=db(trusted_query).select().last().trusted
    print(Trusted)
    if model=="whisper":
        Sal=main.EvalWhisper(path,Trusted)
    else:
        Sal=main.EvalVosk(path,Trusted)
    Sal["last_modified"]=datetime.now()
    # Conditions must be combined with "&": Python's "and" would evaluate to
    # just one of the two expressions and drop the other filter.
    same_result = (db.analitic_voice.path == Sal["path"]) & (db.analitic_voice.model == Sal["model"])
    if db(same_result).count()==0:
        db.analitic_voice.insert(**Sal)
    else:
        db(same_result).update(similarity= Sal["similarity"],similaritypartial= Sal["similaritypartial"],last_modified=Sal["last_modified"])
    db.commit()
    return Sal


def list2tablehtml(listdata,model):
    """Render analytics rows as an HTML table headed by the model name.

    Args:
        listdata: iterable of mappings providing the keys "path", "time",
            "similarity" and "similaritypartial".
        model: name shown in the ``<h2>`` heading.

    Returns:
        str: an ``<h2>`` heading followed by a ``<table>`` with one row per
        element of ``listdata``. All interpolated values are HTML-escaped.
    """
    # Local import: the module name "html" is commonly reused for page strings
    # in this file, so only the function is brought in.
    from html import escape

    heading = """<h2>Table of {0}</h2>
<table style="width:100%">
  <tr>
    <th>path</th>
    <th>time</th>
    <th>similarity</th>
    <th>similaritypartial</th>
  </tr>""".format(escape(str(model)))
    row_template = """  <tr>
    <td>%s</td>
    <td>%s</td>
    <td>%s</td>
    <td>%s</td>
  </tr>
"""
    columns = ("path", "time", "similarity", "similaritypartial")
    body = "".join(
        row_template % tuple(escape(str(record[column])) for column in columns)
        for record in listdata
    )
    return heading + body + """</table>
    """


def tableVoice(model):
    """Summarise the stored voice analytics of one model.

    Args:
        model: value matched against ``analitic_voice.model``.

    Returns:
        dict with the keys "duration" (mean audio duration of the trusted
        rows found for the analysed paths, or None when there are none),
        "time", "similarity", "similaritypartial" (per-model aggregates from
        the pivot table), "card" (HTML flip card) and "data" (HTML table of
        the individual rows).
    """
    rows_list = db(db.analitic_voice.model==model).select().as_list()
    data=pd.DataFrame(rows_list)

    # Paths without a trusted row (or without a stored duration) are skipped
    # instead of failing on a None result.
    durations = []
    for record in rows_list:
        trusted_row = db(db.trusted.path == record["path"] ).select().last()
        if trusted_row is not None and trusted_row.duration is not None:
            durations.append(trusted_row.duration)

    # One pivot provides all three aggregates (pivot_table averages by default).
    summary = pd.pivot_table(data,values=['time','similarity', 'similaritypartial'],index="model")
    avg_time = summary['time'].values[0]
    similarity = summary['similarity'].values[0]
    similaritypartial = summary['similaritypartial'].values[0]

    if durations:
        duration = statistics.mean(durations)
        # Processing time relative to the audio length.
        efectivetime = avg_time/duration
    else:
        duration = None
        efectivetime = None

    card="""<div class="flip-card">
  <div class="flip-card-inner">
    <div class="flip-card-front">
      <p style="width:300px;height:300px;">{0} </p>
    </div>
    <div class="flip-card-back">
      <h1>time of process (sg)</h1>
      <p>{1}</p>
      <h1>similarity</h1>
      <p>{2}</p>
      <h1>similaritypartial</h1>
      <p>{3}</p>
      <h1>time of audio(sg)</h1>
      <p>{4}</p>
      <h1>time in process</h1>
      <p>{5}</p>
    </div>
  </div>
</div>""".format(model,avg_time,similarity,similaritypartial,duration,efectivetime)
    return {"duration":duration,"time":avg_time,"similarity":similarity,"similaritypartial":similaritypartial,"card":card,"data":list2tablehtml(rows_list,model)}


@app.get("/getmetricsvoice")
def getMetricsVoice():
    """Serve an HTML page with the statistics of every evaluated voice model.

    For each distinct ``analitic_voice.model`` the page shows the flip card
    and the detail table produced by ``tableVoice``.

    Returns:
        HTMLResponse: the assembled page with status 200.
    """
    models = [row.model for row in db().select(db.analitic_voice.model, distinct=True)]
    summaries = [tableVoice(model) for model in models]
    cards = "".join(summary["card"] for summary in summaries)
    dataAll = "".join(summary["data"] for summary in summaries)

    # The selector must read ".container" (no space) and the body must use
    # plain ASCII quotes in class="container"; otherwise the flex rule never
    # applies to the card wrapper.
    htmlhead="""<!DOCTYPE html>
<html lang="es">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Evaluacion de modelos voice2txt</title>
    <style>
.container{

display:flex;

}
/* The flip card container - set the width and height to whatever you want. We have added the border property to demonstrate that the flip itself goes out of the box on hover (remove perspective if you don't want the 3D effect */
.flip-card {
  background-color: transparent;
  width: 500px;
  height: 500px;
  border: 1px solid #f1f1f1;
  perspective: 1000px; /* Remove this if you don't want the 3D effect */
}

/* This container is needed to position the front and back side */
.flip-card-inner {
  position: relative;
  width: 100%;
  height: 100%;
  text-align: center;
  transition: transform 0.8s;
  transform-style: preserve-3d;
}

/* Do an horizontal flip when you move the mouse over the flip box container */
.flip-card:hover .flip-card-inner {
  transform: rotateY(180deg);
}

/* Position the front and back side */
.flip-card-front, .flip-card-back {
  position: absolute;
  width: 100%;
  height: 100%;
  -webkit-backface-visibility: hidden; /* Safari */
  backface-visibility: hidden;
}

/* Style the front side (fallback if image is missing) */
.flip-card-front {
  background-color: #bbb;
  color: black;
}

/* Style the back side */
.flip-card-back {
  background-color: dodgerblue;
  color: white;
  transform: rotateY(180deg);
}
    </style>
</head>"""

    # Only the body goes through str.format; the head holds literal CSS braces.
    htmlbody="""<body>
    <h1>Estadisticas modelos de voice</h1>
    <div  class="container">
    {0}
    </div>
    {1}
</body>
</html>
    """.format(cards,dataAll)
    html=htmlhead+htmlbody
    return HTMLResponse(content=html, status_code=200)


@app.get("/EvalFact")
@app.post("/EvalFact")
def EvalFact(response:Response1):
    """Run the invoice OCR evaluation for one file and store the result.

    Args:
        response (Response1): ``path`` of the file, ``task_prompt``, the
            evaluator ``option`` (``model`` is used when ``option`` is empty)
            and an optional ``TrustedOCR`` reference text.

    Returns:
        The result dict of ``main.EvalFacturas`` with ``path`` set to the
        requested path.
    """
    path=response.path
    task_prompt=response.task_prompt
    # The bundled form (/evalocrfactura) submits this value as "option";
    # "model" remains the fallback so callers that send it keep working.
    option=response.option or response.model
    Trusted=response.TrustedOCR
    if task_prompt=="" and Trusted=="":
        # Fall back to the stored reference; "ocr" is the mode spelling used
        # by addTrusted. Conditions are combined with "&" so both apply.
        row=db((db.trusted.path == path) & (db.trusted.mode == "ocr")).select().first()
        if row is not None and row.trusted is not None:
            Trusted=row.trusted
    # Pass the resolved reference, so a value found in the database is used.
    Sal=main.EvalFacturas(path,task_prompt,Trusted,option)
    Sal["path"]=path
    same_result = (db.analitic_ocr.path == Sal["path"]) & (db.analitic_ocr.model == Sal["model"])
    if db(same_result).count()==0:
        db.analitic_ocr.insert(**Sal)
    else:
        db(same_result).update(similarity= Sal["similarity"],similaritypartial= Sal["similaritypartial"],jsonok=Sal["jsonok"])
    db.commit()

    return Sal

@app.get("/EvalLLMFact")
@app.post("/EvalLLMFact")
def EvalLLMFact(response:Response2):
    """Forward an LLM invoice evaluation request to ``main.EvalllmFacturas``.

    Args:
        response (Response2): request body; its fields are passed on
            positionally in the order path, task_prompt, system, content,
            max_tokens, model, prompt, TrustedLLmjson.

    Returns:
        Whatever ``main.EvalllmFacturas`` returns.
    """
    return main.EvalllmFacturas(
        response.path,
        response.task_prompt,
        response.system,
        response.content,
        response.max_tokens,
        response.model,
        response.prompt,
        response.TrustedLLmjson,
    )

@app.get("/evalvoicehtml")
def EvalVoicehtml():
    """Serve an HTML form that posts a voice evaluation request.

    The first drop-down lists every entry of the audio example directory
    (value: path built from ``pwd``, label: running number and file name);
    the second selects whisper or vosk. The page's script posts both values
    as JSON to ``/EvalVoice`` and shows the JSON answer.

    Returns:
        HTMLResponse: the form page with status 200.
    """
    from html import escape

    # File names are escaped so that quotes or angle brackets in a name
    # cannot break out of the attribute or inject markup.
    option_template = '<option value="%s">Opción %s, %s</option>\n        '
    options = "".join(
        option_template % (escape(pwd+"/"+pathAud+"/"+name), index, escape(name))
        for index, name in enumerate(os.listdir(pathAud), start=1)
    )

    page="""<!DOCTYPE html>
<html lang="es">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Evaluacion de modelos voice2txt</title>
    <style>
        body {
            font-family: Arial, sans-serif;
            margin: 20px;
        }
        input, button {
            margin: 10px 0;
            padding: 5px;
        }
        #respuesta {
            margin-top: 20px;
            padding: 10px;
            border: 1px solid #ccc;
            background-color: #f9f9f9;
        }
    </style>
</head>
<body>
    <h1>Petición POST a API</h1>

    <select id="texto1">
        %s
    </select>

    <br>
    <select id="texto2">
        <option value="whisper">whisper</option>
        <option value="vosk">vosk</option>
    </select>
    <br>
    <button onclick="enviarPeticion()">Enviar petición</button>
    <div id="respuesta"></div>

    <script>
        function enviarPeticion() {
            const texto1 = document.getElementById('texto1').value;
            const texto2 = document.getElementById('texto2').value;
            const datos = {
                path: texto1,
                model: texto2
            };

            fetch('/EvalVoice', {
                method: 'POST',
                headers: {
                    'Content-Type': 'application/json'
                },
                body: JSON.stringify(datos)
            })
            .then(response => response.json())
            .then(data => {
                document.getElementById('respuesta').innerHTML = JSON.stringify(data, null, 2);
            })
            .catch(error => {
                document.getElementById('respuesta').innerHTML = 'Error: ' + error;
            });
        }
    </script>
</body>
</html>
    """%(options)
    return HTMLResponse(content=page, status_code=200)


@app.get("/evalocrfactura")
def EvalOCRFactura():
    """Serve an HTML form that posts an invoice OCR evaluation request.

    The first drop-down lists every entry of the invoice example directory
    (value: path built from ``pwd``, label: running number and file name).
    The page's script posts ``path``, ``task_prompt``, ``TrustedOCR`` and
    ``option`` as JSON to ``/EvalFact`` and shows the JSON answer.

    Returns:
        HTMLResponse: the form page with status 200.
    """
    from html import escape

    # File names are escaped so that quotes or angle brackets in a name
    # cannot break out of the attribute or inject markup.
    option_template = '<option value="%s">Opción %s, %s</option>\n        '
    options = "".join(
        option_template % (escape(pwd+"/"+pathFact+"/"+name), index, escape(name))
        for index, name in enumerate(os.listdir(pathFact), start=1)
    )
    page="""<!DOCTYPE html>
<html lang="es">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Evaluacion de modelos OCR</title>
    <style>
        body {
            font-family: Arial, sans-serif;
            margin: 20px;
        }
        input, button {
            margin: 10px 0;
            padding: 5px;
        }
        #respuesta {
            margin-top: 20px;
            padding: 10px;
            border: 1px solid #ccc;
            background-color: #f9f9f9;
        }
    </style>
</head>
<body>
    <h1>Petición POST a API</h1>
    <select id="texto1">
        %s
    </select>
    <br>

    <select id="texto2">
        <option value="More Detailed Caption">More Detailed Caption</option>
        <option value="OCR">OCR</option>
        <option value="parsed">parsed</option>
        <option value="scan">scan</option>
    </select>
    <br>
    <input type="text" id="texto3" placeholder="TrustedOCR">
    <br>
    <input type="text" id="texto4" placeholder="option">
    <br>
    <button onclick="enviarPeticion()">Enviar petición</button>
    <div id="respuesta"></div>

    <script>
        function enviarPeticion() {
            const texto1 = document.getElementById('texto1').value;
            const texto2 = document.getElementById('texto2').value;
            const texto3 = document.getElementById('texto3').value;
            const texto4 = document.getElementById('texto4').value;
            const datos = {
                path: texto1,
                task_prompt: texto2,
                TrustedOCR: texto3,
                option: texto4
            };

            fetch('/EvalFact', {
                method: 'POST',
                headers: {
                    'Content-Type': 'application/json'
                },
                body: JSON.stringify(datos)
            })
            .then(response => response.json())
            .then(data => {
                document.getElementById('respuesta').innerHTML = JSON.stringify(data, null, 2);
            })
            .catch(error => {
                document.getElementById('respuesta').innerHTML = 'Error: ' + error;
            });
        }
    </script>
</body>
</html>
    """%(options)
    return HTMLResponse(content=page, status_code=200)

def list2tablehtmlOCR(listdata,model):
    """Render OCR analytics rows as an HTML table headed by the model name.

    Args:
        listdata: iterable of mappings providing the keys "path", "time",
            "similarity" and "similaritypartial".
        model: name shown in the ``<h2>`` heading.

    Returns:
        str: an ``<h2>`` heading followed by a ``<table>`` with one row per
        element of ``listdata``. All interpolated values are HTML-escaped.
    """
    # Local import: the module name "html" is commonly reused for page strings
    # in this file, so only the function is brought in.
    from html import escape

    heading = """<h2>Table of {0}</h2>
<table style="width:100%">
  <tr>
    <th>path</th>
    <th>time</th>
    <th>similarity</th>
    <th>similaritypartial</th>
  </tr>""".format(escape(str(model)))
    row_template = """  <tr>
    <td>%s</td>
    <td>%s</td>
    <td>%s</td>
    <td>%s</td>
  </tr>
"""
    columns = ("path", "time", "similarity", "similaritypartial")
    body = "".join(
        row_template % tuple(escape(str(record[column])) for column in columns)
        for record in listdata
    )
    return heading + body + """</table>
    """


def tableOCR(model):
    """Summarise the stored OCR analytics of one model.

    Args:
        model: value matched against ``analitic_ocr.model``.

    Returns:
        dict with the keys "time", "similarity", "similaritypartial"
        (per-model aggregates from the pivot table), "card" (HTML flip card)
        and "data" (HTML table of the individual rows).
    """
    rows_list = db(db.analitic_ocr.model==model).select().as_list()
    data=pd.DataFrame(rows_list)
    # One pivot provides all three aggregates (pivot_table averages by default).
    summary = pd.pivot_table(data,values=['time','similarity', 'similaritypartial'],index="model")
    avg_time = summary['time'].values[0]
    similarity = summary['similarity'].values[0]
    similaritypartial = summary['similaritypartial'].values[0]
    card="""<div class="flip-card">
  <div class="flip-card-inner">
    <div class="flip-card-front">
      <p style="width:300px;height:300px;">{0} </p>
    </div>
    <div class="flip-card-back">
      <h1>time of process (sg)</h1>
      <p>{1}</p>
      <h1>similarity</h1>
      <p>{2}</p>
      <h1>similaritypartial</h1>
      <p>{3}</p>
    </div>
  </div>
</div>""".format(model,avg_time,similarity,similaritypartial)
    return {"time":avg_time,"similarity":similarity,"similaritypartial":similaritypartial,"card":card,"data":list2tablehtmlOCR(rows_list,model)}


@app.get("/getmetricsocr")
def getMetricsOCR():
    """Serve an HTML page with the statistics of every evaluated OCR model.

    For each distinct ``analitic_ocr.model`` the page shows the flip card and
    the detail table produced by ``tableOCR``.

    Returns:
        HTMLResponse: the assembled page with status 200.
    """
    models = [row.model for row in db().select(db.analitic_ocr.model, distinct=True)]
    summaries = [tableOCR(model) for model in models]
    cards = "".join(summary["card"] for summary in summaries)
    dataAll = "".join(summary["data"] for summary in summaries)

    # The selector must read ".container" (no space) and the body must use
    # plain ASCII quotes in class="container"; otherwise the flex rule never
    # applies to the card wrapper. The title names the OCR page.
    htmlhead="""<!DOCTYPE html>
<html lang="es">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Evaluacion de modelos OCR</title>
    <style>
.container{

display:flex;

}
/* The flip card container - set the width and height to whatever you want. We have added the border property to demonstrate that the flip itself goes out of the box on hover (remove perspective if you don't want the 3D effect */
.flip-card {
  background-color: transparent;
  width: 500px;
  height: 500px;
  border: 1px solid #f1f1f1;
  perspective: 1000px; /* Remove this if you don't want the 3D effect */
}

/* This container is needed to position the front and back side */
.flip-card-inner {
  position: relative;
  width: 100%;
  height: 100%;
  text-align: center;
  transition: transform 0.8s;
  transform-style: preserve-3d;
}

/* Do an horizontal flip when you move the mouse over the flip box container */
.flip-card:hover .flip-card-inner {
  transform: rotateY(180deg);
}

/* Position the front and back side */
.flip-card-front, .flip-card-back {
  position: absolute;
  width: 100%;
  height: 100%;
  -webkit-backface-visibility: hidden; /* Safari */
  backface-visibility: hidden;
}

/* Style the front side (fallback if image is missing) */
.flip-card-front {
  background-color: #bbb;
  color: black;
}

/* Style the back side */
.flip-card-back {
  background-color: dodgerblue;
  color: white;
  transform: rotateY(180deg);
}
    </style>
</head>"""

    # Only the body goes through str.format; the head holds literal CSS braces.
    htmlbody="""<body>
    <h1>Estadisticas modelos de OCR</h1>
    <div  class="container">
    {0}
    </div>
    {1}
</body>
</html>
    """.format(cards,dataAll)
    html=htmlhead+htmlbody
    return HTMLResponse(content=html, status_code=200)


@app.get("/evalllmfacturas")
def EvalllmFacturas():
    """Serve an HTML form that posts an LLM invoice evaluation request.

    The first drop-down lists every entry of the invoice example directory
    (value: path built from ``pwd``, label: running number and file name).
    The page's script posts path, task_prompt, system, content, max_tokens,
    model, prompt and TrustedLLmjson as JSON to ``/EvalLLMFact`` and shows
    the JSON answer.

    Returns:
        HTMLResponse: the form page with status 200.
    """
    from html import escape

    # File names are escaped so that quotes or angle brackets in a name
    # cannot break out of the attribute or inject markup.
    option_template = '<option value="%s">Opción %s, %s</option>\n        '
    options = "".join(
        option_template % (escape(pwd+"/"+pathFact+"/"+name), index, escape(name))
        for index, name in enumerate(os.listdir(pathFact), start=1)
    )
    # The second placeholder is filled with a literal "%s": that is the
    # default value shown in the "content" input.
    page="""<!DOCTYPE html>
<html lang="es">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>Evaluacion modelos LLM</title>
    <style>
        body {
            font-family: Arial, sans-serif;
            margin: 20px;
        }
        input, button {
            margin: 10px 0;
            padding: 5px;
        }
        #respuesta {
            margin-top: 20px;
            padding: 10px;
            border: 1px solid #ccc;
            background-color: #f9f9f9;
        }
    </style>
</head>
<body>
    <h1>Petición POST a API</h1>
    <select id="texto1">
        %s
    </select>
    <br>

    <select id="texto2">
        <option value="">N.A.</option>
        <option value="More Detailed Caption">More Detailed Caption</option>
        <option value="OCR">OCR</option>
        <option value="parsed">parsed</option>
        <option value="scan">scan</option>
    </select>
    <br>
    <input type="text" id="texto3" placeholder="system" value="Eres un chatbot amable">
    <br>
    <input type="text" id="texto4" placeholder="content" value="%s">
    <br>
    <input type="number" id="texto5" placeholder="max_tokens" value=1024>
    <br>
        <input type="text" id="texto6" placeholder="model" value="Claude-sonnet">
    <br>
        <input type="text" id="texto7" placeholder="prompt" value="Analiza la factura">
    <br>
    <input type="text" id="texto8" placeholder="TrustedLLmjson" value="{'A':''}">
    <br>
    <button onclick="enviarPeticion()">Enviar petición</button>
    <div id="respuesta"></div>

    <script>
        function enviarPeticion() {
            const texto1 = document.getElementById('texto1').value;
            const texto2 = document.getElementById('texto2').value;
            const texto3 = document.getElementById('texto3').value;
            const texto4 = document.getElementById('texto4').value;
            const texto5 = document.getElementById('texto5').value;
            const texto6 = document.getElementById('texto6').value;
            const texto7 = document.getElementById('texto7').value;
            const texto8 = document.getElementById('texto8').value;

            const datos = {
                path: texto1,
                task_prompt: texto2,
                system: texto3,
                content:texto4,
                max_tokens:texto5,
                model:texto6,
                prompt:texto7,
                TrustedLLmjson:texto8,
            };

            fetch('/EvalLLMFact', {
                method: 'POST',
                headers: {
                    'Content-Type': 'application/json'
                },
                body: JSON.stringify(datos)
            })
            .then(response => response.json())
            .then(data => {
                document.getElementById('respuesta').innerHTML = JSON.stringify(data, null, 2);
            })
            .catch(error => {
                document.getElementById('respuesta').innerHTML = 'Error: ' + error;
            });
        }
    </script>
</body>
</html>
    """%(options,"%s")
    return HTMLResponse(content=page, status_code=200)