Metrics voice completes

This commit is contained in:
Mario Gil 2024-07-30 11:49:42 -05:00
parent 4b52d4aa91
commit f7e3913d2e
6 changed files with 317 additions and 48 deletions

1
.gitignore vendored
View File

@ -8,3 +8,4 @@ databases/storage.db
.vscode/* .vscode/*
__pycache__/* __pycache__/*
conf/experiment_config.json

131
apis.py
View File

@ -1,10 +1,10 @@
import fastapi import fastapi
from fastapi import FastAPI, Request from fastapi import FastAPI, Request
from fastapi.responses import HTMLResponse from fastapi.responses import HTMLResponse,JSONResponse
from pydantic import BaseModel from pydantic import BaseModel
import time import time
from fastapi.staticfiles import StaticFiles from fastapi.staticfiles import StaticFiles
from fastapi import FastAPI, Query, File, UploadFile from fastapi import FastAPI, Query, File, UploadFile,HTTPException
#from fastapi.middleware.cors import CORSMiddleware #from fastapi.middleware.cors import CORSMiddleware
from starlette.middleware.cors import CORSMiddleware from starlette.middleware.cors import CORSMiddleware
import main import main
@ -13,9 +13,23 @@ from databases import db
import audioread import audioread
import pandas as pd import pandas as pd
import statistics import statistics
import hashlib
from datetime import datetime
import json
pwd = os.getcwd() pwd = os.getcwd()
pathAud="example/audio" pathAud="example/audio"
pathFact="example/factura" pathFact="example/factura"
pathText="example/texto"
def extractConfig(nameModel="SystemData",relPath=os.path.join(pwd,"conf/experiment_config.json"),dataOut="keyantrophics"):
    """Return one entry from one section of the JSON configuration file.

    Args:
        nameModel: top-level key (section) of the JSON document.
        relPath: path of the configuration file. It is joined onto the
            current working directory, so an absolute path is used as is.
        dataOut: key to read inside the selected section.

    Returns:
        The value stored under ``dataOut`` in the ``nameModel`` section.

    Raises:
        OSError: if the file cannot be opened.
        KeyError: if the section or the entry is missing.
    """
    config_file = os.path.join(os.getcwd(), relPath)
    with open(config_file, mode="r", encoding="utf-8") as handle:
        document = json.load(handle)
    return document[nameModel][dataOut]
mode_list=extractConfig(nameModel="SystemData",dataOut="mode_list")
app = FastAPI() app = FastAPI()
#app.mount("/statics", StaticFiles(directory="statics"), name="statics") #app.mount("/statics", StaticFiles(directory="statics"), name="statics")
app.add_middleware( app.add_middleware(
@ -30,7 +44,6 @@ class Response(BaseModel):
"""Structure of data to querry of make post from X or article blog """Structure of data to querry of make post from X or article blog
""" """
path: str = Query("", description="Style and sentiments of text") path: str = Query("", description="Style and sentiments of text")
Trusted: str = Query("", description="Style and sentiments of text")
model : str = Query("whisper", description="Style and sentiments of text") model : str = Query("whisper", description="Style and sentiments of text")
class Response1(BaseModel): class Response1(BaseModel):
path: str = Query("", description="path file") path: str = Query("", description="path file")
@ -55,29 +68,95 @@ class Response3(BaseModel):
Trusted: str = Query("", description="Style and sentiments of text") Trusted: str = Query("", description="Style and sentiments of text")
mode : str = Query("whisper", description="Style and sentiments of text") mode : str = Query("whisper", description="Style and sentiments of text")
#Funcionales
@app.get("/addTrusted")
@app.post("/addTrusted")
def addTrusted(response:Response3):
    """Register, or refresh, the trusted (reference) data for one item.

    Args:
        response (Response3): request body with three fields:
            path: path of the file on this system, or the text itself when
                the mode maps to the "texto" kind.
            Trusted: reference content that later evaluations compare to.
            mode: one of the keys of ``mode_list`` (llm_compra, llm_factura,
                llm_generaciontexto, llm_rag, ocr, voice).

    Returns:
        A confirmation string ("Add ..." or "Update ..."), or a 404
        JSONResponse when the mode is unknown or the file does not exist.
    """
    path=response.path
    Trusted=response.Trusted
    mode=response.mode
    last_modified=datetime.now()
    if mode not in mode_list:
        return JSONResponse(
            status_code=404,
            content={"content": "mode no found" }
        )
    # These modes receive a file path, so the file has to exist.
    if mode in ("llm_factura", "ocr", "voice") and not os.path.isfile(path):
        return JSONResponse(
            status_code=404,
            content={"content": "file no found" }
        )
    # Every metric starts at 0 so that a kind not handled below cannot leave
    # one of them unbound when the row is written.
    length=0
    size=0
    duration=0
    kind=mode_list[mode]
    if kind=="texto":
        # The text is persisted in a file named after its hash; the stored
        # path must be the path of that very file (directory separator
        # included), otherwise later lookups never find it.
        hash1 = hashlib.sha256(path.encode()).hexdigest()+".txt"
        text_file=os.path.join(pathText,hash1)
        with open(text_file, "w") as f:
            f.write(path)
        path=os.path.join(pwd,text_file)
        length=len(Trusted)
    elif kind=="factura":
        # File size in MiB.
        size=os.stat(path).st_size / (1024 * 1024)
    elif kind=="audio":
        with audioread.audio_open(path) as f:
            duration = f.duration

    # pyDAL queries are combined with "&"; Python "and" would keep only one
    # of the two conditions.
    query=(db.trusted.path == path)&(db.trusted.mode == mode)
    if db(query).count()==0:
        # The size column of the trusted table is named "sizeMB".
        db.trusted.insert(path=path,trusted=Trusted,mode=mode,sizeMB=size,duration=duration,last_modified=last_modified,length=length)
        db.commit()
        return "Add %s in mode %s"%(path,mode)
    item=db(query).select().last()
    # Rows written before the counter existed may hold NULL.
    modification_count=(item.modification_count or 0) + 1
    db(query).update(trusted=Trusted,sizeMB=size,duration=duration,length=length,last_modified=last_modified,modification_count=modification_count)
    db.commit()
    return "Update %s in mode %s"%(path,mode)
@app.get("/EvalVoice")
@app.post("/EvalVoice")
def EvalVoice(response:Response):
    """Evaluate one audio file with a speech-to-text model and store the result.

    Args:
        response (Response): request body with two fields:
            path: path of the audio file; a trusted row with this path and
                mode "voice" must already exist.
            model: "whisper" selects ``main.EvalWhisper``; any other value
                selects ``main.EvalVosk``.

    Returns:
        The result dictionary produced by the evaluator, extended with a
        "last_modified" timestamp, or a 404 JSONResponse when no trusted
        transcription is registered for the file.
    """
    path=response.path
    model=response.model
    trusted_query=(db.trusted.path == path) & (db.trusted.mode == "voice")
    if db(trusted_query).count()==0:
        return JSONResponse(
            status_code=404,
            content={"content": "Trusted no found" }
        )
    Trusted=db(trusted_query).select().last().trusted
    print(Trusted)
    if model=="whisper":
        Sal=main.EvalWhisper(path,Trusted)
    else:
        Sal=main.EvalVosk(path,Trusted)
    Sal["last_modified"]=datetime.now()
    # The two conditions must be combined with "&". With Python "and" only
    # the model condition survives, so the update below would overwrite the
    # results of every file evaluated with that model.
    result_query=(db.analitic_voice.path == Sal["path"]) & (db.analitic_voice.model == Sal["model"])
    if db(result_query).count()==0:
        db.analitic_voice.insert(**Sal)
    else:
        db(result_query).update(similarity= Sal["similarity"],similaritypartial= Sal["similaritypartial"],last_modified=Sal["last_modified"])
    db.commit()
    return Sal
def list2tablehtml(listdata,model): def list2tablehtml(listdata,model):
html="""<h2>Table of {0}</h2> html="""<h2>Table of {0}</h2>
<table style="width:100%"> <table style="width:100%">
@ -223,30 +302,6 @@ display:flex;
return HTMLResponse(content=html, status_code=200) return HTMLResponse(content=html, status_code=200)
@app.get("/EvalVoice")
@app.post("/EvalVoice")
def EvalVoice(response:Response):
path=response.path
Trusted=response.Trusted
model=response.model
if Trusted=="":
row=db(db.trusted.path == path and db.trusted.mode == "voice").select().first()
try:
Trusted=row.trusted
except:
pass
if model=="whisper":
Sal=main.EvalWhisper(path,Trusted)
else:
Sal=main.EvalVosk(path,Trusted)
if db(db.analitic_voice.path == Sal["path"] and db.analitic_voice.model == Sal["model"]).count()==0:
db.analitic_voice.insert(**Sal)
db.commit()
else:
db(db.analitic_voice.path == Sal["path"] and db.analitic_voice.model == Sal["model"]).update(similarity= Sal["similarity"],similaritypartial= Sal["similaritypartial"])
db.commit()
return Sal
@app.get("/EvalFact") @app.get("/EvalFact")
@app.post("/EvalFact") @app.post("/EvalFact")
@ -334,9 +389,7 @@ def EvalVoicehtml():
</select> </select>
<br> <br>
<input type="text" id="texto2" placeholder="Trusted"> <select id="texto2">
<br>
<select id="texto3">
<option value="whisper">whisper</option> <option value="whisper">whisper</option>
<option value="vosk">vosk</option> <option value="vosk">vosk</option>
</select> </select>
@ -348,11 +401,9 @@ def EvalVoicehtml():
function enviarPeticion() { function enviarPeticion() {
const texto1 = document.getElementById('texto1').value; const texto1 = document.getElementById('texto1').value;
const texto2 = document.getElementById('texto2').value; const texto2 = document.getElementById('texto2').value;
const texto3 = document.getElementById('texto3').value;
const datos = { const datos = {
path: texto1, path: texto1,
Trusted: texto2, model: texto2
model: texto3
}; };
fetch('/EvalVoice', { fetch('/EvalVoice', {

View File

@ -5,8 +5,11 @@ db.define_table(
Field("path"), Field("path"),
Field("mode"), Field("mode"),
Field("trusted"), Field("trusted"),
Field("duration",type="double"), Field("duration",type="double",default=0),#audio
Field("size",type="double") Field("sizeMB",type="double",default=0),# audio,factura
Field("length",type="integer",default=0),#texto
Field('last_modified', 'datetime'),
Field('modification_count', 'integer', default=0)
) )
db.define_table( db.define_table(
"analitic_voice", "analitic_voice",
@ -16,7 +19,8 @@ db.define_table(
Field("time", type="double"), Field("time", type="double"),
Field("path"), Field("path"),
Field("similarity", type="double"), Field("similarity", type="double"),
Field("similaritypartial", type="double") Field("similaritypartial", type="double"),
Field('last_modified', 'datetime')
) )
db.define_table( db.define_table(
@ -28,16 +32,54 @@ db.define_table(
Field("path"), Field("path"),
Field("similarity", type="double"), Field("similarity", type="double"),
Field("similaritypartial", type="double"), Field("similaritypartial", type="double"),
Field("jsonok" ,type="integer") Field("jsonok" ,type="integer"),
Field('last_modified', 'datetime')
) )
# The four per-task LLM analytics tables share one schema, so they are
# declared in a loop; every pass builds fresh Field objects for its table.
for _llm_table in (
    "analitic_llm_compra",
    "analitic_llm_factura",
    "analitic_llm_generaciontexto",
    "analitic_llm_rag",
):
    db.define_table(
        _llm_table,
        Field("content"),
        Field("trusted"),
        Field("model"),
        Field("time", type="double"),
        Field("path"),
        Field("similarity", type="double"),
        Field("similaritypartial", type="double"),
        Field('last_modified', 'datetime'),
    )

153
gui.py Normal file
View File

@ -0,0 +1,153 @@
from taipy.gui import Gui
import hashlib
import json
import codecs, os
from taipy.gui import Html
import pandas as pd
import requests
import statistics
from databases import db
pwd = os.getcwd()
HTML = os.path.join(pwd,"html", "index.html")
# Read the landing page inside a context manager so the file handle is
# closed as soon as its content is in memory.
with codecs.open(HTML, "r", "utf-8") as file_read:
    index = file_read.read()
html_page_index = Html(index)
def getmetricvoice(model):
    """Aggregate the stored voice evaluations of one model.

    Args:
        model: value of ``analitic_voice.model`` whose rows are aggregated.

    Returns:
        dict with the keys "model", "duration" (mean duration of the trusted
        voice entries of the evaluated files), "time", "similarity" and
        "similaritypartial" (means over the evaluations) and "efectivetime"
        (mean time divided by mean duration). "duration" and "efectivetime"
        are NaN when no trusted duration is available.
    """
    rows_list = db(db.analitic_voice.model==model).select().as_list()
    data=pd.DataFrame(rows_list)
    durationL=[]
    for i in rows_list:
        # Only the trusted entry registered in voice mode describes this
        # audio; skip files whose trusted row is missing or has no duration
        # instead of failing on None.
        trusted_row=db((db.trusted.path == i["path"]) & (db.trusted.mode == "voice")).select().last()
        if trusted_row is not None and trusted_row.duration is not None:
            durationL.append(trusted_row.duration)
    duration=statistics.mean(durationL) if durationL else float("nan")
    # One pivot yields the three per-model means (the default aggregation of
    # pivot_table is the mean).
    means=pd.pivot_table(data,values=['time','similarity', 'similaritypartial'],index="model")
    time=means['time'].values[0]
    similarity=means['similarity'].values[0]
    similaritypartial=means['similaritypartial'].values[0]
    efectivetime=time/duration
    return ({"model":model,"duration":duration,"time":time,"similarity":similarity,"similaritypartial":similaritypartial,"efectivetime":efectivetime})
def html_getmetricvoice():
    """Build the voice-metrics page template and the two tables it shows.

    Returns:
        tuple ``(html, data, datafiles)``:
            html: page template holding two ``<taipy:table>`` elements.
            data: DataFrame with one row per model, as returned by
                ``getmetricvoice``.
            datafiles: DataFrame with one row per stored voice evaluation.
    """
    models=[row.model for row in db().select(db.analitic_voice.model, distinct=True)]
    data=pd.DataFrame({model: getmetricvoice(model) for model in models}).T
    datafiles=pd.DataFrame({row.id: row.as_dict() for row in db().select(db.analitic_voice.ALL)}).T
    # The placeholders name the module-level variables that receive this
    # function's result (data_voice and data_files_voices); the second one
    # used to be spelled without the final "s" and so matched no variable.
    html="""
<taipy:table>{data_voice}</taipy:table>
<taipy:table filter=True>{data_files_voices}</taipy:table>
"""
    return html,data,datafiles
# Built once, when the module is imported.
# NOTE(review): presumably Taipy resolves the {...} placeholders of the page
# template against these module-level names - confirm that every placeholder
# matches a name bound here.
html_page_getmetricsvoice,data_voice,data_files_voices=html_getmetricvoice()
def evalVoicehtml():
    """Return the HTML page used to launch a voice-model evaluation.

    The page offers one ``<option>`` per entry of ``example/audio`` (the
    value is the entry's path under the working directory captured in
    ``pwd``) plus a model selector, and posts the selection as JSON to
    ``/EvalVoice``.

    Returns:
        str: the complete HTML document.
    """
    # Local import: the name "html" is used for the page string below.
    from html import escape
    pathAud="example/audio"
    options=[]
    for t, i in enumerate(os.listdir(pathAud), start=1):
        # File names are escaped so that quotes or angle brackets in a name
        # cannot break out of the attribute or inject markup.
        options.append(
            '<option value="%s">Opción %s, %s</option>\n'
            % (escape(pwd+"/"+pathAud+"/"+i), t, escape(i))
        )
    Sal="".join(options)
    html="""<!DOCTYPE html>
<html lang="es">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Evaluacion de modelos voice2txt</title>
<style>
body {
font-family: Arial, sans-serif;
margin: 20px;
}
input, button {
margin: 10px 0;
padding: 5px;
}
#respuesta {
margin-top: 20px;
padding: 10px;
border: 1px solid #ccc;
background-color: #f9f9f9;
}
</style>
</head>
<body>
<h1>Petición POST a API</h1>
<select id="texto1">
%s
</select>
<br>
<select id="texto2">
<option value="whisper">whisper</option>
<option value="vosk">vosk</option>
</select>
<br>
<button onclick="enviarPeticion()">Enviar petición</button>
<div id="respuesta"></div>
<script>
function enviarPeticion() {
const texto1 = document.getElementById('texto1').value;
const texto2 = document.getElementById('texto2').value;
const datos = {
path: texto1,
model: texto2
};
fetch('/EvalVoice', {
method: 'POST',
headers: {
'Content-Type': 'application/json'
},
body: JSON.stringify(datos)
})
.then(response => response.json())
.then(data => {
document.getElementById('respuesta').innerHTML = JSON.stringify(data, null, 2);
})
.catch(error => {
document.getElementById('respuesta').innerHTML = 'Error: ' + error;
});
}
</script>
</body>
</html>
"""%(Sal)
    return html
html_page_evalvoice = Html(evalVoicehtml())
# html_page_index is already built from html/index.html at the top of this
# module; reading the same file a second time only leaked another handle.
# Sample table bound to the {data} placeholder of html/index.html.
data=pd.DataFrame({'col1': [1, 2], 'col2': [3, 4]})
# Route name -> page served by the Taipy GUI.
pages = {
    "/": html_page_index ,
    "getmetricsvoice": Html(html_page_getmetricsvoice),
    "evalvoice":html_page_evalvoice
}
app = Gui(pages=pages)
if __name__=="__main__":
    app.run(use_reloader=True,port=7882, change_delay=1600)

View File

22
html/index.html Normal file
View File

@ -0,0 +1,22 @@
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>CIDITEL AI Playground</title>
<link rel="stylesheet" tyle="text-decoration: none;" href="https://cdn.jsdelivr.net/npm/bulma@0.9.4/css/bulma.min.css">
<link rel="stylesheet" tyle="text-decoration: none;" href="https://fonts.googleapis.com/css2?family=Material+Symbols+Outlined:opsz,wght,FILL,GRAD@24,400,0,0" />
<link rel="shortcut icon" tyle="text-decoration: none;" href="statics/icons/favicon.svg" type="image/x-icon">
<link rel="stylesheet" tyle="text-decoration: none;" href="statics/css/style.css">
<link rel="stylesheet" tyle="text-decoration: none;" href="statics/css/media-queries.css">
<link rel="preconnect" tyle="text-decoration: none;" href="https://fonts.googleapis.com">
<link rel="preconnect" tyle="text-decoration: none;" href="https://fonts.gstatic.com" crossorigin>
<link tyle="text-decoration: none;" href="https://fonts.googleapis.com/css2?family=Kanit:ital,wght@0,100;0,200;0,300;0,400;0,500;0,600;0,700;0,800;0,900;1,100;1,200;1,300;1,400;1,500;1,600;1,700;1,800;1,900&display=swap" rel="stylesheet">
</head>
<body>
<taipy:table>{data}</taipy:table>
</body>
</html>