Add logo, plus filtering and refactoring

This commit is contained in:
Mario Gil 2023-09-24 00:37:33 -05:00
parent 131c5e375c
commit ea83ea6a3e
2 changed files with 85 additions and 51 deletions

View File

@ -17,18 +17,32 @@ from langchain.embeddings import GPT4AllEmbeddings
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.callbacks.base import BaseCallbackManager from langchain.callbacks.base import BaseCallbackManager
from langchain.document_loaders import DataFrameLoader from langchain.document_loaders import DataFrameLoader
from langchain.embeddings import HuggingFaceEmbeddings
import pandas as pd import pandas as pd
import sqlite3 import sqlite3
from sentence_transformers import SentenceTransformer
#from cleantext import clean
import re
# Hugging Face sentence-embedding model, configured to run on the CPU and to
# return normalized embedding vectors.
model_name = 'hiiamsid/sentence_similarity_spanish_es'
model_kwargs = {'device': 'cpu'}
encode_kwargs = {'normalize_embeddings': True}

hf = HuggingFaceEmbeddings(
    model_name=model_name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs,
)
def loadModels():
    """Create the local GPT4All language model and its embedding model.

    Returns:
        A ``(llm, embeddings)`` tuple: the GPT4All LLM loaded from
        ``orca-mini-3b.ggmlv3.q4_0.bin`` and a ``GPT4AllEmbeddings`` instance.
    """
    # Built for optional stdout streaming; it is not currently passed to
    # GPT4All (the corresponding keyword argument is disabled).
    callback_manager = BaseCallbackManager([StreamingStdOutCallbackHandler()])
    llm = GPT4All(
        model="orca-mini-3b.ggmlv3.q4_0.bin",
        temp=0.1,
        streaming=True,
    )
    return llm, GPT4AllEmbeddings()
llm,emb=loadModels()
con = sqlite3.connect("motor.sqlite") def loadCopysAndData(pathsqlite="motor.sqlite"):
con = sqlite3.connect(pathsqlite)
copies_df = pd.read_sql_query("SELECT * from copies", con) copies_df = pd.read_sql_query("SELECT * from copies", con)
copiesT = copies_df[copies_df.copy_start =="T"] copiesT = copies_df[copies_df.copy_start =="T"]
copiesT=copiesT[["copy_message","id","name"]] copiesT=copiesT[["copy_message","id","name"]]
@ -37,60 +51,80 @@ B=DataFrameLoader(data,page_content_column="copy_message")
B2=DataFrameLoader(data,page_content_column="name") B2=DataFrameLoader(data,page_content_column="name")
documents=B.load() documents=B.load()
documents2=B2.load() documents2=B2.load()
return documents,documents2
def makeFaissdb(documents,folder_path,embedding):
    """Load the FAISS index saved in *folder_path*, rebuilding it if loading fails.

    Args:
        documents: Documents to index when no saved index can be loaded.
        folder_path: Directory the index is loaded from and, after a rebuild,
            saved to.
        embedding: Embedding model used both to load and to build the index.

    Returns:
        The loaded or newly built FAISS vector store.
    """
    try:
        return FAISS.load_local(folder_path=folder_path, embeddings=embedding)
    except Exception:
        # Best-effort cache: any load failure (missing or unreadable index)
        # triggers a rebuild. ``Exception`` rather than a bare ``except`` so
        # that KeyboardInterrupt / SystemExit still propagate.
        db = FAISS.from_documents(documents, embedding)
        db.save_local(folder_path=folder_path)
        return db
# Load the models and the source documents once, at import time.
llm, emb = loadModels()
documents, documents2 = loadCopysAndData()

# One FAISS index per (document set, embedding model) pair; each index is
# stored in its own folder.
db = makeFaissdb(documents, "Copies", emb)
db2 = makeFaissdb(documents2, "names", emb)
db3 = makeFaissdb(documents2, "nameshf", hf)
def FinderDbs(query,dbs,filtred=False,th=1.2):
    """Search several vector stores and merge their hits by document id.

    Every store in *dbs* is asked for its 4 best matches through
    ``similarity_search_with_score``. Hits are keyed by ``metadata["id"]``:
    the first hit for an id stores its score and page content; each further
    hit for the same id replaces the stored score with
    ``min(stored, new) - 0.1``, so ids found more than once rank better.
    Lower scores are treated as better throughout.

    Args:
        query: Text to search for.
        dbs: Iterable of stores exposing
            ``similarity_search_with_score(query, k)`` and returning
            ``(document, score)`` pairs.
        filtred: When true, keep only entries whose merged score is strictly
            below *th*.
        th: Score threshold applied when *filtred* is true.

    Returns:
        A ``(data, keys)`` tuple. ``data`` maps each id to
        ``{"d": score, "page_content": text}``, ordered by ascending score;
        ``keys`` is ``data.keys()``.
    """
    top_k = 4
    repeat_bonus = 0.1

    merged = {}
    for store in dbs:
        for doc, score in store.similarity_search_with_score(query, top_k):
            doc_id = doc.metadata["id"]
            if doc_id in merged:
                entry = merged[doc_id]
                entry["d"] = min(entry["d"], score) - repeat_bonus
            else:
                merged[doc_id] = {"d": score, "page_content": doc.page_content}

    if filtred:
        # Honour the caller-supplied threshold instead of a hard-coded value.
        merged = {key: entry for key, entry in merged.items() if entry["d"] < th}

    ranked = dict(sorted(merged.items(), key=lambda item: item[1]["d"]))
    return ranked, ranked.keys()
def QARequest(Pregunta,filtred=False):
    """Look up *Pregunta* in the module-level ``db`` and ``db2`` indexes.

    Args:
        Pregunta: The user's query text.
        filtred: Forwarded to ``FinderDbs`` to enable score filtering.

    Returns:
        Whatever ``FinderDbs`` returns for the two indexes: a
        ``(data, keys)`` tuple.
    """
    return FinderDbs(Pregunta, [db, db2], filtred)
# Gradio front end: a query box, a filter toggle and two result fields.
with gr.Blocks() as demo:
    gr.Image("logo.jpg", height=100)

    gr.Markdown("Esta es la busqueda que hace el usuario")
    # The query text is passed on unmodified; no normalization is applied.
    Pregunta = gr.Textbox(label="Pregunta")
    filtred = gr.Checkbox(label="filtrado")

    gr.Markdown("Respuestas para orca desde los copys")
    Respuesta = gr.Textbox(label="Respuesta")
    id = gr.Textbox(label="id")
    # Separate result panels for the "names" and Hugging Face indexes, and
    # the metric fields, are currently disabled.

    Enviar_btn = gr.Button("Responder")
    Enviar_btn.click(
        fn=QARequest,
        inputs=[Pregunta, filtred],
        outputs=[Respuesta, id],
        api_name="Angela",
    )

demo.launch()

BIN
logo.jpg Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 14 KiB