import re
import sqlite3

import gradio as gr
import pandas as pd
from faiss import write_index, read_index
from langchain import PromptTemplate
from langchain.chains import LLMChain
from langchain.document_loaders import (
    DataFrameLoader,
    TextLoader,
    UnstructuredFileLoader,
    UnstructuredURLLoader,
)
from langchain.document_loaders.csv_loader import CSVLoader
from langchain.document_loaders.recursive_url_loader import RecursiveUrlLoader
from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.llms import GPT4All
from langchain.embeddings import GPT4AllEmbeddings, HuggingFaceEmbeddings
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.callbacks.base import BaseCallbackManager
from sentence_transformers import SentenceTransformer
#from cleantext import clean

# Spanish sentence-similarity model used for the HuggingFace embeddings index.
model_name = 'hiiamsid/sentence_similarity_spanish_es'
model_kwargs = {'device': 'cpu'}
encode_kwargs = {'normalize_embeddings': True}
hf = HuggingFaceEmbeddings(
    model_name=model_name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs,
)


def loadModels():
    """Load the local GPT4All LLM and its default embeddings."""
    #model = GPT4All("orca-mini-3b.ggmlv3.q4_0.bin")
    callback_manager = BaseCallbackManager([StreamingStdOutCallbackHandler()])
    llm = GPT4All(
        model="orca-mini-3b.ggmlv3.q4_0.bin",
        temp=0.1,
        streaming=True,
    )  # callback_manager=callback_manager, verbose=True, repeat_last_n=0
    embeddings = GPT4AllEmbeddings()
    return llm, embeddings


def loadCopysAndData(pathsqlite="motor.sqlite"):
    """Read the `copies` table from SQLite and build two document sets:
    one using `copy_message` as page content, the other using `name`."""
    con = sqlite3.connect(pathsqlite)
    copies_df = pd.read_sql_query("SELECT * from copies", con)
    copiesT = copies_df[copies_df.copy_start == "T"]
    copiesT = copiesT[["copy_message", "id", "name"]]
    data = copiesT
    B = DataFrameLoader(data, page_content_column="copy_message")
    B2 = DataFrameLoader(data, page_content_column="name")
    documents = B.load()
    documents2 = B2.load()
    return documents, documents2


def makeFaissdb(documents, folder_path, embedding):
    """Load a FAISS index from disk if it exists; otherwise build and save it."""
    try:
        db = FAISS.load_local(folder_path=folder_path, embeddings=embedding)
    except Exception:
        db = FAISS.from_documents(documents, embedding)
        db.save_local(folder_path=folder_path)
    return db


llm, emb = loadModels()
documents, documents2 = loadCopysAndData()
db = makeFaissdb(documents, "Copies", emb)
db2 = makeFaissdb(documents2, "names", emb)
db3 = makeFaissdb(documents2, "nameshf", hf)


def FinderDbs(query, dbs, filtred=False, th=1.2):
    """Search every index in `dbs` and merge results by document id.
    When an id appears in more than one index, its distance gets a 0.1 bonus."""
    AllData = {}
    for dbt in dbs:
        Sal = dbt.similarity_search_with_score(query, 4)
        for output in Sal:
            doc_id = output[0].metadata["id"]
            if doc_id in AllData:
                AllData[doc_id]["d"] = min(AllData[doc_id]["d"] - 0.1, output[1] - 0.1)
            else:
                AllData[doc_id] = {"d": output[1], "page_content": output[0].page_content}

    for item in AllData.items():
        print(item)

    if filtred:
        # Keep only the results whose distance is below the threshold.
        filtredData = {k: v for k, v in AllData.items() if v["d"] < th}
        filtredData = dict(sorted(filtredData.items(), key=lambda item: item[1]["d"]))
        return filtredData, filtredData.keys()

    AllData = dict(sorted(AllData.items(), key=lambda item: item[1]["d"]))
    return AllData, AllData.keys()


def QARequest(Pregunta, filtred=False):
    """Gradio callback: return the merged results and their ids for a query."""
    query = Pregunta
    AllData, ids = FinderDbs(query, [db, db2], filtred)
    return AllData, list(ids)


with gr.Blocks() as demo:
    gr.Image("logo.jpg", height=100)
    gr.Markdown("Esta es la busqueda que hace el usuario")  # "This is the search the user makes"
    Pregunta = gr.Textbox(label="Pregunta")
    #Pregunta = re.sub(r"(@\[A-Za-z0-9]+)|([^0-9A-Za-z \t])|(\w+:\/\/\S+)|^rt|http.+?", "", Pregunta)
    #Pregunta = Pregunta.strip().lower()
    filtred = gr.Checkbox(label="filtrado")
    gr.Markdown("Respuestas para orca desde los copys")  # "Answers from orca over the copies"
    Respuesta = gr.Textbox(label="Respuesta")
    id = gr.Textbox(label="id")
    # metrica = gr.Textbox(label="metrica")
    # gr.Markdown("Respuestas para orca desde los names")
    # Respuesta2 = gr.Textbox(label="Respuesta2")
    # id2 = gr.Textbox(label="id2")
    # metrica2 = gr.Textbox(label="metrica2")
    # gr.Markdown("Respuestas para hf desde los names")
    # Respuesta3 = gr.Textbox(label="Respuesta3")
    # id3 = gr.Textbox(label="id3")
    # metrica3 = gr.Textbox(label="metrica3")
    Enviar_btn = gr.Button("Responder")
    Enviar_btn.click(
        fn=QARequest,
        inputs=[Pregunta, filtred],
        outputs=[Respuesta, id],
        api_name="Angela",
    )

demo.launch()