159 lines
5.6 KiB
Python
159 lines
5.6 KiB
Python
import gradio as gr
|
|
from faiss import write_index, read_index
|
|
|
|
from langchain import PromptTemplate
|
|
from langchain.chains import LLMChain
|
|
from langchain.document_loaders import TextLoader
|
|
from langchain.text_splitter import CharacterTextSplitter
|
|
from langchain.vectorstores import FAISS
|
|
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
|
from langchain.document_loaders import UnstructuredFileLoader
|
|
from langchain.document_loaders.recursive_url_loader import RecursiveUrlLoader
|
|
from langchain.document_loaders import UnstructuredURLLoader
|
|
from langchain.document_loaders.csv_loader import CSVLoader
|
|
from langchain import LLMChain
|
|
from langchain.llms import GPT4All
|
|
from langchain.embeddings import GPT4AllEmbeddings
|
|
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
|
|
from langchain.callbacks.base import BaseCallbackManager
|
|
from langchain.document_loaders import DataFrameLoader
|
|
from langchain.embeddings import HuggingFaceEmbeddings
|
|
import pandas as pd
|
|
import sqlite3
|
|
from sentence_transformers import SentenceTransformer
|
|
from fastapi import FastAPI
|
|
#from cleantext import clean
|
|
import re
|
|
# Settings for the HuggingFace sentence-embedding model behind ``hf``
# (run on CPU, with normalize_embeddings switched on for encoding).
model_name = "hiiamsid/sentence_similarity_spanish_es"
model_kwargs = dict(device="cpu")
encode_kwargs = dict(normalize_embeddings=True)
hf = HuggingFaceEmbeddings(
    encode_kwargs=encode_kwargs,
    model_kwargs=model_kwargs,
    model_name=model_name,
)

# Path prefix at which the Gradio sub-application is mounted on ``app``.
CUSTOM_PATH = "/angela"
app = FastAPI()
|
|
|
|
@app.get("/")
def read_main():
    """Handle GET "/" by returning a fixed one-key message dictionary."""
    payload = dict(message="This is your main app")
    return payload
|
|
|
|
|
|
def loadModels():
    """Create the GPT4All language model and the GPT4All embedding model.

    Returns:
        A ``(llm, embeddings)`` tuple. ``llm`` is a ``GPT4All`` instance built
        from the ``orca-mini-3b.ggmlv3.q4_0.bin`` weights with ``temp=0.1``
        and ``streaming=True``; ``embeddings`` is a default-constructed
        ``GPT4AllEmbeddings``.
    """
    # No callback manager is built here: the model is constructed without
    # callbacks, so creating one would only leave an unused local behind.
    llm = GPT4All(
        model="orca-mini-3b.ggmlv3.q4_0.bin",
        temp=0.1,
        streaming=True,
    )
    embeddings = GPT4AllEmbeddings()
    return llm, embeddings
|
|
|
|
|
|
def loadCopysAndData(pathsqlite="motor.sqlite"):
    """Load the copies flagged ``copy_start == "T"`` as two document lists.

    Args:
        pathsqlite: Path of the SQLite database that holds a ``copies`` table.

    Returns:
        A ``(documents, documents2)`` tuple built from the same rows
        (columns ``copy_message``, ``id`` and ``name``): ``documents`` uses
        ``copy_message`` as page content and ``documents2`` uses ``name``.
        The other selected columns are expected to be carried as document
        metadata by ``DataFrameLoader`` (``FinderDbs`` reads
        ``metadata["id"]``).
    """
    con = sqlite3.connect(pathsqlite)
    try:
        copies_df = pd.read_sql_query("SELECT * from copies", con)
    finally:
        # The whole table is already in memory; release the database handle
        # even when the query fails instead of leaking the connection.
        con.close()

    is_start_copy = copies_df.copy_start == "T"
    data = copies_df.loc[is_start_copy, ["copy_message", "id", "name"]]

    documents = DataFrameLoader(data, page_content_column="copy_message").load()
    documents2 = DataFrameLoader(data, page_content_column="name").load()
    return documents, documents2
|
|
|
|
def makeFaissdb(documents, folder_path, embedding):
    """Return the FAISS store saved in *folder_path*, building it if needed.

    Args:
        documents: Documents to index when no usable saved store is found.
        folder_path: Directory the store is loaded from and, after a
            rebuild, saved to.
        embedding: Embedding object used both to load and to build the store.

    Returns:
        The loaded or freshly built ``FAISS`` vector store.
    """
    # NOTE(review): FAISS.load_local is understood to unpickle data found in
    # folder_path; only point it at trusted directories -- confirm against
    # the installed LangChain version.
    try:
        db = FAISS.load_local(folder_path=folder_path, embeddings=embedding)
    except Exception:
        # Deliberate best-effort cache: any load failure (missing folder,
        # incompatible files, ...) triggers a rebuild. ``Exception`` rather
        # than a bare ``except`` so Ctrl-C and SystemExit still propagate.
        db = FAISS.from_documents(documents, embedding)
        db.save_local(folder_path=folder_path)
    return db
|
|
|
|
# Import-time bootstrap: create the models, read the copies from SQLite and
# load (or build) one FAISS store per content/embedding combination.
llm, emb = loadModels()
documents, documents2 = loadCopysAndData()

db = makeFaissdb(documents=documents, folder_path="Copies", embedding=emb)
db2 = makeFaissdb(documents=documents2, folder_path="names", embedding=emb)
db3 = makeFaissdb(documents=documents2, folder_path="nameshf", embedding=hf)
|
|
|
|
def FinderDbs(query, dbs, filtred=False, th=1.2):
    """Search several vector stores and merge their hits by document id.

    Every store in *dbs* is asked for its 4 best matches. A document id that
    is returned more than once keeps the page content of its first hit and
    gets the smaller of the two distances, reduced by 0.1 as a bonus for
    being found repeatedly.

    Args:
        query: Text to search for.
        dbs: Iterable of stores exposing
            ``similarity_search_with_score(query, k)`` that returns
            ``(document, score)`` pairs; each document must provide
            ``metadata["id"]`` and ``page_content``.
        filtred: When true, keep only hits whose merged distance is strictly
            below *th*.
        th: Distance threshold applied when *filtred* is true.

    Returns:
        A ``(results, ids)`` tuple. ``results`` maps each document id to
        ``{"d": distance, "page_content": text}`` ordered by ascending
        distance; ``ids`` is ``results.keys()``.
    """
    merged = {}
    for store in dbs:
        for doc, score in store.similarity_search_with_score(query, 4):
            doc_id = doc.metadata["id"]
            if doc_id in merged:
                previous = merged[doc_id]["d"]
                merged[doc_id]["d"] = min(previous - 0.1, score - 0.1)
            else:
                merged[doc_id] = {"d": score, "page_content": doc.page_content}

    if filtred:
        # Honour the caller-supplied threshold instead of a hard-coded 1.2.
        merged = {
            doc_id: hit for doc_id, hit in merged.items() if hit["d"] < th
        }

    ranked = dict(sorted(merged.items(), key=lambda item: item[1]["d"]))
    return ranked, ranked.keys()
|
|
|
|
def QARequest(Pregunta, filtred=False):
    """Look up the copies closest to a question and render them as HTML.

    Args:
        Pregunta: Question text typed by the user.
        filtred: Forwarded to ``FinderDbs``; when true only hits below its
            distance threshold are kept.

    Returns:
        A two-item list ``[html, ids]``. ``html`` holds one bordered
        ``<div>`` per hit, titled "Respuesta N", with the hit's page content
        converted from Markdown; it is an empty string when nothing matched.
        ``ids`` is the keys view of the matching document ids, in ranking
        order.
    """
    import markdown  # imported lazily, only when a request is served

    # FinderDbs always returns a (results, ids) pair, so unpack it directly;
    # truth-testing the pair itself can never detect "no results".
    results, ids = FinderDbs(Pregunta, [db, db2], filtred)

    # "border-style: solid" -- the former "border-style = solid" is not a
    # valid CSS declaration, so the card border was never drawn.
    card_style = (
        "border-style: solid;border-width:1px;border-radius:10px;"
        "margin:14px;padding:14px"
    )
    cards = [
        f"<div style='{card_style}'><h2>Respuesta {position}</h2>"
        + markdown.markdown(hit["page_content"])
        + "</div>"
        for position, hit in enumerate(results.values(), start=1)
    ]
    return ["".join(cards), ids]
|
|
|
|
|
|
|
|
# Gradio front end: a question box and a filter toggle feed QARequest, whose
# HTML answer and matching ids are shown below.
with gr.Blocks() as demo:
    gr.Image("logo.jpg", height=100)
    gr.Markdown("Esta es la busqueda que hace el usuario")
    Pregunta = gr.Textbox(label="Pregunta")
    filtred = gr.Checkbox(label="filtrado")

    gr.Markdown("Respuestas para orca desde los copys")
    # QARequest returns rendered HTML, so the answer area is an HTML
    # component wired directly as the output. Wrapping a Textbox in
    # gr.HTML(...) at click time would pass the component as the HTML
    # *value* and leave the visible "Respuesta" box permanently empty.
    Respuesta = gr.HTML(label="Respuesta")
    # Named id_box so the builtin ``id`` is not shadowed at module level.
    id_box = gr.Textbox(label="id")
    Enviar_btn = gr.Button("Responder")

    Enviar_btn.click(
        fn=QARequest,
        inputs=[Pregunta, filtred],
        outputs=[Respuesta, id_box],
        api_name="api_angela",
    )
|
|
|
|
# Serve the Gradio UI under CUSTOM_PATH on the FastAPI application. The
# public gr.mount_gradio_app helper is used instead of building the sub-app
# through the internal gr.routes.App.create_app and mounting it by hand.
app = gr.mount_gradio_app(app, demo, path=CUSTOM_PATH)
|
|
|