# RAG demo: GPT4All + FAISS retrieval over Mexican tourism web pages, served via Gradio.
import gradio as gr
|
|
from faiss import write_index, read_index
|
|
|
|
from langchain import PromptTemplate
|
|
from langchain.chains import LLMChain
|
|
from langchain.document_loaders import TextLoader
|
|
from langchain.text_splitter import CharacterTextSplitter
|
|
from langchain.vectorstores import FAISS
|
|
from langchain.text_splitter import RecursiveCharacterTextSplitter
|
|
from langchain.document_loaders import UnstructuredFileLoader
|
|
from langchain.document_loaders.recursive_url_loader import RecursiveUrlLoader
|
|
from langchain.document_loaders import UnstructuredURLLoader
|
|
from langchain.document_loaders.csv_loader import CSVLoader
|
|
from langchain import LLMChain
|
|
from langchain.llms import GPT4All
|
|
from langchain.embeddings import GPT4AllEmbeddings
|
|
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
|
|
from langchain.callbacks.base import BaseCallbackManager
|
|
from langchain.document_loaders import DataFrameLoader
|
|
from langchain.embeddings import HuggingFaceEmbeddings
|
|
import pandas as pd
|
|
import sqlite3
|
|
from sentence_transformers import SentenceTransformer
|
|
#from cleantext import clean
|
|
import time
|
|
import re
|
|
|
|
def loadModels():
    """Instantiate the local GPT4All LLM and its embedding model.

    Returns:
        tuple: (llm, embeddings) — the streaming GPT4All chat model and a
        GPT4AllEmbeddings instance for vectorizing documents.
    """
    # Stream generated tokens to stdout as they are produced.
    manager = BaseCallbackManager([StreamingStdOutCallbackHandler()])
    model = GPT4All(
        model="orca-mini-3b.ggmlv3.q4_0.bin",
        temp=0.1,
        streaming=True,
        callback_manager=manager,
    )  # verbose=True, repeat_last_n=0
    return model, GPT4AllEmbeddings()
|
|
|
|
# Load the LLM and embeddings at import time and record how long it took
# (st..st2); the timings are only used by the commented-out print at the
# bottom of the file.
st=time.time()
llm,emb=loadModels()
st2=time.time()
|
|
|
|
# (source, loader-kind) pairs consumed by makeDb().  Supported kinds are
# "Web" (single URL), "RecursiveWeb" (crawl) and "PDF" (local file path).
Archives = [
    ["https://www.gob.mx/sectur", "Web"],
    ["https://centrohistorico.pueblacapital.gob.mx/nuestro-centro-historico/nuestro-blog/item/33-capilla-de-la-virgen-del-rosario", "Web"],
    ["https://capilladelrosariopuebla.com.mx/", "Web"],
    # ["https://www.tripadvisor.es/Tourism-g150768-Mexico-Vacations.html", "Web"],
    ["https://www.mexicodestinos.com/blog/destinos-mexico/", "Web"],
    ["https://visitmexico.com/", "Web"],
    ["https://www.turismomexico.es/", "Web"],
]
|
|
|
|
def makeDb(Archives, max_depth=2):
    """Load every source in *Archives* into one flat list of documents.

    Args:
        Archives: iterable of [source, kind] pairs, where kind is "PDF"
            (local file), "RecursiveWeb" (crawl up to max_depth) or "Web"
            (single URL).
        max_depth: crawl depth, used only for "RecursiveWeb" sources.

    Returns:
        list: all loaded LangChain documents in input order.  Entries with
        an unrecognized kind are skipped.
    """
    data = []
    for source, kind in Archives:
        if kind == "PDF":
            loader = UnstructuredFileLoader(source)
        elif kind == "RecursiveWeb":
            loader = RecursiveUrlLoader(url=source, max_depth=max_depth)
        elif kind == "Web":
            loader = UnstructuredURLLoader(urls=[source])
        else:
            # Bug fix: an unknown kind previously raised NameError on the
            # first entry, or silently re-appended the previous iteration's
            # documents (stale data2).  Now it is skipped explicitly.
            continue
        data.extend(loader.load())
    return data
|
|
# Corpus of documents loaded at import time; consumed by makeFinder() below.
A=makeDb(Archives)
|
|
|
|
|
|
def makeFinder():
    """Build (or load from disk) the FAISS vector index over the corpus A.

    First tries to load a previously persisted index from the
    "PaginasExternas" folder; on any failure it rebuilds the index from
    the freshly split documents and persists it for the next run.

    Returns:
        FAISS: the ready-to-query vector store.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        length_function=len,
        chunk_overlap=200,
    )
    documents = text_splitter.split_documents(A)
    try:
        db = FAISS.load_local(folder_path="PaginasExternas", embeddings=emb)
    except Exception:
        # Bug fix: this was a bare `except:`, which also swallowed
        # KeyboardInterrupt/SystemExit.  Any loading failure (missing
        # folder, incompatible index) triggers a rebuild.
        db = FAISS.from_documents(documents, emb)
        FAISS.save_local(db, folder_path="PaginasExternas")
    return db
|
|
|
|
# Module-level vector store queried by FinderDb() on every request.
db=makeFinder()
|
|
|
|
|
|
# Orca-style prompt: the model must answer in Spanish using only the supplied
# contexts, or say "I don't know".  Plain literal (the original used an
# f-string with doubled braces, which rendered to exactly this text).
prompt_template = """
### System:
Answer the question based on the contexts below.
If the question cannot be answered using the information
provided answer with "I don't know". Ever answer in spanish.

### User:

Contexts:
{relevant_context}

Question:
{user_query}

### Response:

Respuesta:"""
|
|
|
|
|
|
# Rebind the raw template string to a PromptTemplate exposing the two
# runtime variables filled in at query time.
prompt_template = PromptTemplate(
    template=prompt_template,
    input_variables=["relevant_context", "user_query"],
)
|
|
|
|
# Example of rendering the prompt manually (kept for reference):
# prompt=prompt_template.format(
#     user_query="Que lugar de mexico es el mas lindo?",
#     relevant_context="Otaisa es el lugar mas lindo de mexico pero esta muy cerca de nethai que no es tan lindo pero la comida es muy buena"
# )
# print(prompt)

# Chain for the LLM answer generation (currently unused: QARequest has the
# llm_chain.run call commented out and returns raw retrieval results).
llm_chain = LLMChain(llm=llm,prompt=prompt_template)
|
|
|
|
|
|
def FinderDb(query, dbs, filtred=False):
    """Retrieve the two chunks most similar to *query* from the store *dbs*.

    Args:
        query: user question used for the similarity search.
        dbs: vector store exposing similarity_search_with_score().
        filtred: when True keep only chunks whose distance score is below
            1.5; otherwise keep every result.

    Returns:
        tuple: (page contents joined by newlines, list of their scores),
        or (None, None) when no chunk survives the filter.
    """
    results = dbs.similarity_search_with_score(query, 2)
    page_content = []
    scores = []
    # An effectively infinite limit disables filtering.
    lim = 1.5 if filtred else 9000000
    for doc, score in results:
        print((doc, score))
        if score < lim:
            page_content.append(doc.page_content)
            scores.append(score)
    if len(page_content) < 1:
        return None, None
    # Bug fix: the join separator was the literal two characters "/n"
    # instead of a newline, gluing chunks together with stray "/n" text.
    return "\n".join(page_content), scores
|
|
|
|
|
|
def QARequest(Pregunta, filtred=True):
    """Gradio handler: fetch the context chunks relevant to a question.

    Args:
        Pregunta: user question (Spanish).
        filtred: forwarded to FinderDb.  Defaults to True, which preserves
            the previously hard-coded behaviour for the existing UI caller.

    Returns:
        tuple: (joined context string or None, list of scores or None).
    """
    # Bug fix: the filtred parameter was ignored and filtred=True was
    # hard-coded in the FinderDb call; it is now honoured.
    context, d = FinderDb(Pregunta, db, filtred=filtred)
    # LLM generation is disabled for now; only retrieval output is returned.
    # A = llm_chain.run(user_query=Pregunta, relevant_context=context)
    return context, d
|
|
|
|
|
|
#question = "What NFL team won the Super Bowl in the year Justin Bieber was born?"
|
|
# --- Gradio UI --------------------------------------------------------------
# One question box in, two text boxes out: the retrieved context and the
# similarity scores ("metrica").
with gr.Blocks() as demo:
    Pregunta = gr.Textbox(label="Pregunta")
    Respuesta = gr.Textbox(label="Respuesta")
    metrica=gr.Textbox(label="metrica")
    Enviar_btn = gr.Button("Responder")
    # QARequest returns (context, scores), mapped in order to the two outputs;
    # api_name exposes the handler as /api/Angela.
    Enviar_btn.click(fn=QARequest, inputs=[Pregunta], outputs=[Respuesta,metrica], api_name="Angela")

demo.launch() #

#print(A,time.time()-st,time.time()-st2)
|
|