diff --git a/app.py b/app.py
index e45a2d1..fd478bc 100644
--- a/app.py
+++ b/app.py
@@ -12,7 +12,8 @@ from langchain.document_loaders.recursive_url_loader import RecursiveUrlLoader
 from langchain.document_loaders import UnstructuredURLLoader
 from langchain.document_loaders.csv_loader import CSVLoader
 from langchain import LLMChain
-from langchain.llms import GPT4All
+#from langchain.llms import GPT4All
+from gpt4all import GPT4All
 from langchain.embeddings import GPT4AllEmbeddings
 from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
 from langchain.callbacks.base import BaseCallbackManager
@@ -26,9 +27,9 @@ import time
 import re
 
 def loadModels():
-    #model = GPT4All("orca-mini-3b.ggmlv3.q4_0.bin")
-    callback_manager = BaseCallbackManager([StreamingStdOutCallbackHandler()])
-    llm = GPT4All(model="orca-mini-3b.ggmlv3.q4_0.bin",temp=0.1,streaming=True,callback_manager=callback_manager,)# verbose=True,repeat_last_n=0
+    llm = GPT4All("orca-mini-3b.ggmlv3.q4_0.bin")
+
+    #llm = GPT4All(model="orca-mini-3b.ggmlv3.q4_0.bin",temp=0.1,streaming=True,callbacks=[StreamingStdOutCallbackHandler()])# verbose=True,repeat_last_n=0
     embeddings = GPT4AllEmbeddings()
     return llm, embeddings
 
@@ -38,11 +39,14 @@ st2=time.time()
 
 Archives=[["https://www.gob.mx/sectur","Web"],
           ["https://centrohistorico.pueblacapital.gob.mx/nuestro-centro-historico/nuestro-blog/item/33-capilla-de-la-virgen-del-rosario","Web"],
-          ["https://capilladelrosariopuebla.com.mx/","Web"],
+          #["https://capilladelrosariopuebla.com.mx/","Web"],
          #["https://www.tripadvisor.es/Tourism-g150768-Mexico-Vacations.html","Web"],
          ["https://www.mexicodestinos.com/blog/destinos-mexico/","Web"],
          ["https://visitmexico.com/","Web"],
          ["https://www.turismomexico.es/","Web"],
+          ["https://es.wikipedia.org/wiki/Capilla_del_Rosario_(Puebla)","Web"],
+          ["https://www.mexicodesconocido.com.mx/capilla-del-rosario-puebla.html","Web"]
+          ]
 
 
 
@@ -71,9 +75,9 @@ A=makeDb(Archives)
 
 def makeFinder():
     text_splitter = RecursiveCharacterTextSplitter(
-        chunk_size = 1000,
+        chunk_size = 250,
         length_function=len,
-        chunk_overlap=200
+        chunk_overlap=50
     )
     documents = text_splitter.split_documents(A)
     try:
@@ -106,6 +110,23 @@
 Question: 
 Respuesta:"""
 
+# prompt_template = f"""
+# ### System:
+# Reponde la pregunta basado en el Contexto dado.
+# Si la pregunta no puede ser contestada usando la informacion dada
+# responder con "No poseo conocimiento sobre ese tema". Responder siempre en español.
+
+# ### User:
+
+# Contexto:
+# {{relevant_context}}
+
+# Pregunta:
+# {{user_query}}
+
+# ### Response:
+
+# Respuesta:"""
 prompt_template = PromptTemplate(
     input_variables=["relevant_context","user_query"],
     template=prompt_template
@@ -116,11 +137,11 @@
 # prompt=prompt_template.format(
 #     relevant_context="Otaisa es el lugar mas lindo de mexico pero esta muy cerca de nethai que no es tan lindo pero la comida es muy buena"
 # )
 # print(prompt)
-llm_chain = LLMChain(llm=llm,prompt=prompt_template)
+#llm_chain = LLMChain(llm=llm,prompt=prompt_template)
 
 def FinderDb(query,dbs,filtred=False):
-    Sal = dbs.similarity_search_with_score(query,2)
+    Sal = dbs.similarity_search_with_score(query,9)
     page_content=[]
     d=[]
     if filtred:
@@ -128,7 +149,6 @@
     else:
         lim=9000000
     for output in Sal:
-        print(output)
         if output[1]
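Note for reviewers: the import swap from `langchain.llms.GPT4All` to the native `gpt4all.GPT4All` changes the calling convention, which appears to be why the `LLMChain` line is commented out in this diff. A minimal sketch of how the native class is driven (the prompt string and `max_tokens` value here are illustrative, not taken from this repo):

```python
from gpt4all import GPT4All

# The native gpt4all class loads the model file directly and generates
# text via model.generate(); it is not a LangChain LLM, so it cannot be
# passed to LLMChain the way langchain.llms.GPT4All could.
model = GPT4All("orca-mini-3b.ggmlv3.q4_0.bin")

# Illustrative prompt; the app builds its real prompt from prompt_template.
response = model.generate("Describe la Capilla del Rosario en Puebla.", max_tokens=100)
print(response)
```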