# BotQuestionsAnswers/test.py

import gradio as gr
from langchain import PromptTemplate, LLMChain
from langchain.llms import GPT4All
from langchain.embeddings import GPT4AllEmbeddings
from langchain.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import UnstructuredFileLoader, UnstructuredURLLoader
from langchain.document_loaders.recursive_url_loader import RecursiveUrlLoader
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.callbacks.base import BaseCallbackManager

def loadModels():
    # model = GPT4All("orca-mini-3b.ggmlv3.q4_0.bin")
    # The callback manager is set up for token streaming but is not currently passed to GPT4All.
    callback_manager = BaseCallbackManager([StreamingStdOutCallbackHandler()])
    llm = GPT4All(model="orca-mini-3b.ggmlv3.q4_0.bin", temp=0.1, streaming=True)  # callback_manager=callback_manager, verbose=True, repeat_last_n=0
    embeddings = GPT4AllEmbeddings()
    return llm, embeddings

llm, emb = loadModels()
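
# A minimal smoke test of the loaded model (a hypothetical prompt, kept commented out so the
# LLM is not invoked every time the module loads):
# print(llm("Nombra tres destinos turísticos de México."))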

def makePrompt():
    # Several candidate templates were tried; each assignment overwrites the previous
    # one, so only the final template below is actually used.
    prompt_template = """Usa las siguientes piezas de contexto para responder la pregunta al final. Si no conoces la respuesta, di que no posees información para responder y no intentes inventar una respuesta.
{context}
Question: {question}
Answer:"""
    prompt_template = """Resume el siguiente texto en 500 palabras o menos contestando la siguiente pregunta: {question}. Si con la información dada abajo no pudieras responder la pregunta, responde: No sé.
{context}"""
    prompt_template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.
{context}
Question: {question}
Answer:"""
    prompt_template = """
Please use the following context to answer questions.
Context: {context}
- -
Question: {question}
Answer: Let's think step by step."""
    prompt = PromptTemplate(template=prompt_template, input_variables=["context", "question"])
    return prompt
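
# Quick sanity check of the template with hypothetical values (not part of the app flow);
# PromptTemplate.format simply substitutes the two input variables:
# print(makePrompt().format(context="Cancún está en Quintana Roo.", question="¿Dónde está Cancún?"))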

# Archives=[]
Archives = [
    ["https://www.gob.mx/sectur", "Web"],
    # ["https://www.tripadvisor.es/Tourism-g150768-Mexico-Vacations.html", "Web"],
    ["https://www.mexicodestinos.com/blog/destinos-mexico/", "Web"],
    ["https://visitmexico.com/", "Web"],
    ["https://www.turismomexico.es/", "Web"],
    ["./1689574595178.pdf", "PDF"],
    ["./public_key_cryptography.pdf", "PDF"],
]

def makeDb(Archives, max_depth=2):
    # Load every configured source (local PDFs and web pages) into one list of documents.
    data = []
    for Archive in Archives:
        if Archive[1] == "PDF":
            loader = UnstructuredFileLoader(Archive[0])
        elif Archive[1] == "RecursiveWeb":
            loader = RecursiveUrlLoader(url=Archive[0], max_depth=max_depth)
        elif Archive[1] == "Web":
            loader = UnstructuredURLLoader(urls=[Archive[0]])
        else:
            # Skip unknown source types instead of silently reusing the previous loader's output.
            continue
        data = data + loader.load()
    return data

A = makeDb(Archives)

def makeFinder():
    # Chunk the documents and build (or reload) a persisted FAISS index over them.
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len,
    )
    documents = text_splitter.split_documents(A)
    try:
        db = FAISS.load_local(folder_path="FAISS", embeddings=emb)
    except Exception:
        db = FAISS.from_documents(documents, emb)
        db.save_local(folder_path="FAISS")
    return db

db = makeFinder()

def QARequest(Pregunta):
    # Return the two most similar chunks as readable text rather than a raw Document list,
    # since the result is displayed in a plain Gradio Textbox.
    docs = db.similarity_search(Pregunta, k=2)
    return "\n\n".join(doc.page_content for doc in docs)
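
# A minimal sketch of actually answering with the loaded GPT4All model: feed the retrieved
# chunks and the question into an LLMChain, mirroring the commented-out Streamlit flow at the
# bottom of this file. The name QARequestLLM is an assumption, not part of the current wiring.
def QARequestLLM(Pregunta):
    docs = db.similarity_search(Pregunta, k=2)
    context = "\n".join(doc.page_content for doc in docs)
    chain = LLMChain(llm=llm, prompt=makePrompt())
    return chain.run({"context": context, "question": Pregunta})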

with gr.Blocks() as demo:
    Pregunta = gr.Textbox(label="Pregunta")
    Respuesta = gr.Textbox(label="Respuesta")
    Enviar_btn = gr.Button("Responder")
    Enviar_btn.click(fn=QARequest, inputs=Pregunta, outputs=Respuesta, api_name="greet")

demo.launch()

# # Bring in deps
# import streamlit as st
# from langchain.llms import LlamaCpp
# from langchain.embeddings import LlamaCppEmbeddings
# from langchain.prompts import PromptTemplate
# from langchain.chains import LLMChain
# from langchain.document_loaders import TextLoader
# from langchain.text_splitter import CharacterTextSplitter
# from langchain.vectorstores import Chroma
#
# # Customize the layout
# st.set_page_config(page_title="DOCAI", page_icon="🤖", layout="wide")
# st.markdown(f"""
#     <style>
#     .stApp {{background-image: url("https://images.unsplash.com/photo-1509537257950-20f875b03669?ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D&auto=format&fit=crop&w=1469&q=80");
#     background-attachment: fixed;
#     background-size: cover}}
#     </style>
# """, unsafe_allow_html=True)
#
# # Function for writing an uploaded file to a temp location
# def write_text_file(content, file_path):
#     try:
#         with open(file_path, 'w') as file:
#             file.write(content)
#         return True
#     except Exception as e:
#         print(f"Error occurred while writing the file: {e}")
#         return False
#
# # Set the prompt template
# prompt_template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.
# {context}
# Question: {question}
# Answer:"""
# prompt = PromptTemplate(template=prompt_template, input_variables=["context", "question"])
#
# # Initialize the LLM & embeddings
# llm = LlamaCpp(model_path="./models/llama-7b.ggmlv3.q4_0.bin")
# embeddings = LlamaCppEmbeddings(model_path="models/llama-7b.ggmlv3.q4_0.bin")
# llm_chain = LLMChain(llm=llm, prompt=prompt)
#
# st.title("📄 Document Conversation 🤖")
# uploaded_file = st.file_uploader("Upload an article", type="txt")
# if uploaded_file is not None:
#     content = uploaded_file.read().decode('utf-8')
#     # st.write(content)
#     file_path = "temp/file.txt"
#     write_text_file(content, file_path)
#
#     loader = TextLoader(file_path)
#     docs = loader.load()
#     text_splitter = CharacterTextSplitter(chunk_size=100, chunk_overlap=0)
#     texts = text_splitter.split_documents(docs)
#     db = Chroma.from_documents(texts, embeddings)
#     st.success("File Loaded Successfully!!")
#
#     # Query through the LLM
#     question = st.text_input("Ask something from the file", placeholder="Find something similar to: ....this.... in the text?", disabled=not uploaded_file)
#     if question:
#         similar_doc = db.similarity_search(question, k=1)
#         context = similar_doc[0].page_content
#         query_llm = LLMChain(llm=llm, prompt=prompt)
#         response = query_llm.run({"context": context, "question": question})
#         st.write(response)