import gradio as gr

from langchain import PromptTemplate
from langchain.chains import LLMChain
from langchain.vectorstores import FAISS
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.document_loaders import UnstructuredFileLoader, UnstructuredURLLoader
from langchain.document_loaders.recursive_url_loader import RecursiveUrlLoader
from langchain.llms import GPT4All
from langchain.embeddings import GPT4AllEmbeddings
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.callbacks.base import BaseCallbackManager

def loadModels():
    # Stream generated tokens to stdout while the model answers.
    callback_manager = BaseCallbackManager([StreamingStdOutCallbackHandler()])
    llm = GPT4All(
        model="orca-mini-3b.ggmlv3.q4_0.bin",
        temp=0.1,
        streaming=True,
        callback_manager=callback_manager,
    )
    embeddings = GPT4AllEmbeddings()
    return llm, embeddings


llm, emb = loadModels()
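
# Quick smoke test (illustrative, not part of the app flow): the LangChain
# GPT4All wrapper is callable on a plain string, and GPT4AllEmbeddings
# exposes embed_query. Uncomment to verify that both models load correctly:
# print(llm("Name three beach destinations in Mexico."))
# print(len(emb.embed_query("playa")))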

def makePrompt():
    # Earlier prompt drafts are kept below for reference; each assignment
    # overwrote the previous one, so only the final template is active.

    # Spanish QA template: answer from the context, or say the information
    # is not available and don't try to answer.
    # prompt_template = """Usa las siguientes piezas de contexto para responder la pregunta al final. Si no conoces la respuesta, di no poseo información para responder y no intentes responder.
    #
    # {context}
    #
    # Question: {question}
    #
    # Answer:"""

    # Spanish summarization template: summarize in 500 words or fewer while
    # answering the question; if the context below can't answer it, reply "No sé".
    # prompt_template = """Resume el siguiente texto en 500 palabras o menos contestando la siguiente pregunta: {question}. Si con la información dada abajo no pudieras responder la pregunta responde: No sé.
    #
    # {context}"""

    # English QA template.
    # prompt_template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.
    #
    # {context}
    #
    # Question: {question}
    #
    # Answer:"""

    # Step-by-step template: the one actually in use.
    prompt_template = """
Please use the following context to answer questions.
Context: {context}
- -
Question: {question}
Answer: Let's think step by step."""

    prompt = PromptTemplate(template=prompt_template, input_variables=["context", "question"])

    return prompt
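
# Example of how the template is rendered before being sent to the model
# (illustrative values):
# prompt = makePrompt()
# print(prompt.format(context="Cancún es famosa por sus playas...",
#                     question="¿Dónde hay playas en México?"))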

# Sources to index: [location, type], where type is "Web", "RecursiveWeb" or "PDF".
Archives = [
    ["https://www.gob.mx/sectur", "Web"],
    # ["https://www.tripadvisor.es/Tourism-g150768-Mexico-Vacations.html", "Web"],
    ["https://www.mexicodestinos.com/blog/destinos-mexico/", "Web"],
    ["https://visitmexico.com/", "Web"],
    ["https://www.turismomexico.es/", "Web"],
    ["./1689574595178.pdf", "PDF"],
    ["./public_key_cryptography.pdf", "PDF"],
]

def makeDb(Archives, max_depth=2):
    # Load every source into one list of LangChain Documents.
    data = []
    for Archive in Archives:
        if Archive[1] == "PDF":
            loader = UnstructuredFileLoader(Archive[0])
        elif Archive[1] == "RecursiveWeb":
            loader = RecursiveUrlLoader(url=Archive[0], max_depth=max_depth)
        elif Archive[1] == "Web":
            loader = UnstructuredURLLoader(urls=[Archive[0]])
        else:
            # Unknown source type: skip it.
            continue
        data = data + loader.load()
    return data


A = makeDb(Archives)
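
# Each Document carries page_content plus metadata such as its source;
# a quick look at what was ingested (illustrative):
# print(len(A), A[0].metadata)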

def makeFinder():
    # Split the documents into overlapping chunks sized for embedding.
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len,
    )
    documents = text_splitter.split_documents(A)
    try:
        # Reuse a previously saved index when one exists on disk.
        db = FAISS.load_local(folder_path="FAISS", embeddings=emb)
    except Exception:
        # Otherwise embed the chunks, build the index and persist it.
        db = FAISS.from_documents(documents, emb)
        db.save_local(folder_path="FAISS")
    return db


db = makeFinder()
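
# The store can also be wrapped as a retriever for use inside chains
# (a sketch, assuming the standard LangChain vectorstore API):
# retriever = db.as_retriever(search_kwargs={"k": 2})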

def QARequest(Pregunta):
    # Return the two chunks most similar to the question; the Gradio
    # Textbox renders their string representation.
    query = Pregunta
    output = db.similarity_search(query, k=2)
    return output
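
# A hedged sketch (QARequestLLM is not part of the original app): instead of
# showing raw chunks, stuff the retrieved context into the prompt and let the
# GPT4All model compose an answer, mirroring the LLMChain pattern in the
# commented-out Streamlit version at the bottom of this file.
def QARequestLLM(Pregunta):
    similar_docs = db.similarity_search(Pregunta, k=2)
    context = "\n".join(doc.page_content for doc in similar_docs)
    chain = LLMChain(llm=llm, prompt=makePrompt())
    # To use it in the UI, pass fn=QARequestLLM to Enviar_btn.click below.
    return chain.run({"context": context, "question": Pregunta})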

with gr.Blocks() as demo:
    Pregunta = gr.Textbox(label="Pregunta")
    Respuesta = gr.Textbox(label="Respuesta")
    Enviar_btn = gr.Button("Responder")
    Enviar_btn.click(fn=QARequest, inputs=Pregunta, outputs=Respuesta, api_name="greet")

demo.launch()
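
# By default launch() serves on localhost only; Gradio also supports
# demo.launch(share=True) to expose a temporary public URL.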

# Previous Streamlit version of this app, kept for reference.

# # Bring in deps
# import streamlit as st
# from langchain.llms import LlamaCpp
# from langchain.embeddings import LlamaCppEmbeddings
# from langchain.prompts import PromptTemplate
# from langchain.chains import LLMChain
# from langchain.document_loaders import TextLoader
# from langchain.text_splitter import CharacterTextSplitter
# from langchain.vectorstores import Chroma

# # Customize the layout
# st.set_page_config(page_title="DOCAI", page_icon="🤖", layout="wide", )
# st.markdown(f"""
#     <style>
#     .stApp {{background-image: url("https://images.unsplash.com/photo-1509537257950-20f875b03669?ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D&auto=format&fit=crop&w=1469&q=80");
#     background-attachment: fixed;
#     background-size: cover}}
#     </style>
# """, unsafe_allow_html=True)

# # Function for writing an uploaded file to a temp location
# def write_text_file(content, file_path):
#     try:
#         with open(file_path, 'w') as file:
#             file.write(content)
#         return True
#     except Exception as e:
#         print(f"Error occurred while writing the file: {e}")
#         return False

# # Set the prompt template
# prompt_template = """Use the following pieces of context to answer the question at the end. If you don't know the answer, just say that you don't know, don't try to make up an answer.
# {context}
# Question: {question}
# Answer:"""
# prompt = PromptTemplate(template=prompt_template, input_variables=["context", "question"])

# # Initialize the LLM & embeddings
# llm = LlamaCpp(model_path="./models/llama-7b.ggmlv3.q4_0.bin")
# embeddings = LlamaCppEmbeddings(model_path="models/llama-7b.ggmlv3.q4_0.bin")
# llm_chain = LLMChain(llm=llm, prompt=prompt)

# st.title("📄 Document Conversation 🤖")
# uploaded_file = st.file_uploader("Upload an article", type="txt")

# if uploaded_file is not None:
#     content = uploaded_file.read().decode('utf-8')
#     # st.write(content)
#     file_path = "temp/file.txt"
#     write_text_file(content, file_path)
#
#     loader = TextLoader(file_path)
#     docs = loader.load()
#     text_splitter = CharacterTextSplitter(chunk_size=100, chunk_overlap=0)
#     texts = text_splitter.split_documents(docs)
#     db = Chroma.from_documents(texts, embeddings)
#     st.success("File Loaded Successfully!!")
#
#     # Query through LLM
#     question = st.text_input("Ask something from the file", placeholder="Find something similar to: ....this.... in the text?", disabled=not uploaded_file,)
#     if question:
#         similar_doc = db.similarity_search(question, k=1)
#         context = similar_doc[0].page_content
#         query_llm = LLMChain(llm=llm, prompt=prompt)
#         response = query_llm.run({"context": context, "question": question})
#         st.write(response)