From 131c5e375cf0f0d7916dda4086c3054ebfe6650c Mon Sep 17 00:00:00 2001
From: marioggil
Date: Wed, 20 Sep 2023 21:02:10 -0500
Subject: [PATCH] Clean Proyect only text 2 node

---
 .gitignore       |   3 +
 FindinDB.py      | 139 +++++++++++++++++++++++++++++++++++++++++++++++
 requirements.txt |  77 ++++++++++++++++++++++++++
 3 files changed, 219 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 FindinDB.py
 create mode 100644 requirements.txt

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..23a926e
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,3 @@
+motor.sqlite
+Copies/*
+names/*
diff --git a/FindinDB.py b/FindinDB.py
new file mode 100644
index 0000000..67eb0e6
--- /dev/null
+++ b/FindinDB.py
@@ -0,0 +1,139 @@
+"""Gradio demo: similarity search over FAISS indexes built from a SQLite table.
+
+Rows of the ``copies`` table in ``motor.sqlite`` whose ``copy_start`` is "T"
+are embedded with GPT4All embeddings into two FAISS indexes: one over the
+``copy_message`` column (cached in ``Copies/``) and one over the ``name``
+column (cached in ``names/``).
+"""
+import logging
+import sqlite3
+from contextlib import closing
+
+import gradio as gr
+import pandas as pd
+from faiss import write_index, read_index
+from langchain import LLMChain
+from langchain import PromptTemplate
+from langchain.callbacks.base import BaseCallbackManager
+from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
+from langchain.chains import LLMChain
+from langchain.document_loaders import DataFrameLoader
+from langchain.document_loaders import TextLoader
+from langchain.document_loaders import UnstructuredFileLoader
+from langchain.document_loaders import UnstructuredURLLoader
+from langchain.document_loaders.csv_loader import CSVLoader
+from langchain.document_loaders.recursive_url_loader import RecursiveUrlLoader
+from langchain.embeddings import GPT4AllEmbeddings
+from langchain.llms import GPT4All
+from langchain.text_splitter import CharacterTextSplitter
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain.vectorstores import FAISS
+
+logger = logging.getLogger(__name__)
+
+# Number of nearest documents returned for every query.
+TOP_K = 3
+
+# Divider placed between the documents shown in one answer box.
+SEPARATOR = "\n\n\n########################\n\n\n\n"
+
+
+def loadModels():
+    """Create the GPT4All language model and the embedding model.
+
+    Returns:
+        tuple: ``(llm, embeddings)``.
+    """
+    llm = GPT4All(model="orca-mini-3b.ggmlv3.q4_0.bin", temp=0.1, streaming=True)
+    embeddings = GPT4AllEmbeddings()
+    return llm, embeddings
+
+
+def _load_or_build_index(folder_path, docs, embeddings):
+    """Return the FAISS index cached in *folder_path*, building it if needed.
+
+    Args:
+        folder_path: Directory the index is loaded from and saved to.
+        docs: Documents to embed when no cached index can be loaded.
+        embeddings: Embedding model used for loading and for building.
+    """
+    try:
+        return FAISS.load_local(folder_path=folder_path, embeddings=embeddings)
+    except Exception:
+        # Deliberate best effort: any failure to read the cache (for example
+        # a first run with nothing saved yet) falls back to rebuilding it.
+        # Unlike a bare ``except``, Ctrl-C and SystemExit still propagate.
+        logger.info("Rebuilding FAISS index in %r", folder_path, exc_info=True)
+    index = FAISS.from_documents(docs, embeddings)
+    index.save_local(folder_path=folder_path)
+    return index
+
+
+def FinderDb(query, dbs):
+    """Search *dbs* for the ``TOP_K`` documents most similar to *query*.
+
+    Args:
+        query: Text to search for.
+        dbs: Vector store exposing ``similarity_search_with_score``.
+
+    Returns:
+        tuple: ``(text, scores, ids)`` where ``text`` is the matched page
+        contents joined with ``SEPARATOR``, ``scores`` holds the score of
+        each match and ``ids`` holds each match's ``metadata["id"]``.
+    """
+    hits = dbs.similarity_search_with_score(query, TOP_K)
+    text = SEPARATOR.join(doc.page_content for doc, _ in hits)
+    scores = [score for _, score in hits]
+    ids = [doc.metadata["id"] for doc, _ in hits]
+    return text, scores, ids
+
+
+def QARequest(Pregunta):
+    """Search both indexes for *Pregunta*.
+
+    Returns:
+        tuple: The ``FinderDb`` triple for the ``copy_message`` index followed
+        by the triple for the ``name`` index (six values in total).
+    """
+    return (*FinderDb(Pregunta, db), *FinderDb(Pregunta, db2))
+
+
+def _answer_from_names(Pregunta):
+    """Return the ``name`` index triple, one value per visible output box."""
+    return FinderDb(Pregunta, db2)
+
+
+llm, emb = loadModels()
+
+# ``closing`` releases the SQLite handle as soon as the table has been read.
+with closing(sqlite3.connect("motor.sqlite")) as con:
+    copies_df = pd.read_sql_query("SELECT * from copies", con)
+
+copiesT = copies_df[copies_df.copy_start == "T"]
+copiesT = copiesT[["copy_message", "id", "name"]]
+data = copiesT
+documents = DataFrameLoader(data, page_content_column="copy_message").load()
+documents2 = DataFrameLoader(data, page_content_column="name").load()
+
+db = _load_or_build_index("Copies", documents, emb)
+db2 = _load_or_build_index("names", documents2, emb)
+
+with gr.Blocks() as demo:
+    Pregunta = gr.Textbox(label="Pregunta")
+    Respuesta2 = gr.Textbox(label="Respuesta2")
+    id2 = gr.Textbox(label="id2")
+    metrica2 = gr.Textbox(label="metrica2")
+    Enviar_btn = gr.Button("Responder")
+
+    # Only the three "2" boxes exist, so the handler returns exactly the
+    # ``name`` index triple. ``QARequest`` returns six values with the
+    # ``copy_message`` triple first, which does not line up with them.
+    Enviar_btn.click(
+        fn=_answer_from_names,
+        inputs=Pregunta,
+        outputs=[Respuesta2, metrica2, id2],
+        api_name="Respuestas",
+    )
+
+if __name__ == "__main__":
+    demo.launch()
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..ab32aee
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,77 @@
+aiofiles==23.2.1
+aiohttp==3.8.5
+aiosignal==1.3.1
+altair==5.1.1
+annotated-types==0.5.0
+anyio==3.7.1
+async-timeout==4.0.3
+attrs==23.1.0
+certifi==2023.7.22
+charset-normalizer==3.2.0
+click==8.1.7
+contourpy==1.1.1
+cycler==0.11.0
+dataclasses-json==0.6.0
+exceptiongroup==1.1.3
+faiss-cpu==1.7.4
+fastapi==0.103.1
+ffmpy==0.3.1
+filelock==3.12.4
+fonttools==4.42.1
+frozenlist==1.4.0
+fsspec==2023.9.1
+gpt4all==1.0.12
+gradio==3.44.4
+gradio_client==0.5.1
+greenlet==2.0.2
+h11==0.14.0
+httpcore==0.18.0
+httpx==0.25.0
+huggingface-hub==0.17.2
+idna==3.4
+importlib-resources==6.1.0
+Jinja2==3.1.2
+jsonschema==4.19.1
+jsonschema-specifications==2023.7.1
+kiwisolver==1.4.5
+langchain==0.0.297
+langsmith==0.0.38
+MarkupSafe==2.1.3
+marshmallow==3.20.1
+matplotlib==3.7.3
+multidict==6.0.4
+mypy-extensions==1.0.0
+numexpr==2.8.6
+numpy==1.24.4
+orjson==3.9.7
+packaging==23.1
+pandas==2.0.3
+Pillow==10.0.1
+pkgutil_resolve_name==1.3.10
+pydantic==2.3.0
+pydantic_core==2.6.3
+pydub==0.25.1
+pyparsing==3.1.1
+python-dateutil==2.8.2
+python-multipart==0.0.6
+pytz==2023.3.post1
+PyYAML==6.0.1
+referencing==0.30.2
+requests==2.31.0
+rpds-py==0.10.3
+semantic-version==2.10.0
+six==1.16.0
+sniffio==1.3.0
+SQLAlchemy==2.0.21
+starlette==0.27.0
+tenacity==8.2.3
+toolz==0.12.0
+tqdm==4.66.1
+typing-inspect==0.9.0
+typing_extensions==4.8.0
+tzdata==2023.3
+urllib3==2.0.5
+uvicorn==0.23.2
+websockets==11.0.3
+yarl==1.9.2
+zipp==3.17.0