import time
import gradio as gr
from gpt4all import GPT4All
from langchain.llms import GPT4All as LangChainGPT4All
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
from langchain.prompts import PromptTemplate
from langchain.chains import LLMChain

# Load the local GPT4All model; generate() further below streams tokens one at a time.
model = GPT4All("orca-mini-3b.ggmlv3.q4_0.bin")

template = """Question: {question}

Answer: Let's think step by step."""

prompt = PromptTemplate(template=template, input_variables=["question"])
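

# Minimal sketch (not called anywhere below) of how the LangChain imports above could run
# the same prompt through an LLMChain; the model file is assumed to be the same one
# already loaded for `model`.
def run_with_langchain(question):
    # Callbacks support token-wise streaming of the answer to stdout.
    callbacks = [StreamingStdOutCallbackHandler()]
    llm = LangChainGPT4All(model="orca-mini-3b.ggmlv3.q4_0.bin", callbacks=callbacks, verbose=True)
    llm_chain = LLMChain(prompt=prompt, llm=llm)
    return llm_chain.run(question)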


prompt_template = """
### System:
Answer the question based on the input below.
If the question cannot be answered using the information
provided, answer with "I don't know". Always answer in Spanish.

### Input:
%s

### User:
Question:
%s

### Response:
Respuesta:"""


def FinderDb(query, dbs, filtered=False):
    # Retrieve the 9 closest documents (with their distance scores) from the vector store.
    Sal = dbs.similarity_search_with_score(query, k=9)
    page_content = []
    d = []
    # When filtering, keep only results whose distance score is below 1.5; otherwise accept everything.
    if filtered:
        lim = 1.5
    else:
        lim = 9000000
    for output in Sal:
        if output[1] < lim:
            page_content.append(output[0].page_content)
            d.append(output[1])
    if len(page_content) < 1:
        return None, None
    return "\n".join(page_content), d
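

# The `db` used by slow_echo below is assumed to be a LangChain vector store built elsewhere.
# This stand-in is only a minimal sketch so the retrieval call has something to search:
# the corpus, Chroma, and GPT4AllEmbeddings are assumptions, not part of the original app.
from langchain.embeddings import GPT4AllEmbeddings
from langchain.vectorstores import Chroma

db = Chroma.from_texts(
    ["Otaisa es la ciudad mas linda de Parita"],  # placeholder documents
    GPT4AllEmbeddings(),
)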


def slow_echo(message, history):
    text = ""
    # Look up supporting context for the user's message in the vector store
    # (the retrieved context is not inserted into the prompt below).
    context, d = FinderDb(message, db, filtered=True)
    # Fill the prompt template with a fixed Input string and the user's question.
    query = prompt_template % ("Otaisa es la ciudad mas linda de Parita", message)
    print(query)
    # Stream the answer token by token, yielding the growing text so the chat UI updates live.
    for i in model.generate(query, max_tokens=200, streaming=True, temp=0.1, top_k=40, top_p=0.4):
        time.sleep(0.03)
        text = text + i
        yield text


# Launch the Gradio chat UI (served locally by default).
gr.ChatInterface(slow_echo).queue().launch()