Llm2Chat/app2.py

import time
import gradio as gr
from gpt4all import GPT4All
from langchain.prompts import PromptTemplate

# Load the local GPT4All model; the .bin file must be present on disk.
# generate(..., streaming=True) lets us stream the answer token by token.
model = GPT4All("orca-mini-3b.ggmlv3.q4_0.bin")
template = """Question: {question}
Answer: Let's think step by step."""
prompt = PromptTemplate(template=template, input_variables=["question"])
prompt_template = """
### System:
Answer the question based on the input below.
If the question cannot be answered using the information
provided answer with "I don't know". Ever answer in spanish.
### Input:
%s
### User:
Question:
%s
### Response:
Respuesta:"""

def FinderDb(query, dbs, filtered=False):
    """Return the page contents (joined with newlines) and distance scores of the
    documents in `dbs` most similar to `query`. With filtered=True, only close
    matches (distance < 1.5) are kept."""
    results = dbs.similarity_search_with_score(query, 9)
    page_content = []
    scores = []
    lim = 1.5 if filtered else 9000000
    for doc, score in results:
        if score < lim:
            page_content.append(doc.page_content)
            scores.append(score)
    if len(page_content) < 1:
        return None, None
    return "\n".join(page_content), scores

def slow_echo(message, history):
    """Gradio chat handler: retrieve context for the message, build the prompt,
    and stream the model's answer token by token."""
    text = ""
    # Look up supporting context for the user's question in the vector store.
    context, _scores = FinderDb(message, db, filtered=True)
    if context is None:
        # Fall back to a fixed example input when nothing relevant is found.
        context = "Otaisa es la cuidad mas linda de parita"
    query = prompt_template % (context, message)
    print(query)
    # Stream the answer so the chat UI updates as tokens arrive.
    for token in model.generate(query, max_tokens=200, streaming=True, temp=0.1, top_k=40, top_p=0.4):
        time.sleep(0.03)
        text = text + token
        yield text
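
# Wire the streaming handler into a Gradio chat UI; queue() enables generator
# (streaming) responses, and launch() serves the app locally
# (by default at http://127.0.0.1:7860).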
gr.ChatInterface(slow_echo).queue().launch()