Llm2Chat/app2.py

import time
import gradio as gr
from gpt4all import GPT4All
from langchain.prompts import PromptTemplate

# Load the local GPT4All model; the .bin file must be present on disk.
# generate(..., streaming=True) lets us stream the answer token by token.
model = GPT4All("orca-mini-3b.ggmlv3.q4_0.bin")
template = """Question: {question}
Answer: Let's think step by step."""
prompt = PromptTemplate(template=template, input_variables=["question"])
prompt_template = """
### System:
Answer the question based on the input below.
If the question cannot be answered using the information
provided answer with "I don't know". Ever answer in spanish.
### Input:
%s
### User:
Question:
%s
### Response:
Respuesta:"""

def FinderDb(query, dbs, filtered=False):
    """Return the page contents (joined with newlines) and distance scores of the
    documents in `dbs` most similar to `query`. With filtered=True, only close
    matches (distance < 1.5) are kept."""
    results = dbs.similarity_search_with_score(query, 9)
    page_content = []
    scores = []
    lim = 1.5 if filtered else 9000000
    for doc, score in results:
        if score < lim:
            page_content.append(doc.page_content)
            scores.append(score)
    if len(page_content) < 1:
        return None, None
    return "\n".join(page_content), scores

def slow_echo(message, history):
    """Gradio chat handler: retrieve context for the message, build the prompt,
    and stream the model's answer token by token."""
    text = ""
    # Look up supporting context for the user's question in the vector store.
    context, _scores = FinderDb(message, db, filtered=True)
    if context is None:
        # Fall back to a fixed example input when nothing relevant is found.
        context = "Otaisa es la cuidad mas linda de parita"
    query = prompt_template % (context, message)
    print(query)
    # Stream the answer so the chat UI updates as tokens arrive.
    for token in model.generate(query, max_tokens=200, streaming=True, temp=0.1, top_k=40, top_p=0.4):
        time.sleep(0.03)
        text = text + token
        yield text
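
# Wire the streaming handler into a Gradio chat UI; queue() enables generator
# (streaming) responses, and launch() serves the app locally
# (by default at http://127.0.0.1:7860).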
gr.ChatInterface(slow_echo).queue().launch()