Functionality OK
This commit is contained in:
parent
c9cca52c74
commit
664f2a35a0
12
README.md
12
README.md
|
@ -0,0 +1,12 @@
|
||||||
|
pip install llama-index
|
||||||
|
pip install llama-index-llms-groq
|
||||||
|
pip install llama-index-embeddings-huggingface
|
||||||
|
pip install llama-parse
|
||||||
|
pip install chromadb
|
||||||
|
pip install llama-index-vector-stores-chroma
|
||||||
|
pip install llama-index-embeddings-huggingface-api
|
||||||
|
pip install python-fasthtml
|
||||||
|
pip install groq
|
||||||
|
pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
|
||||||
|
|
||||||
|
|
257
main.py
257
main.py
|
@ -1,5 +1,30 @@
|
||||||
from fasthtml.common import *
|
from fasthtml.common import *
|
||||||
|
from llama_index.core import SimpleDirectoryReader, Document,VectorStoreIndex
|
||||||
|
from llama_index.core.node_parser import SimpleNodeParser
|
||||||
|
from llama_index.core.text_splitter import TokenTextSplitter
|
||||||
|
from llama_index.vector_stores.chroma import ChromaVectorStore
|
||||||
|
from llama_index.core.storage.storage_context import StorageContext
|
||||||
|
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
|
||||||
|
from llama_index.core import SummaryIndex
|
||||||
|
from llama_index.llms.groq import Groq
|
||||||
|
from chromadb import PersistentClient
|
||||||
|
from llama_index.core import Settings
|
||||||
|
from llama_index.embeddings.huggingface_api import (
|
||||||
|
HuggingFaceInferenceAPIEmbedding,
|
||||||
|
)
|
||||||
|
import chromadb
|
||||||
import os
|
import os
|
||||||
|
import threading
|
||||||
|
import time
|
||||||
|
from llama_index.core.memory import ChatMemoryBuffer
|
||||||
|
# The Groq credential must be supplied through the environment; API keys do
# not belong in source control.
# NOTE(review): the key that used to be hard-coded on this line is exposed in
# the commit history and should be revoked and rotated.
if not os.environ.get("GROQ_API_KEY"):
    raise RuntimeError(
        "GROQ_API_KEY is not set; export it before starting the application."
    )

# Groq-hosted LLM, installed below as the llama-index default model.
llm_70b = Groq(model="llama-3.1-70b-versatile")

# Module-level chat history buffer, capped at 3900 tokens.
memory = ChatMemoryBuffer.from_defaults(token_limit=3900)

Settings.llm = llm_70b

app = FastHTML()
|
||||||
|
@ -14,54 +39,220 @@ def menuusers(users):
|
||||||
for user in users:
|
for user in users:
|
||||||
T.append(Option(user, value=str(user)) )
|
T.append(Option(user, value=str(user)) )
|
||||||
return Form(
|
return Form(
|
||||||
Select(*T,
|
Select(*T,name="user"),
|
||||||
cls="selector",
|
Button("Submit",type="submit",id="buttonMenuuser"),
|
||||||
_id="counter",
|
hx_post="/checkInfoSources",hx_swap="innerHTML",hx_target="#files" ,id="menuuser")
|
||||||
name="data",
|
|
||||||
**{'@click':"alert('Clicked');"},),Button("Submit"),action="/checkInfoSources", method="post")
|
|
||||||
|
|
||||||
@app.post("/checkInfoSources")
def checkInfoSources(user:str):
    """List the files of the selected user and offer them for indexing.

    Stores ``user`` in the module-level ``userdata`` global and returns a form
    that posts the chosen file path (``data``) and a collection name
    (``collection``) to ``/createCollection``; the response is swapped into
    ``#NewCollection``.
    """
    global userdata
    user_dir = "static/" + user
    with os.scandir(user_dir) as entries:
        file_options = [
            Option(entry.name, value=user_dir + "/" + entry.name)
            for entry in entries
            if entry.is_file()
        ]
    userdata = user
    print("Cambio", userdata)
    file_picker = Select(*file_options, name="data")
    collection_name = Input(
        id="name-db", name="collection", placeholder="Enter a collection name"
    )
    return Form(
        file_picker,
        collection_name,
        Button("Submit", type="submit"),
        hx_post="/createCollection",
        hx_swap="innerHTML",
        hx_target="#NewCollection",
    )
|
|
||||||
|
|
||||||
|
|
||||||
@app.post("/process")
|
def create_or_load_db(path="./chroma_db",collection="init",Nodes=None,model="sentence-transformers/all-mpnet-base-v2"):
    """Build or open a vector index backed by a persistent Chroma collection.

    Args:
        path: Directory of the persistent Chroma database.
        collection: Name of the Chroma collection; it is created when missing.
        Nodes: Parsed nodes to embed and store. When falsy (``None`` or an
            empty list) nothing is inserted and the index is opened over
            whatever the collection already contains.
        model: Accepted for interface compatibility but currently ignored;
            see the note below.

    Returns:
        The ``VectorStoreIndex`` bound to the Chroma collection.
    """
    # NOTE(review): `model` is not used -- the embedding model is fixed here.
    # Wiring the parameter in would change the embeddings for callers that
    # already pass a different model name, so confirm that existing
    # collections can be rebuilt before doing so.
    embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-base-en-v1.5")

    db = chromadb.PersistentClient(path=path)
    chroma_collection = db.get_or_create_collection(collection)
    vector_store = ChromaVectorStore(chroma_collection=chroma_collection)

    if Nodes:
        # Embed the new nodes and persist them through the Chroma store.
        storage_context = StorageContext.from_defaults(vector_store=vector_store)
        return VectorStoreIndex(
            Nodes, storage_context=storage_context, embed_model=embed_model
        )

    # Nothing to insert: expose the stored vectors as an index.
    return VectorStoreIndex.from_vector_store(
        vector_store,
        embed_model=embed_model,
    )
|
||||||
|
|
||||||
|
def post_process_documents(documents):
    """Normalise documents and split them into nodes for indexing.

    Each document's text is lower-cased, split on single spaces, and stripped
    of the tokens listed in ``stop_words``. The original metadata is copied
    and extended with ``word_count`` (the token count before filtering). The
    cleaned documents are then chunked with ``SimpleNodeParser``.

    Args:
        documents: Loaded documents exposing ``text`` and ``metadata``.

    Returns:
        The nodes produced by the node parser from the cleaned documents.
    """
    # NOTE(review): the original built this as set("adssss"), i.e. the single
    # characters a, d and s -- it looks like a placeholder rather than a real
    # stop-word list. The effective value is kept so output is unchanged.
    stop_words = {"a", "d", "s"}

    print(len(documents))
    processed_documents = []
    for position, doc in enumerate(documents, start=1):
        print(position)  # progress trace, one line per document
        tokens = doc.text.lower().split(" ")
        # Tokens are already lower-case, so no further case folding is needed.
        filtered_text = " ".join(
            token for token in tokens if token not in stop_words
        )
        metadata = doc.metadata.copy()
        metadata["word_count"] = len(tokens)
        processed_documents.append(
            Document(text=filtered_text, metadata=metadata)
        )

    node_parser = SimpleNodeParser(chunk_size=200, chunk_overlap=30)
    return node_parser.get_nodes_from_documents(processed_documents)
|
||||||
|
|
||||||
|
@app.get("/listmodelactives")
def listmodelactives():
    """Return a form listing the Chroma collections of the selected user.

    While no user has been selected (the ``userdata`` global does not exist
    yet) the same placeholder ``#options`` div is returned, so the next click
    on ``#buttonMenuuser`` triggers this endpoint again. Otherwise the form
    posts the chosen collection name to ``/loadCollection`` and the response
    is swapped into ``#Infomodel``.
    """
    try:
        print(userdata)
    except NameError:
        # `userdata` is only created by /checkInfoSources; catching NameError
        # alone keeps unrelated failures visible.
        print("cambio")
        return Div(
            id="options",
            hx_target="this",
            hx_swap="outerHTML",
            hx_get="/listmodelactives",
            hx_trigger="click from:#buttonMenuuser",
        )

    db = chromadb.PersistentClient(path="static/" + userdata + "/chroma_db")
    collection_options = [
        Option(found.name, value=found.name) for found in db.list_collections()
    ]
    return Form(
        Select(*collection_options, name="data"),
        Button("Submit", type="submit"),
        hx_post="/loadCollection",
        hx_swap="innerHTML",
        hx_target="#Infomodel",
    )
|
||||||
|
|
||||||
|
@app.post("/loadCollection")
def loadCollection(data:str):
    """Open the chosen collection of the current user and make it the active index.

    The resulting index is stored in the module-level ``index`` global, which
    the query and chat handlers read. Returns a short confirmation paragraph.
    """
    global index
    user_db_path = "static/" + userdata + "/chroma_db"
    index = create_or_load_db(
        path=user_db_path,
        collection=data,
        model="BAAI/bge-m3",
    )
    confirmation = f"El usuario {userdata} colleccion {data}"
    return P(confirmation)
|
||||||
|
|
||||||
|
@app.post("/queryprompt")
def queryPrompt(question:str):
    """Run a one-shot query against the active index and return the answer.

    Relies on the module-level ``index`` global set by ``/loadCollection``.
    """
    answer = index.as_query_engine().query(question)
    return P(answer)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@app.post("/chatData")
def questionChat(message:str):
    """Send one chat message to a context-aware chat engine over the active index.

    A chat engine is built per request from the module-level ``index``,
    ``memory`` and ``llm_70b`` objects. Returns the user's message followed by
    the engine's reply, each wrapped in a paragraph.
    """
    # NOTE(review): the prompt pieces are concatenated without a separator
    # between the second and third literal; kept verbatim.
    context_prompt = (
        "You are a chatbot, able to have normal interactions, as well as talk"
        " about an essay discussing IA and uses in leardeship."
        "Here are the relevant documents for the context:\n"
        "{context_str}"
        "\nInstruction: Use the previous chat history, or the context above, to interact and help the user but only about tematic of the essay"
    )
    engine_options = dict(
        chat_mode="condense_plus_context",
        memory=memory,
        llm=llm_70b,
        context_prompt=context_prompt,
        verbose=False,
    )
    chat_engine = index.as_chat_engine(**engine_options)
    reply = chat_engine.chat(message)
    return P(message), P(reply)
|
||||||
|
|
||||||
|
@app.get("/SummarySources")
def SummarySources():
    """Return a form for requesting a summary of one of the current user's files.

    The form posts the selected file path (``data``) and a free-text ``query``
    to ``/SummaryMake``; the response is swapped into ``#summaryR``.
    """
    user_dir = "static/" + userdata
    with os.scandir(user_dir) as entries:
        file_options = [
            Option(entry.name, value=user_dir + "/" + entry.name)
            for entry in entries
            if entry.is_file()
        ]
    return Form(
        "Este es muy caro para documentos grandes y tarda mucho",
        Select(*file_options, name="data"),
        Input(name="query", placeholder="Enter a query"),
        Button("Submit", type="submit"),
        hx_post="/SummaryMake",
        hx_swap="innerHTML",
        hx_target="#summaryR",
    )
|
||||||
|
|
||||||
|
@app.post("/SummaryMake")
def SummaryMake(data:str,query:str):
    """Answer ``query`` over a summary index built from the single file ``data``.

    The numbered prints trace progress through the load, index, engine and
    query steps.
    """
    print(data, query)
    reader = SimpleDirectoryReader(input_files=[data])
    loaded_docs = reader.load_data()
    print("p1")
    summary_index = SummaryIndex.from_documents(loaded_docs)
    print("p2")
    engine = summary_index.as_query_engine()
    print("p3")
    answer = engine.query(query)
    print("p4")
    return P(answer)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
@app.post("/createCollection")
def createCollection(data:str,collection:str):
    """Index one file into a new or existing Chroma collection in the background.

    The file is loaded and split into nodes in the request itself; embedding
    and storage run in a worker thread. The thread and its start time are
    published through the module-level ``t`` and ``t_time`` globals, which
    ``/is_busy`` reads.

    Args:
        data: Path of the file to index; its second ``/``-separated component
            selects the folder under ``static/`` that holds the database.
        collection: Name of the Chroma collection to fill.

    Returns:
        A div announcing that loading has started.
    """
    global t, t_time

    print("Reading")
    docs = SimpleDirectoryReader(input_files=[data]).load_data()
    print("Process Documents")
    Nodes = post_process_documents(docs)
    print("create DB")

    db_path = "static/" + data.split("/")[1] + "/chroma_db"

    def build_collection():
        print("Hilo")
        create_or_load_db(
            path=db_path,
            collection=collection,
            Nodes=Nodes,
            model="BAAI/bge-m3",
        )

    worker = threading.Thread(target=build_collection)
    # Record the start time before the thread becomes visible and alive, so a
    # concurrent /is_busy poll never sees a running thread with a missing or
    # stale `t_time`.
    t_time = time.time()
    t = worker
    worker.start()
    return Div("Iniciando carga de datos")
|
||||||
|
|
||||||
|
@app.get("/is_busy")
def is_busy():
    """Report whether the background indexing thread is still running.

    Returns:
        ``False`` when no indexing thread exists or it has finished; otherwise
        a message with the seconds elapsed since the thread was started.
    """
    try:
        Busy = t.is_alive()
    except NameError:
        # `t` is only created by /createCollection; before the first indexing
        # job there is nothing running.
        Busy = False

    if not Busy:
        return Busy
    return "Esta ocupados desde hace %s , este es un proceso largo"%(str(time.time()-t_time))
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@app.get("/")
def home():
    """Render the landing page: user picker, file/collection panels and chatbot.

    Returns a ``(Title, Main)`` pair. The panels are filled in by the htmx
    endpoints referenced in their attributes.
    """
    # Collection picker; refreshed 3s after the user-selection button is clicked.
    collection_options = Div(
        id="options",
        hx_target="this",
        hx_swap="outerHTML",
        hx_get="/listmodelactives",
        hx_trigger="click from:#buttonMenuuser delay:3s",
    )

    chat_panel = Div(
        Form(
            Input(id="question", name="message", placeholder="Enter a message"),
            Button("Submit", type="submit"),
            hx_post="/chatData",
            hx_swap="afterend",
            hx_target="#questionR",
        ),
        Div(id="questionR"),
        id="questions",
    )

    # NOTE(review): the input and its wrapping div both use id "query".
    query_panel = Div(
        Form(
            Input(id="query", name="question", placeholder="Enter a query"),
            Button("Submit", type="submit"),
            hx_post="/queryprompt",
            hx_swap="innerHTML",
            hx_target="#queryR",
        ),
        Div(id="queryR"),
        id="query",
    )

    chatbot = Div(
        collection_options,
        Div(id="Infomodel"),
        chat_panel,
        query_panel,
        id="chatbot",
    )

    # Polls /is_busy once a minute to show the indexing state.
    status = Div(
        "Estado",
        id="status",
        hx_target="this",
        hx_swap="innerHTML",
        hx_get="/is_busy",
        hx_trigger="every 60000ms",
    )

    content = Div(
        'Este es el sistema de super tutor, ',
        menuusers(listUsers()),
        Div("Archivos", id="files"),
        Div(id="NewCollection"),
        status,
        chatbot,
    )
    page = Title('Super tutor'), Main(content)
    return page
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# Serve the contents of the local "static" directory under the /static URL prefix.
app.mount(
    "/static",
    StaticFiles(directory="static"),
    name="static",
)
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue