Functionality OK

2024-09-26 12:58:38 -05:00 · 2024-09-26 12:58:38 -05:00 · 664f2a35a0
parent c9cca52c74
commit 664f2a35a0
2 changed files with 238 additions and 35 deletions
--- a/README.md
+++ b/README.md
@ -0,0 +1,12 @@
+pip install llama-index
+pip install llama-index-llms-groq
+pip install llama-index-embeddings-huggingface
+pip install llama-parse
+pip install chromadb
+pip install llama-index-vector-stores-chroma
+pip install llama-index-embeddings-huggingface-api
+pip install python-fasthtml
+pip install groq
+pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
+
+
--- a/main.py
+++ b/main.py
@ -1,5 +1,30 @@
 from fasthtml.common import *
+from llama_index.core import SimpleDirectoryReader, Document,VectorStoreIndex
+from llama_index.core.node_parser import SimpleNodeParser
+from llama_index.core.text_splitter import TokenTextSplitter
+from llama_index.vector_stores.chroma import ChromaVectorStore
+from llama_index.core.storage.storage_context import StorageContext
+from llama_index.embeddings.huggingface import HuggingFaceEmbedding
+from llama_index.core import SummaryIndex
+from llama_index.llms.groq import Groq
+from chromadb import PersistentClient
+from llama_index.core import Settings
+from llama_index.embeddings.huggingface_api import (
+    HuggingFaceInferenceAPIEmbedding,
+)
+import chromadb
 import os
+import threading
+import time
+from llama_index.core.memory import ChatMemoryBuffer
+# SECURITY: the Groq credential must come from the GROQ_API_KEY environment
+# variable. A literal key used to be assigned here; anything committed to
+# version control is exposed and should be revoked and rotated.
+if not os.environ.get("GROQ_API_KEY"):
+    raise RuntimeError("Set the GROQ_API_KEY environment variable before starting the app")
+
+
+# Shared LLM client and chat memory used by the request handlers below.
+llm_70b = Groq(model="llama-3.1-70b-versatile")
+memory = ChatMemoryBuffer.from_defaults(token_limit=3900)
+
+Settings.llm = llm_70b
+


 app= FastHTML()
@ -14,54 +39,220 @@ def menuusers(users):
    for user in users:
        T.append(Option(user, value=str(user)) )
    return Form(
-    Select(*T,        
-        cls="selector",
-        _id="counter",
-        name="data",
-        **{'@click':"alert('Clicked');"},),Button("Submit"),action="/checkInfoSources", method="post")
+            Select(*T,name="user"),
+            Button("Submit",type="submit",id="buttonMenuuser"),
+            hx_post="/checkInfoSources",hx_swap="innerHTML",hx_target="#files" ,id="menuuser")

@app.post("/checkInfoSources")
-def checkInfoSources(data:str):
-    print(data)
-    with os.scandir("static/"+data) as files:
-        subdir = [CheckboxX(label=file.name,value="static/"+data+"/"+file.name) for file in files if  file.is_file()]
+def checkInfoSources(user:str):
+    global userdata
+    with os.scandir("static/"+user) as files:
+        subdir = [Option(file.name,value="static/"+user+"/"+file.name) for file in files if  file.is_file()]
+    userdata=user
+    print("Cambio",userdata)
    return Form(
-   Label(*subdir,        
-        cls="selector",
-        _id="counter",
-        hx_target="files",
-        name="data",
-        **{'@click':"alert('Clicked');"},),Button("Submit"),action="/process", method="post")
+    Select(
+    *subdir,name="data"),
+    Input(id="name-db", name="collection", placeholder="Enter a collection name"),
+        Button("Submit",type="submit"), hx_post="/createCollection",hx_swap="innerHTML",hx_target="#NewCollection" )


-@app.post("/process")
-def processData():
-    print()
-    pass
-    
-                
+def create_or_load_db(path="./chroma_db",collection="init",Nodes=None,model="sentence-transformers/all-mpnet-base-v2"):
+    """Build or reopen a vector index backed by a persistent Chroma collection.
+
+    Args:
+        path: Directory of the persistent Chroma database.
+        collection: Collection name; fetched when it exists, created otherwise.
+        Nodes: Nodes to embed and insert. When falsy, the index is only
+            attached to whatever the collection already contains.
+        model: Accepted but not used by this function (see note below).
+
+    Returns:
+        A VectorStoreIndex bound to the Chroma collection.
+    """
+    # NOTE(review): `model` is ignored; the embedding model is fixed here.
+    embedder = HuggingFaceEmbedding(model_name="BAAI/bge-base-en-v1.5")
+
+    client = chromadb.PersistentClient(path=path)
+    store = ChromaVectorStore(chroma_collection=client.get_or_create_collection(collection))
+    context = StorageContext.from_defaults(vector_store=store)
+
+    if not Nodes:
+        # Nothing new to insert: reuse the vectors already persisted.
+        return VectorStoreIndex.from_vector_store(store, embed_model=embedder)
+    return VectorStoreIndex(Nodes, storage_context=context, embed_model=embedder)
+
+def post_process_documents(documents):
+    """Lowercase and filter each document, then split the results into nodes.
+
+    For every input document the text is lowercased, split on single spaces,
+    stripped of the ignored tokens, and re-wrapped in a new Document whose
+    metadata is a copy of the original plus a 'word_count' entry (the number
+    of space-separated tokens before filtering).
+
+    Args:
+        documents: Sized iterable of objects exposing `.text` and `.metadata`.
+
+    Returns:
+        The nodes produced by SimpleNodeParser (chunk_size=200,
+        chunk_overlap=30) over the processed documents.
+    """
+    # set() over a string yields its characters, so this is {"a", "d", "s"}.
+    ignored = set("adssss")
+    print(len(documents))
+    cleaned = []
+    for position, source in enumerate(documents, start=1):
+        print(position)
+        words = source.text.lower().split(" ")
+        kept = ' '.join(w for w in words if w.lower() not in ignored)
+        info = source.metadata.copy()
+        info['word_count'] = len(words)
+        cleaned.append(Document(text=kept, metadata=info))
+    splitter = SimpleNodeParser(chunk_size=200, chunk_overlap=30)
+    return splitter.get_nodes_from_documents(cleaned)
+
+@app.get("/listmodelactives")
+def listmodelactives():
+    """List the Chroma collections of the currently selected user.
+
+    Reads the module-level `userdata` (assigned by /checkInfoSources). While
+    no user has been selected yet, the placeholder Div is returned again so
+    the page keeps polling on the next click of the user-menu button.
+
+    Returns:
+        A Form with one Option per collection that posts to /loadCollection,
+        or the placeholder Div when `userdata` is not defined.
+    """
+    try:
+        current_user = userdata
+    except NameError:
+        # Only "global not assigned yet" is expected here; any other error
+        # should surface instead of being swallowed.
+        print("cambio")
+        return Div(id="options",hx_target="this",hx_swap="outerHTML",hx_get="/listmodelactives",hx_trigger="click from:#buttonMenuuser")
+    print(current_user)
+    db = chromadb.PersistentClient(path="static/"+current_user+"/chroma_db")
+
+    # Some chromadb releases return plain names instead of Collection objects.
+    names = [getattr(entry, "name", entry) for entry in db.list_collections()]
+    collecs = [Option(name, value=name) for name in names]
+
+    return Form(
+    Select(
+    *collecs,name="data"),
+    Button("Submit",type="submit"),
+    hx_post="/loadCollection",hx_swap="innerHTML",hx_target="#Infomodel")
+
+@app.post("/loadCollection")
+def loadCollection(data:str):
+    """Load collection `data` for the current user into the global `index`.
+
+    Args:
+        data: Name of the Chroma collection to open.
+
+    Returns:
+        A paragraph naming the current user and the loaded collection.
+    """
+    global index
+    db_path = "static/"+userdata+"/chroma_db"
+    index = create_or_load_db(path=db_path, collection=data, model="BAAI/bge-m3")
+    summary = "El usuario %s colleccion %s" % (userdata, data)
+    return P(summary)
+
+@app.post("/queryprompt")
+def queryPrompt(question:str):
+    """Run a single query against the module-level `index`.
+
+    `index` is not created here; it is the global assigned by /loadCollection.
+
+    Args:
+        question: Text passed to the query engine.
+
+    Returns:
+        A paragraph wrapping the query engine's response.
+    """
+    answer = index.as_query_engine().query(question)
+    return P(answer)
+
+
+
+@app.post("/chatData")
+def questionChat(message:str):
+    """Answer one chat message using the module-level `index` as context.
+
+    A "condense_plus_context" chat engine is built on every call; it uses the
+    module-level `memory` buffer and `llm_70b` model.
+
+    Args:
+        message: The user's chat message.
+
+    Returns:
+        Two paragraphs: the user's message, then the engine's reply.
+    """
+    # Adjacent literals are concatenated verbatim into one prompt template.
+    system_context = (
+        "You are a chatbot, able to have normal interactions, as well as talk"
+        " about an essay discussing IA and uses in leardeship."
+        "Here are the relevant documents for the context:\n"
+        "{context_str}"
+        "\nInstruction: Use the previous chat history, or the context above, to interact and help the user but only about tematic of the essay"
+    )
+    engine_options = dict(
+        chat_mode="condense_plus_context",
+        memory=memory,
+        llm=llm_70b,
+        context_prompt=system_context,
+        verbose=False,
+    )
+    reply = index.as_chat_engine(**engine_options).chat(message)
+    return P(message),P(reply)
+
+@app.get("/SummarySources")
+def SummarySources():
+    """Render the form used to request a summary of one of the user's files.
+
+    Lists the regular files directly inside "static/<userdata>" and offers
+    them in a Select next to a free-text query; the form posts to
+    /SummaryMake and swaps the answer into #summaryR.
+    """
+    folder = "static/"+userdata
+    choices = []
+    with os.scandir(folder) as entries:
+        for entry in entries:
+            if entry.is_file():
+                choices.append(Option(entry.name,value=folder+"/"+entry.name))
+    file_picker = Select(*choices,name="data")
+    query_box = Input( name="query", placeholder="Enter a query")
+    return Form(
+        "Este es muy caro para documentos grandes y tarda mucho",
+        file_picker,
+        query_box,
+        Button("Submit",type="submit"),
+        hx_post="/SummaryMake",
+        hx_swap="innerHTML",
+        hx_target="#summaryR",
+    )
+
+@app.post("/SummaryMake")
+def SummaryMake(data:str,query:str):
+    """Answer `query` over a SummaryIndex built from a single file.
+
+    Args:
+        data: Path of the file to read; it must resolve inside "static/".
+        query: Question passed to the summary query engine.
+
+    Returns:
+        A paragraph with the engine's response, or an error paragraph when
+        `data` points outside the "static/" directory.
+    """
+    print(data,query)
+    # `data` is supplied by the client, so refuse any path that escapes the
+    # upload root (e.g. "../" sequences or absolute paths).
+    root = os.path.realpath("static")
+    target = os.path.realpath(data)
+    if not target.startswith(root + os.sep):
+        return P("Archivo no permitido")
+    docs = SimpleDirectoryReader(
+        input_files=[data]
+    ).load_data()
+    print("p1")
+    summary_index = SummaryIndex.from_documents(docs)
+    print("p2")
+    summary_engine = summary_index.as_query_engine()
+    print("p3")
+    response = summary_engine.query(query)
+    print("p4")
+    return P(response)
+
+
+
+@app.post("/createCollection")
+def createCollection(data:str,collection:str):
+    """Index one file into a Chroma collection on a background thread.
+
+    The file is read and split into nodes in the request itself; only the
+    embedding/insert step (create_or_load_db) runs on the worker thread. The
+    thread and its start time are published through the module-level `t` and
+    `t_time`, which /is_busy reads.
+
+    Args:
+        data: Path of the source file, expected as "static/<user>/<file>".
+        collection: Name of the collection to create or extend.
+
+    Returns:
+        A Div announcing that loading has started, or an error Div when
+        `data` does not point to a file inside a user folder under "static/".
+    """
+    global t, t_time
+    # `data` is supplied by the client: confine it to the upload root and
+    # derive the user folder from the resolved path, not from raw splitting.
+    root = os.path.realpath("static")
+    target = os.path.realpath(data)
+    if not target.startswith(root + os.sep):
+        return Div("Archivo no permitido")
+    parts = os.path.relpath(target, root).split(os.sep)
+    if len(parts) < 2:
+        # A file directly under static/ has no user folder to hold the DB.
+        return Div("Archivo no permitido")
+    db_path = "static/"+parts[0]+"/chroma_db"
+
+    print("Reading")
+    docs = SimpleDirectoryReader(
+        input_files=[data]
+    ).load_data()
+    print("Process Documents")
+    Nodes=post_process_documents(docs)
+    print("create DB")
+
+    def build_collection():
+        print("Hilo")
+        create_or_load_db(path=db_path,collection=collection,Nodes=Nodes,model="BAAI/bge-m3")
+
+    # Publish the start time before the thread so /is_busy never sees a live
+    # thread without a matching `t_time`.
+    t_time=time.time()
+    t = threading.Thread(target=build_collection)
+    t.start()
+    return Div("Iniciando carga de datos")
+ 
+@app.get("/is_busy")
+def is_busy():
+    """Report whether the background indexing thread is still running.
+
+    Reads the module-level `t` and `t_time` set by /createCollection.
+
+    Returns:
+        False when no thread has been started or it has finished; otherwise
+        a message with the seconds elapsed since the thread was started.
+    """
+    try:
+        busy = t.is_alive()
+    except NameError:
+        # `t` does not exist until the first collection is created.
+        busy = False
+    if not busy:
+        return busy
+    return "Esta ocupados desde hace %s , este es un proceso largo"%(str(time.time()-t_time))
+
+

@app.get("/")
 def home():
-    page = Html(
-        Head(Title('Super tutor')),
-        Body(Div('Este es el sistema de super tutor, ', 
+    
+    page = Title('Super tutor'),Main(
+        Div('Este es el sistema de super tutor, ', 
                menuusers(listUsers()),
-                A('A link', href='https://example.com'), 
-                Img(src="https://placehold.co/200"),
-                Form(
-                    Select(
-                        Option("user", value=str("user"))),
-                    Button("Submit"),
-                    action="/", method="post"), cls='myclass')),
-                    Div(id="files"))
-                
-    
-    
+                #A('A link', href='https://example.com'), 
+                #Img(src="https://placehold.co/200"),
+                Div("Archivos",id="files"),
+                Div(id="NewCollection"),
+                Div("Estado",id="status",hx_target="this",hx_swap="innerHTML",hx_get="/is_busy",hx_trigger="every 60000ms"),
+                Div(
+                    Div(id="options",hx_target="this",hx_swap="outerHTML",hx_get="/listmodelactives",hx_trigger="click from:#buttonMenuuser delay:3s"),
+                    Div(id="Infomodel"),
+                    #Div("Resumen",Div(id="summary",hx_target="this",hx_swap="outerHTML",hx_get="/SummarySources",hx_trigger="click from:#buttonMenuuser"),Div(id="summaryR")),
+                    Div(
+                        Form(
+                            Input(id="question", name="message", placeholder="Enter a message"),
+                            Button("Submit",type="submit"), hx_post="/chatData",hx_swap="afterend",hx_target="#questionR" ),
+                        Div(id="questionR")
+                    ,id="questions"),
+                    Div(
+                        Form(
+                            Input(id="query", name="question", placeholder="Enter a query"),
+                            Button("Submit",type="submit"), hx_post="/queryprompt",hx_swap="innerHTML",hx_target="#queryR" ),
+                        Div(id="queryR"),
+                        id="query"),
+                    id="chatbot")
+                ))
    return page

+
+
 app.mount("/static", StaticFiles(directory="static"), name="static")