Init in server

This commit is contained in:
parent 664f2a35a0
commit c4b727ab19

main.py | 262 lines changed
@@ -6,28 +6,55 @@ from llama_index.vector_stores.chroma import ChromaVectorStore
from llama_index.core.storage.storage_context import StorageContext
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core import SummaryIndex
from llama_index.llms.groq import Groq
from llama_index.llms.groq import Groq as GroqLLamaIndex
from chromadb import PersistentClient
from llama_index.core import Settings
from llama_index.embeddings.huggingface_api import (
    HuggingFaceInferenceAPIEmbedding,
)

import chromadb
import os
import threading
import time
from llama_index.core.memory import ChatMemoryBuffer
os.environ["GROQ_API_KEY"] = "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"
import json
from llama_index.llms.ollama import Ollama
from llama_index.core.base.response.schema import Response
from groq import Groq
import shutil

pwd = os.getcwd()

def extractConfig(nameModel="SystemData", relPath=os.path.join(pwd, "conf/experiment_config.json"), dataOut="keyantrophics"):
    configPath = os.path.join(os.getcwd(), relPath)
    with open(configPath, 'r', encoding='utf-8') as file:
        config = json.load(file)[nameModel]
    Output = config[dataOut]
    return Output
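extractConfig expects a JSON file keyed by a model name ("SystemData") with per-key entries such as "keygroq" and "keyantrophics". A minimal sketch of a config file it could read; the layout is an assumption based on the calls in this file and the values are placeholders, not real credentials:

# Sketch: one plausible conf/experiment_config.json layout for extractConfig.
import json, os

example_config = {
    "SystemData": {
        "keygroq": "<your Groq API key>",          # read via dataOut="keygroq" below
        "keyantrophics": "<your Anthropic API key>"  # the function's default dataOut
    }
}

os.makedirs("conf", exist_ok=True)
with open("conf/experiment_config.json", "w", encoding="utf-8") as f:
    json.dump(example_config, f, indent=2)

# extractConfig(nameModel="SystemData", dataOut="keygroq") would then return
# the placeholder string above.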

keygroq = extractConfig(nameModel="SystemData", dataOut="keygroq")
client = Groq(api_key=keygroq)

llm_70b = Groq(model="llama-3.1-70b-versatile")

os.environ["GROQ_API_KEY"] = keygroq

llm_70b = GroqLLamaIndex(model="llama-3.1-70b-versatile")  #llm_70b#
llm_localLlamma = llm_70b  #Ollama(model="llama3.2")
memory = ChatMemoryBuffer.from_defaults(token_limit=3900)

Settings.llm = llm_70b
model_emb = "sentence-transformers/multi-qa-mpnet-base-dot-v1"
Settings.llm = llm_localLlamma
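The commented-out Ollama(model="llama3.2") hints at a local fallback for the Groq model. A minimal sketch of wiring that fallback in, assuming an Ollama server is running locally and the llama3.2 model has been pulled (not part of this commit):

# Sketch only: point LlamaIndex at a local Ollama model instead of Groq.
from llama_index.llms.ollama import Ollama
from llama_index.core import Settings

llm_localLlamma = Ollama(model="llama3.2", request_timeout=120.0)
Settings.llm = llm_localLlamma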

gridlink = Link(rel="stylesheet", href="https://cdnjs.cloudflare.com/ajax/libs/flexboxgrid/6.3.1/flexboxgrid.min.css", type="text/css")

app = FastHTML(hdrs=(picolink, gridlink))

app = FastHTML()

def listUsers():
    with os.scandir("static") as files:
        subdir = [file.name for file in files if file.is_dir()]
@@ -39,26 +66,29 @@ def menuusers(users):
    for user in users:
        T.append(Option(user, value=str(user)))
    return Form(
        Select(*T, name="user"),
        Button("Submit", type="submit", id="buttonMenuuser"),
        hx_post="/checkInfoSources", hx_swap="innerHTML", hx_target="#files", id="menuuser")
        H3("Seleccionar usuario", cls="col-xs-3"),
        Select(*T, name="user", cls="col-xs-3"),
        Button("Submit", type="submit", id="buttonMenuuser", cls="col-xs-6"),
        hx_post="/checkInfoSources", hx_swap="innerHTML", hx_target="#files", id="menuuser", cls="row middle-xs")

@app.post("/checkInfoSources")
def checkInfoSources(user:str):
    global userdata
    with os.scandir("static/"+user) as files:
        subdir = [Option(file.name, value="static/"+user+"/"+file.name) for file in files if file.is_file()]
        subdir = [Option(file.name, value="static/"+user+"/"+file.name) for file in files if (file.is_dir() and file.name != "chroma_db")]

    userdata = user
    print("Cambio", userdata)
    return Form(
        H3("Grupos de archivos", cls="col-xs-3"),
        Select(
            *subdir, name="data"),
        Input(id="name-db", name="collection", placeholder="Enter a collection name"),
        Button("Submit", type="submit"), hx_post="/createCollection", hx_swap="innerHTML", hx_target="#NewCollection")
            *subdir, name="data", cls="col-xs-3"),
        Input(id="name-db", name="collection", placeholder="Enter a collection name", cls="col-xs-4"),
        Button("Submit", type="submit", cls="col-xs-2"), hx_post="/createCollection", hx_swap="innerHTML", hx_target="#status", cls="row middle-xs")
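These forms drive the page through HTMX attributes that FastHTML passes straight to the markup: submitting the user form POSTs to /checkInfoSources and swaps the returned Form into #files, which in turn posts to /createCollection. A minimal standalone sketch of the same request/swap cycle; the /echo route and #result target are illustrative, not from the commit:

# Sketch: the FastHTML + HTMX pattern used by the forms above.
from fasthtml.common import FastHTML, Form, Input, Button, Div, P

app = FastHTML()

@app.get("/")
def home():
    return Div(
        Form(
            Input(name="user", placeholder="user name"),
            Button("Submit", type="submit"),
            # On submit, POST the form to /echo and replace the contents
            # of the #result div with whatever the handler returns.
            hx_post="/echo", hx_target="#result", hx_swap="innerHTML"),
        Div(id="result"))

@app.post("/echo")
def echo(user: str):
    return P(f"Selected user: {user}")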
def create_or_load_db(path="./chroma_db", collection="init", Nodes=None, model="sentence-transformers/all-mpnet-base-v2"):
    embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-base-en-v1.5")
def create_or_load_db(path="./chroma_db", collection="init", Nodes=None, modelT=model_emb):
    embed_model = HuggingFaceEmbedding(model_name=modelT)
    #embed_model = HuggingFaceInferenceAPIEmbedding(
    #    model_name="BAAI/bge-small-en-v1.5",
    #    token="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",  # Optional
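The rest of create_or_load_db falls outside this hunk. For orientation, a sketch of how such a function is typically written with the standard ChromaVectorStore recipe: open a persistent collection, wrap it in a vector store, then either index the supplied nodes or attach to the existing collection. This is an assumption about the unseen body, not the commit's code:

# Sketch only: one plausible body for create_or_load_db.
import chromadb
from llama_index.core import VectorStoreIndex, StorageContext
from llama_index.vector_stores.chroma import ChromaVectorStore
from llama_index.embeddings.huggingface import HuggingFaceEmbedding

def create_or_load_db_sketch(path="./chroma_db", collection="init", Nodes=None,
                             modelT="sentence-transformers/multi-qa-mpnet-base-dot-v1"):
    embed_model = HuggingFaceEmbedding(model_name=modelT)
    chroma_client = chromadb.PersistentClient(path=path)
    chroma_collection = chroma_client.get_or_create_collection(collection)
    vector_store = ChromaVectorStore(chroma_collection=chroma_collection)

    if Nodes is not None:
        # First load: embed the nodes and persist them into the collection.
        storage_context = StorageContext.from_defaults(vector_store=vector_store)
        index = VectorStoreIndex(Nodes, storage_context=storage_context,
                                 embed_model=embed_model)
    else:
        # Later loads: attach to the already-populated collection.
        index = VectorStoreIndex.from_vector_store(vector_store,
                                                   embed_model=embed_model)
    return index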
@@ -101,7 +131,7 @@ def post_process_documents(documents):
        # 4. Create a new document with processed text and updated metadata
        processed_doc = Document(text=filtered_text, metadata=metadata)
        processed_documents.append(processed_doc)
    node_parser = SimpleNodeParser(chunk_size=200, chunk_overlap=30)
    node_parser = SimpleNodeParser(chunk_size=360, chunk_overlap=20)
    nodes = node_parser.get_nodes_from_documents(processed_documents)
    return nodes
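The commit widens chunks from 200 to 360 tokens and shrinks the overlap from 30 to 20, so each document yields fewer, larger nodes. A rough back-of-the-envelope check, with an illustrative document length that is not from the commit:

# Rough estimate of how many nodes one document produces for a given chunking setup.
import math

def approx_chunks(doc_tokens: int, chunk_size: int, overlap: int) -> int:
    stride = chunk_size - overlap                      # new tokens consumed per chunk
    return max(1, math.ceil(max(doc_tokens - overlap, 1) / stride))

print(approx_chunks(3400, 200, 30))   # ~20 nodes with the old settings
print(approx_chunks(3400, 360, 20))   # ~10 nodes with the new settings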
@@ -119,42 +149,151 @@ def listmodelactives():

    return Form(
        Select(
            *collecs, name="data"),
        Button("Submit", type="submit"),
            *collecs, name="data", cls="col-xs-6"),
        Button("Submit", type="submit", cls="col-xs-6"),
        hx_post="/loadCollection", hx_swap="innerHTML", hx_target="#Infomodel")

@app.post("/loadCollection")
def loadCollection(data:str):
    global index
    index = create_or_load_db(path="static/"+userdata+"/chroma_db", collection=data, model="BAAI/bge-m3")
    index = create_or_load_db(path="static/"+userdata+"/chroma_db", collection=data, modelT=model_emb)
    return P("El usuario %s colleccion %s"%(userdata, data))

@app.post("/queryprompt")
def queryPrompt(question:str):
    #index=load_create_db(collection="my_collection")
    query_engine = index.as_query_engine()
    response = query_engine.query(question)
    return P(response)
    query_engine = index.as_query_engine(similarity_top_k=15, vector_store_query_mode="default", response_mode="tree_summarize")
    summary_prompt = (
        "Por favor, genera un resumen completo y detallado del material dado. "
        "Incluye los principales temas, argumentos y conclusiones. "
        "Estructura el resumen de manera coherente y organizada."
    )
    tematic_prompt = (
        "Por favor, genera un texto donde se mencione la tematica que trata el material dado. "
        "Incluye una tematica general y un indice por temas tratados. "
        "Estructura el texto de manera coherente y organizada."
    )
    issues_prompt = (
        "Por favor, genera un texto donde menciones, tomando en cuenta el material y el contenido de manera detallada, que mejoras podrias realizar al material, incluyendo nuevos datos o corrigiendo la informacion proporcionada. "
        "Esta mejora hazla pensando paso a paso y de manera muy cuidadosa; tienes que dar ejemplos del material o referenciar directamente el texto a mejorar, mencionando la causa de la mejora. "
        "Estructura el texto de manera coherente y organizada."
    )
    Question_prompt = (
        "Por favor, genera un texto donde indiques preguntas sobre el material que, cuando sean respondidas, capturen el punto central y los puntos importantes del texto. "
        "Estas preguntas hazlas pensando paso a paso y de manera muy cuidadosa. "
        "Estructura el texto de manera coherente y organizada."
    )
    response = query_engine.query(summary_prompt)
    response2 = query_engine.query(tematic_prompt)
    response3 = query_engine.query(issues_prompt)
    response4 = query_engine.query(Question_prompt)
    Output = "<H1>Summary</H1>"+str(response)+"<H1>Tematic</H1>"+str(response2)+"<H1>Issues</H1>"+str(response3)+"<H1>Questions</H1>"+str(response4)

    return Output
@app.post("/chatData")
|
||||
def questionChat(message:str):
|
||||
chat_engine = index.as_chat_engine(
|
||||
chat_mode="condense_plus_context",
|
||||
memory=memory,
|
||||
llm=llm_70b,
|
||||
context_prompt=(
|
||||
"You are a chatbot, able to have normal interactions, as well as talk"
|
||||
" about an essay discussing IA and uses in leardeship."
|
||||
import logging
|
||||
logging.basicConfig(level=logging.INFO)
|
||||
contextT=(
|
||||
"You are a world-class AI system. You respond to the questions about the context"
|
||||
"Here are the relevant documents for the context:\n"
|
||||
"{context_str}"
|
||||
"\nInstruction: Use the previous chat history, or the context above, to interact and help the user but only about tematic of the essay"
|
||||
),
|
||||
verbose=False,
|
||||
"\nInstruction: Use the previous chat history, or the context above, carefully examine the given context, to interact and help the user but only about {question}"
|
||||
"Never mention the document of reference, talk in first person" )
|
||||
#contextT="Por favor vuelve a repetir el siguiente contenido como tu respuesta:{question}"
|
||||
#'chat_history', 'question'
|
||||
query_engine = index.as_query_engine()
|
||||
chat_engine = index.as_chat_engine(
|
||||
chat_mode="condense_plus_context",
|
||||
query_engine=query_engine,
|
||||
memory=memory,
|
||||
llm=llm_localLlamma,
|
||||
context_prompt=contextT,
|
||||
similarity_top_k=5,
|
||||
verbose=True,
|
||||
)
|
||||
response = chat_engine.chat(message)
|
||||
return P(message),P(response)
|
||||
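    # Note: `memory` is a single module-level ChatMemoryBuffer, so the condensed
    # chat history used above is shared by every /chatData request (and every
    # user) until the process restarts; calling memory.reset() would clear it.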
    ContextNodes = ""
    for node in response.source_nodes:
        ContextNodes = ContextNodes + node.node.text + "\n"
        #print(f"Texto del nodo: {node.node.text}")
        #print(f"Puntuación de relevancia: {node.score}")
        #print("---")

    NewPrompt = """The previous response is:
%s
The previous context is:
%s
Evaluate the coherence and accuracy of the previous response to respond %s in this evaluation.
Verify whether the previous context is related to the previous response; if not, say that you do not have information about that issue.
The format of the output is a json with keys 'coherencia', 'exactitud', 'relacion_con_el_contexto' and 'comentario'.
'coherencia', 'exactitud', 'relacion_con_el_contexto' are numeric variables with a maximum value of 10""" % (response, ContextNodes, message)

    print(chat_engine.__dict__)
    chat_completion = client.chat.completions.create(
        #
        # Required parameters
        #
        messages=[
            # Set an optional system message. This sets the behavior of the
            # assistant and can be used to provide specific instructions for
            # how it should behave throughout the conversation.
            {
                "role": "system",
                "content": "you are a helpful assistant."
            },
            # Set a user message for the assistant to respond to.
            {
                "role": "user",
                "content": NewPrompt,
            }
        ],

        # The language model which will generate the completion.
        model="llama-3.1-70b-versatile",

        #
        # Optional parameters
        #

        # Controls randomness: lowering results in less random completions.
        # As the temperature approaches zero, the model will become deterministic
        # and repetitive.
        temperature=0.5,

        # The maximum number of tokens to generate. Requests can use up to
        # 32,768 tokens shared between prompt and completion.
        max_tokens=1024,

        # Controls diversity via nucleus sampling: 0.5 means half of all
        # likelihood-weighted options are considered.
        top_p=1,

        # A stop sequence is a predefined or user-specified text string that
        # signals an AI to stop generating content, ensuring its responses
        # remain focused and concise. Examples include punctuation marks and
        # markers like "[end]".
        stop=None,

        # If set, partial message deltas will be sent.
        stream=False,
    )

    return P(message), P(response), P(chat_completion.choices[0].message.content)
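The evaluation prompt asks the Groq model for a JSON object with 'coherencia', 'exactitud', 'relacion_con_el_contexto' and 'comentario', but the handler renders the raw string. A minimal sketch of parsing it defensively before display; the fallback behaviour is an assumption, not part of the commit:

# Sketch: parse the evaluation JSON returned by the Groq call above.
# Models sometimes wrap JSON in extra text, so fall back to the raw string.
import json

def parse_evaluation(raw: str) -> dict:
    try:
        start, end = raw.find("{"), raw.rfind("}")
        data = json.loads(raw[start:end + 1])
    except ValueError:
        data = {"comentario": raw}  # fallback: keep the unparsed text
    return data

# Example:
# evaluation = parse_evaluation(chat_completion.choices[0].message.content)
# print(evaluation.get("coherencia"), evaluation.get("exactitud"))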
@app.get("/SummarySources")
def SummarySources():

@@ -170,7 +309,7 @@ def SummarySources():
def SummaryMake(data:str, query:str):
    print(data, query)
    docs = SimpleDirectoryReader(
        input_files=[data]
        input_dir=[data]
    ).load_data()
    print("p1")
    summary_index = SummaryIndex.from_documents(docs)
@@ -189,7 +328,7 @@ def SummaryMake(data:str,query:str):
def createCollection(data:str, collection:str):
    print("Reading")
    docs = SimpleDirectoryReader(
        input_files=[data]
        input_dir=data
    ).load_data()
    print("Process Documents")
    Nodes = post_process_documents(docs)

@@ -197,7 +336,7 @@ def createCollection(data:str,collection:str):
    class MyThread(threading.Thread):
        def run(self):
            print("Hilo")
            create_or_load_db(path="static/"+data.split("/")[1]+"/chroma_db", collection=collection, Nodes=Nodes, model="BAAI/bge-m3")
            create_or_load_db(path="static/"+data.split("/")[1]+"/chroma_db", collection=collection, Nodes=Nodes, modelT=model_emb)

    # create and start the thread
    global t
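The rest of createCollection sits outside this hunk; judging from the `global t` declared above and the `t_time` used by is_busy below, it presumably starts the indexing thread and records when it began. A sketch of that continuation, clearly an assumption rather than the commit's code:

    # Sketch (assumption): launch the background indexing thread and record
    # its start time so /is_busy can report how long the job has been running.
    global t_time
    t = MyThread()
    t_time = time.time()
    t.start()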
@@ -214,9 +353,9 @@ def is_busy():
    except:
        Busy = False
    if not Busy:
        return Busy
        return H2("Estado: Disponible para carga de datos")
    else:
        return "Esta ocupados desde hace %s , este es un proceso largo"%(str(time.time()-t_time))
        return H2("Esta ocupados desde hace %s , este es un proceso largo"%(str(time.time()-t_time)))
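Only the tail of is_busy appears in this hunk. For context, a sketch of how the full busy check is probably structured: poll the indexing thread and fall back to "not busy" when the thread has never been created. Again an assumption about code outside the hunk:

# Sketch only: the check that /is_busy (polled by HTMX every 60 s) likely performs.
def is_busy_sketch():
    try:
        Busy = t.is_alive()   # raises NameError before the first indexing run
    except:
        Busy = False
    if not Busy:
        return H2("Estado: Disponible para carga de datos")
    else:
        return H2("Esta ocupados desde hace %s , este es un proceso largo" % (str(time.time() - t_time)))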
@@ -224,38 +363,49 @@ def is_busy():
def home():

    page = Title('Super tutor'), Main(
        Div('Este es el sistema de super tutor, ',
        Div(H1('Super tutor'),
            menuusers(listUsers()),
            #A('A link', href='https://example.com'),
            #Img(src="https://placehold.co/200"),
            Div("Archivos", id="files"),
            Div(id="NewCollection"),
            Div("Estado", id="status", hx_target="this", hx_swap="innerHTML", hx_get="/is_busy", hx_trigger="every 60000ms"),

            Div(H2("Estado:Disponible para carga"), id="status", hx_target="this", hx_swap="innerHTML", hx_get="/is_busy", hx_trigger="every 60000ms"),
            Div(
                Div(id="options", hx_target="this", hx_swap="outerHTML", hx_get="/listmodelactives", hx_trigger="click from:#buttonMenuuser delay:3s"),
                Div(id="Infomodel"),
                Div(Div(id="options", hx_target="this", hx_swap="outerHTML", hx_get="/listmodelactives", hx_trigger="click from:#buttonMenuuser delay:3s"), cls="col-xs-12"),
                Div(Div(id="Infomodel"), cls="col-xs-12"),
                #Div("Resumen",Div(id="summary",hx_target="this",hx_swap="outerHTML",hx_get="/SummarySources",hx_trigger="click from:#buttonMenuuser"),Div(id="summaryR")),
                Div(
                    Form(
                        Input(id="question", name="message", placeholder="Enter a message"),
                        Button("Submit", type="submit"), hx_post="/chatData", hx_swap="afterend", hx_target="#questionR"),
                    Div(id="questionR")
                    , id="questions"),
                Div(
                    Form(
                        Input(id="question", name="message", placeholder="Enter a message"),
                        Button("Submit", type="submit"), hx_post="/chatData", hx_swap="afterend", hx_target="#questionR"),
                    Div(id="questionR")
                    , id="questions"),
                    cls="col-xs-6"),
                Div(
                    Form(
                        Input(id="query", name="question", placeholder="Enter a query"),
                        Button("Submit", type="submit"), hx_post="/queryprompt", hx_swap="innerHTML", hx_target="#queryR"),
                    Div(id="queryR"),
                    id="query"),
                id="chatbot")
                Div(
                    Form(
                        Input(id="query", name="question", placeholder="Enter a query"),
                        Button("Submit", type="submit"), hx_post="/queryprompt", hx_swap="innerHTML", hx_target="#queryR"),
                    Div(id="queryR"),
                    id="query"),
                id="chatbot", cls="col-xs-6"),
            cls="row", style="color: #fff;")
        ))
    return page
# @app.post("/upload")
# def upload(data: UploadFile = File(...), user: str = Form(...), dir: str = Form(...)):
#     filename = "static/"+user+dir+data.filename
#
#     with open(f"{filename}", "wb") as buffer:
#         shutil.copyfileobj(data.file, buffer)
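The commented-out upload route mixes FastAPI-style parameter defaults (File(...), Form(...)) with FastHTML. A minimal sketch of an equivalent handler in FastHTML's own style; the route and field names are kept from the comment, and the whole block is an assumption rather than part of the commit:

# Sketch only: a FastHTML-style upload handler roughly equivalent to the comment above.
import os, shutil
from starlette.datastructures import UploadFile

@app.post("/upload")
def upload(data: UploadFile, user: str, dir: str):
    target_dir = os.path.join("static", user, dir)
    os.makedirs(target_dir, exist_ok=True)        # make sure the user folder exists
    filename = os.path.join(target_dir, data.filename)
    with open(filename, "wb") as buffer:
        shutil.copyfileobj(data.file, buffer)     # stream the uploaded file to disk
    return P("Uploaded %s" % filename)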
app.mount("/static", StaticFiles(directory="static"), name="static")
# app.mount("/static", StaticFiles(directory="static"), name="static")

serve()
(12 binary files changed; contents not shown)