From 55df2334730fc5bfd2a2d286a988198a0ec16354 Mon Sep 17 00:00:00 2001 From: Mario Gonzalez Gil Date: Tue, 21 Nov 2023 21:47:37 +0100 Subject: [PATCH] New version of main.py with data, structure and others --- main.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/main.py b/main.py index 667d193..fdcad57 100644 --- a/main.py +++ b/main.py @@ -24,7 +24,7 @@ from nltk.corpus import stopwords from typing import Optional #from cleantext import clean import re -model="FTv1/all-mpnet-base-v2" +model="Modelo_embedding_Mexico_Puebla/all-mpnet-base-v2/model" entrenamiento="V1.0" @@ -99,13 +99,14 @@ def loadmodelEmb(model_name = "embeddings/all-MiniLM-L6-v2",model_kwargs = {'dev def loadCopysAndData(pathsqlite="motor.sqlite"): con = sqlite3.connect(pathsqlite) - copies_df = pd.read_sql_query("SELECT * from copies", con) - copiesT = copies_df[copies_df.copy_start =="T"] - copiesT=copiesT[["copy_message","id","name","intencionality"]] + copies_df = pd.read_sql_query("SELECT * from copies WHERE intentionality IS NOT NULL", con) + copiesT = copies_df + copiesT=copiesT[["copy_message","id","name","intentionality"]] + print(copiesT) data = copiesT #print(data) B=DataFrameLoader(data,page_content_column="copy_message") - B2=DataFrameLoader(data,page_content_column="intencionality") + B2=DataFrameLoader(data,page_content_column="intentionality") documents=B.load() documents2=B2.load() return documents,documents2 @@ -125,14 +126,14 @@ documents,documents2=loadCopysAndData() emb=loadmodelEmb(model_name = model) emb2=CustomEmbedding() db=makeFaissdb(documents,"Copies3",emb2) -db2=makeFaissdb(documents2,"Intencionality3",emb2) +db2=makeFaissdb(documents2,"Intentionality3",emb2) #db3=makeFaissdb(documents2,"nameshf",hf) -def FinderDbs(query,dbs,filtred=1.2): +def FinderDbs(query,dbs,filtred=0.4): AllData={} for dbt in dbs: Sal = dbt.similarity_search_with_score(query,4)