New version of main.py with data, structure, and other changes

This commit is contained in:
Mario Gonzalez Gil 2023-11-21 21:47:37 +01:00
parent 739036e038
commit 55df233473
1 changed file with 8 additions and 7 deletions

15
main.py
View File

@ -24,7 +24,7 @@ from nltk.corpus import stopwords
from typing import Optional from typing import Optional
#from cleantext import clean #from cleantext import clean
import re import re
model="FTv1/all-mpnet-base-v2" model="Modelo_embedding_Mexico_Puebla/all-mpnet-base-v2/model"
entrenamiento="V1.0" entrenamiento="V1.0"
@ -99,13 +99,14 @@ def loadmodelEmb(model_name = "embeddings/all-MiniLM-L6-v2",model_kwargs = {'dev
def loadCopysAndData(pathsqlite="motor.sqlite"): def loadCopysAndData(pathsqlite="motor.sqlite"):
con = sqlite3.connect(pathsqlite) con = sqlite3.connect(pathsqlite)
copies_df = pd.read_sql_query("SELECT * from copies", con) copies_df = pd.read_sql_query("SELECT * from copies WHERE intentionality IS NOT NULL", con)
copiesT = copies_df[copies_df.copy_start =="T"] copiesT = copies_df
copiesT=copiesT[["copy_message","id","name","intencionality"]] copiesT=copiesT[["copy_message","id","name","intentionality"]]
print(copiesT)
data = copiesT data = copiesT
#print(data) #print(data)
B=DataFrameLoader(data,page_content_column="copy_message") B=DataFrameLoader(data,page_content_column="copy_message")
B2=DataFrameLoader(data,page_content_column="intencionality") B2=DataFrameLoader(data,page_content_column="intentionality")
documents=B.load() documents=B.load()
documents2=B2.load() documents2=B2.load()
return documents,documents2 return documents,documents2
@ -125,14 +126,14 @@ documents,documents2=loadCopysAndData()
emb=loadmodelEmb(model_name = model) emb=loadmodelEmb(model_name = model)
emb2=CustomEmbedding() emb2=CustomEmbedding()
db=makeFaissdb(documents,"Copies3",emb2) db=makeFaissdb(documents,"Copies3",emb2)
db2=makeFaissdb(documents2,"Intencionality3",emb2) db2=makeFaissdb(documents2,"Intentionality3",emb2)
#db3=makeFaissdb(documents2,"nameshf",hf) #db3=makeFaissdb(documents2,"nameshf",hf)
def FinderDbs(query,dbs,filtred=1.2): def FinderDbs(query,dbs,filtred=0.4):
AllData={} AllData={}
for dbt in dbs: for dbt in dbs:
Sal = dbt.similarity_search_with_score(query,4) Sal = dbt.similarity_search_with_score(query,4)