New branch unstable

This commit is contained in:
Mario Gil 2023-12-21 13:16:49 -05:00
parent 87cd0f780f
commit 9c0b1ce654
1 changed file with 17 additions and 7 deletions

24
main.py
View File

@ -24,12 +24,23 @@ from nltk.corpus import stopwords
from typing import Optional from typing import Optional
#from cleantext import clean #from cleantext import clean
import re import re
from langid.langid import LanguageIdentifier from pathlib import Path
from langid.langid import model as modellangid import json
#from langid.langid import LanguageIdentifier
#from langid.langid import model as modellangid
import time import time
model="Modelo_embedding_Mexico_Puebla/all-mpnet-base-v2/model" model="Modelo_embedding_Mexico_Puebla/all-mpnet-base-v2/model"
entrenamiento="V1.3" entrenamiento="V1.3"
def extractConfig(nameModel: str = "Modelo_embedding_Mexico_Puebla", relPath: str = "./conf/experiment_config.json", dataOut: str = "train_dataset_pos") -> Path:
    """Look up one path entry for a model in the JSON experiment config.

    The file at ``relPath`` is parsed as JSON, the entry stored under
    ``nameModel`` is selected, and the value stored under ``dataOut`` inside
    that entry is returned wrapped in a ``Path``.

    Args:
        nameModel: Top-level key of the JSON document to read from.
        relPath: Location of the JSON config file; opened as UTF-8 text.
        dataOut: Key inside the selected entry whose value is returned.

    Returns:
        ``Path`` built from the configured value.

    Raises:
        FileNotFoundError: If ``relPath`` does not exist.
        json.JSONDecodeError: If the file is not valid JSON.
        KeyError: If ``nameModel`` or ``dataOut`` is missing from the config.
    """
    configPath = Path(relPath)
    # Keep the file open only for as long as the JSON parse needs it.
    with open(configPath, 'r', encoding='utf-8') as file:
        allConfigs = json.load(file)

    # Same exception type as a plain lookup, but the message names the
    # missing key and the file so a bad config is easy to diagnose.
    try:
        config = allConfigs[nameModel]
    except KeyError as err:
        raise KeyError(
            f"model {nameModel!r} is not defined in {configPath}"
        ) from err

    try:
        return Path(config[dataOut])
    except KeyError as err:
        raise KeyError(
            f"key {dataOut!r} is missing for model {nameModel!r} in {configPath}"
        ) from err
class CustomEmbedding(Embeddings, BaseModel,): class CustomEmbedding(Embeddings, BaseModel,):
@ -100,12 +111,12 @@ def loadmodelEmb(model_name = "embeddings/all-MiniLM-L6-v2",model_kwargs = {'dev
return st return st
def loadCopysAndData(pathsqlite="/opt/web2py/applications/MotorAngela/databases/storage.sqlite"): def loadCopysAndData(pathsqlite=pathDb):
con = sqlite3.connect(pathsqlite) con = sqlite3.connect(pathsqlite)
copies_df = pd.read_sql_query("SELECT * from copies WHERE intentionality IS NOT NULL", con) copies_df = pd.read_sql_query("SELECT * from copies WHERE intentionality IS NOT NULL", con)
copiesT = copies_df copiesT = copies_df
copiesT=copiesT[["copy_message","id","name","intentionality"]] copiesT=copiesT[["copy_message","id","name","intentionality"]]
print(copiesT) #print(copiesT)
data = copiesT data = copiesT
#print(data) #print(data)
B=DataFrameLoader(data,page_content_column="copy_message") B=DataFrameLoader(data,page_content_column="copy_message")
@ -116,8 +127,7 @@ def loadCopysAndData(pathsqlite="/opt/web2py/applications/MotorAngela/databases/
def makeFaissdb(documents,folder_path,embedding): def makeFaissdb(documents,folder_path,embedding):
try: try:
db=FAISS.load_local(folder_path=folder_path,embeddings=embedding) db=FAISS.load_local(folder_path=folder_path,embeddings=embedding)
except: except:
db = FAISS.from_documents(documents, embedding) db = FAISS.from_documents(documents, embedding)
FAISS.save_local(db,folder_path=folder_path) FAISS.save_local(db,folder_path=folder_path)
@ -131,7 +141,7 @@ emb2=CustomEmbedding()
db=makeFaissdb(documents,"Copies3",emb2) db=makeFaissdb(documents,"Copies3",emb2)
db2=makeFaissdb(documents2,"Intentionality3",emb2) db2=makeFaissdb(documents2,"Intentionality3",emb2)
#db3=makeFaissdb(documents2,"nameshf",hf) #db3=makeFaissdb(documents2,"nameshf",hf)
identifier = LanguageIdentifier.from_modelstring(modellangid, norm_probs=True) #identifier = LanguageIdentifier.from_modelstring(modellangid, norm_probs=True)