New branch unstable
This commit is contained in:
parent
87cd0f780f
commit
9c0b1ce654
22
main.py
22
main.py
|
@ -24,12 +24,23 @@ from nltk.corpus import stopwords
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
#from cleantext import clean
|
#from cleantext import clean
|
||||||
import re
|
import re
|
||||||
from langid.langid import LanguageIdentifier
|
from pathlib import Path
|
||||||
from langid.langid import model as modellangid
|
import json
|
||||||
|
#from langid.langid import LanguageIdentifier
|
||||||
|
#from langid.langid import model as modellangid
|
||||||
import time
|
import time
|
||||||
model="Modelo_embedding_Mexico_Puebla/all-mpnet-base-v2/model"
|
model="Modelo_embedding_Mexico_Puebla/all-mpnet-base-v2/model"
|
||||||
entrenamiento="V1.3"
|
entrenamiento="V1.3"
|
||||||
|
|
||||||
|
def extractConfig(nameModel="Modelo_embedding_Mexico_Puebla",relPath="./conf/experiment_config.json",dataOut="train_dataset_pos"):
    """Return one path entry from the JSON experiment configuration.

    The file at ``relPath`` is read as UTF-8 JSON, the section stored under
    ``nameModel`` is selected, and the value stored under ``dataOut`` in that
    section is returned wrapped in a ``pathlib.Path``.

    Args:
        nameModel: Top-level key of the JSON document to read from.
        relPath: Location of the JSON configuration file. A relative value
            is resolved against the current working directory by ``open``.
        dataOut: Key inside the selected section whose value is returned.

    Returns:
        ``Path`` built from the configured value.

    Raises:
        FileNotFoundError: If ``relPath`` does not exist.
        json.JSONDecodeError: If the file is not valid JSON.
        KeyError: If ``nameModel`` or ``dataOut`` is missing; the message
            names the missing key and the configuration file.
    """
    config_path = Path(relPath)
    with open(config_path, 'r', encoding='utf-8') as file:
        all_configs = json.load(file)

    # Look the two keys up separately so the error says which one is missing
    # and in which file, instead of surfacing a bare KeyError.
    try:
        model_config = all_configs[nameModel]
    except KeyError:
        raise KeyError(
            f"model {nameModel!r} not found in configuration file {config_path}"
        ) from None

    try:
        configured_value = model_config[dataOut]
    except KeyError:
        raise KeyError(
            f"entry {dataOut!r} not found for model {nameModel!r} "
            f"in configuration file {config_path}"
        ) from None

    return Path(configured_value)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class CustomEmbedding(Embeddings, BaseModel,):
|
class CustomEmbedding(Embeddings, BaseModel,):
|
||||||
|
@ -100,12 +111,12 @@ def loadmodelEmb(model_name = "embeddings/all-MiniLM-L6-v2",model_kwargs = {'dev
|
||||||
return st
|
return st
|
||||||
|
|
||||||
|
|
||||||
def loadCopysAndData(pathsqlite="/opt/web2py/applications/MotorAngela/databases/storage.sqlite"):
|
def loadCopysAndData(pathsqlite=pathDb):
|
||||||
con = sqlite3.connect(pathsqlite)
|
con = sqlite3.connect(pathsqlite)
|
||||||
copies_df = pd.read_sql_query("SELECT * from copies WHERE intentionality IS NOT NULL", con)
|
copies_df = pd.read_sql_query("SELECT * from copies WHERE intentionality IS NOT NULL", con)
|
||||||
copiesT = copies_df
|
copiesT = copies_df
|
||||||
copiesT=copiesT[["copy_message","id","name","intentionality"]]
|
copiesT=copiesT[["copy_message","id","name","intentionality"]]
|
||||||
print(copiesT)
|
#print(copiesT)
|
||||||
data = copiesT
|
data = copiesT
|
||||||
#print(data)
|
#print(data)
|
||||||
B=DataFrameLoader(data,page_content_column="copy_message")
|
B=DataFrameLoader(data,page_content_column="copy_message")
|
||||||
|
@ -117,7 +128,6 @@ def loadCopysAndData(pathsqlite="/opt/web2py/applications/MotorAngela/databases/
|
||||||
def makeFaissdb(documents,folder_path,embedding):
|
def makeFaissdb(documents,folder_path,embedding):
|
||||||
try:
|
try:
|
||||||
db=FAISS.load_local(folder_path=folder_path,embeddings=embedding)
|
db=FAISS.load_local(folder_path=folder_path,embeddings=embedding)
|
||||||
|
|
||||||
except:
|
except:
|
||||||
db = FAISS.from_documents(documents, embedding)
|
db = FAISS.from_documents(documents, embedding)
|
||||||
FAISS.save_local(db,folder_path=folder_path)
|
FAISS.save_local(db,folder_path=folder_path)
|
||||||
|
@ -131,7 +141,7 @@ emb2=CustomEmbedding()
|
||||||
db=makeFaissdb(documents,"Copies3",emb2)
|
db=makeFaissdb(documents,"Copies3",emb2)
|
||||||
db2=makeFaissdb(documents2,"Intentionality3",emb2)
|
db2=makeFaissdb(documents2,"Intentionality3",emb2)
|
||||||
#db3=makeFaissdb(documents2,"nameshf",hf)
|
#db3=makeFaissdb(documents2,"nameshf",hf)
|
||||||
identifier = LanguageIdentifier.from_modelstring(modellangid, norm_probs=True)
|
#identifier = LanguageIdentifier.from_modelstring(modellangid, norm_probs=True)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue