Custom Hugging Face models

2023-09-28 14:48:59 -05:00 · 2023-09-28 14:48:59 -05:00 · 3ddeacd83f
parent 8f262fe9ba
commit 3ddeacd83f
4 changed files with 25 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@ -4,3 +4,5 @@ names/*
 nameshf/*
 photo_2023-09-24_00-25-17.jpg
 __pycache__/FindinDB.cpython-38.pyc
 embeddings/*
 tuned_models/*
--- a/FindinDB.py
+++ b/FindinDB.py
@ -41,6 +41,7 @@ def read_main():
    return {"message": "This is your main app"}
 def loadModels():
    #model = GPT4All("orca-mini-3b.ggmlv3.q4_0.bin")
    callback_manager = BaseCallbackManager([StreamingStdOutCallbackHandler()])
--- a/loadModelinlocal.py
+++ b/loadModelinlocal.py
@ -0,0 +1,9 @@
 from langchain.embeddings import HuggingFaceEmbeddings
 # Smoke test: build an embedder from the relative path
 # "embeddings/all-MiniLM-L6-v2" (presumably a locally saved copy of the
 # model -- confirm), embed one sample sentence and print the result.
 df = HuggingFaceEmbeddings(model_name="embeddings/all-MiniLM-L6-v2")

 # Sample input used only to check that the embedder can be called.
 text = "This is a test document."
 query_result = df.embed_query(text)
 print(query_result)
--- a/retrainEmbbeding.py
+++ b/retrainEmbbeding.py
@ -0,0 +1,13 @@
 from sentence_transformers import SentenceTransformer
 # Load each sentence-transformers model listed below and save a copy under
 # ./embeddings/<model name>/.
 #
 # Author's notes on the candidates:
 #   - "multi-qa-mpnet-base-dot-v1": specialised in question answering.
 #   - "all-mpnet-base-v2": general purpose, the best performing one.
 #   - "paraphrase-MiniLM-L3-v2" and "all-MiniLM-L6-v2": the fastest.
 #   - "all-MiniLM-L12-v2": very fast and very accurate.
 models = [
     "all-MiniLM-L12-v2",
     "paraphrase-MiniLM-L3-v2",
     "all-MiniLM-L6-v2",
     "all-mpnet-base-v2",
     "multi-qa-mpnet-base-dot-v1",
 ]

 for model in models:
     # Destination directory for this model's files.
     save_path = f"./embeddings/{model}/"
     modelST = SentenceTransformer(model)
     modelST.save(save_path)