Compare commits

No commits in common. "d7a0141a835d4dd4cb1c0d6e6fd1adcca5ee70e8" and "aaad0dc7c230656aaad7bc014f74d4108e2a4ab9" have entirely different histories.

d7a0141a83 ... aaad0dc7c2
@@ -14,3 +14,64 @@ for model in models:
    modelST.save(save_path)


# all-MiniLM-L12-v2

# all-MiniLM-L12-v2 Saber sobre actividades culturales. 0.6535751457769086 0.05863175772626888 0.12278595510518776
# all-MiniLM-L12-v2 Saber sobre talleres. 0.751629503845477 0.05310761464124975 0.1831973003891279
# all-MiniLM-L12-v2 Información sobre talleres de literatura. 0.7224854452006415 0.05215076573219591 0.2247900827875677
# all-MiniLM-L12-v2 Información sobre talleres de formación artistica. 0.7008979606232822 0.03950918605037314 0.2588270430294973
# all-MiniLM-L12-v2 Obtener la certificación de uso de suelos. 0.6363654116990891 0.06126748264989437 0.2990496653430867
# all-MiniLM-L12-v2 Reportar un bacheo. 0.5974184966305134 0.14056650047761457 0.33133007445425355
# all-MiniLM-L12-v2 Saber dónde pago un parquímetro. 0.7286070458224445 0.04967551271473011 0.36476032688932597
# paraphrase-MiniLM-L3-v2

# paraphrase-MiniLM-L3-v2 Saber sobre actividades culturales. 0.7366279968758482 0.08893400433814432 0.011976916834993183
# paraphrase-MiniLM-L3-v2 Saber sobre talleres. 0.8040920436803051 0.07181478379134668 0.02360300747853405
# paraphrase-MiniLM-L3-v2 Información sobre talleres de literatura. 0.7437334052301269 0.04553266191552214 0.036959598649222894
# paraphrase-MiniLM-L3-v2 Información sobre talleres de formación artistica. 0.743870036748493 0.06526662723048463 0.05061841460893739
# paraphrase-MiniLM-L3-v2 Obtener la certificación de uso de suelos. 0.7717547355774438 0.06484008413761407 0.062440363865978316
# paraphrase-MiniLM-L3-v2 Reportar un bacheo. 0.6655234266285941 0.12495720849140243 0.0751793069659539
# paraphrase-MiniLM-L3-v2 Saber dónde pago un parquímetro. 0.7348896817507707 0.04065274263873351 0.09146604897840968
# all-MiniLM-L6-v2

# all-MiniLM-L6-v2 Saber sobre actividades culturales. 0.5873976949286881 0.054536409831093556 0.02166009399126161
# all-MiniLM-L6-v2 Saber sobre talleres. 0.705393021384429 0.06415187629245482 0.040732748103591634
# all-MiniLM-L6-v2 Información sobre talleres de literatura. 0.602608386747181 0.054022995767296696 0.06001406345727309
# all-MiniLM-L6-v2 Información sobre talleres de formación artistica. 0.6445745034623189 0.05229467148751577 0.07957683869127957
# all-MiniLM-L6-v2 Obtener la certificación de uso de suelos. 0.5708618561256799 0.0394827821548067 0.09872836436865465
# all-MiniLM-L6-v2 Reportar un bacheo. 0.5741872079555271 0.13503311454160494 0.11794944529263478
# all-MiniLM-L6-v2 Saber dónde pago un parquímetro. 0.6594361733956011 0.056983523732601314 0.13696542775855874
# all-mpnet-base-v2

# all-mpnet-base-v2 Saber sobre actividades culturales. 0.5534035540829121 0.06890411125329764 0.08799683372929411
# all-mpnet-base-v2 Saber sobre talleres. 0.6346356305674484 0.05816374415416716 0.2692093669243579
# all-mpnet-base-v2 Información sobre talleres de literatura. 0.733400957902919 0.062002638662933096 0.4069641581121481
# all-mpnet-base-v2 Información sobre talleres de formación artistica. 0.7403190712146518 0.06573001180535122 0.5049155163315108
# all-mpnet-base-v2 Obtener la certificación de uso de suelos. 0.5057200806006308 0.040962860644441684 0.5848623266759908
# all-mpnet-base-v2 Reportar un bacheo. 0.4282261685120943 0.1256224113877856 0.6733528038240829
# all-mpnet-base-v2 Saber dónde pago un parquímetro. 0.5096540066521769 0.06542826690229307 0.7576164344571671
# multi-qa-mpnet-base-dot-v1

# multi-qa-mpnet-base-dot-v1 Saber sobre actividades culturales. 0.5412514848207511 0.049426306929690425 0.08471853328201007
# multi-qa-mpnet-base-dot-v1 Saber sobre talleres. 0.6004619942650676 0.04068730180147856 0.19998745198519724
# multi-qa-mpnet-base-dot-v1 Información sobre talleres de literatura. 0.5422846411740877 0.035149354259768846 0.3127848022388962
# multi-qa-mpnet-base-dot-v1 Información sobre talleres de formación artistica. 0.5810213727598411 0.033041479673933366 0.3915549134308437
# multi-qa-mpnet-base-dot-v1 Obtener la certificación de uso de suelos. 0.5171735715348054 0.029578046799246076 0.4764851579126322
# multi-qa-mpnet-base-dot-v1 Reportar un bacheo. 0.4249011819077356 0.12119208621320086 0.559126246650264
# multi-qa-mpnet-base-dot-v1 Saber dónde pago un parquímetro. 0.49335939772807463 0.047415340138656205 0.65184190138331
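The three unlabeled numbers appended after each query above are not explained anywhere in this comparison. Purely as an illustrative, hypothetical sketch (not the script that produced these results), one plausible per-model, per-query figure is the cosine similarity between the query and the "Reportar un bacheo" copy, computed with sentence-transformers; the model names are taken from the evaluation list later in this diff and are assumed to resolve on the Hugging Face hub:

# Hypothetical scoring sketch -- NOT the code that generated the numbers above.
# It only illustrates one plausible metric: query-vs-copy cosine similarity per model.
from sentence_transformers import SentenceTransformer, util

model_names = ["all-MiniLM-L12-v2", "paraphrase-MiniLM-L3-v2", "all-MiniLM-L6-v2",
               "all-mpnet-base-v2", "multi-qa-mpnet-base-dot-v1"]
queries = ["Saber sobre actividades culturales.", "Reportar un bacheo.",
           "Saber dónde pago un parquímetro."]
reference = "Reportar un bacheo"  # the copy the queries are compared against

for name in model_names:
    st = SentenceTransformer(name)  # downloaded from the Hugging Face hub if not cached
    ref_emb = st.encode(reference, convert_to_tensor=True)
    for q in queries:
        q_emb = st.encode(q, convert_to_tensor=True)
        print(f"# {name} {q} {util.cos_sim(q_emb, ref_emb).item()}")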
@@ -1,67 +1,10 @@
""" from sentence_transformers import SentenceTransformer, models

## Step 1: use an existing language model
word_embedding_model = models.Transformer('distilroberta-base')

## Step 2: use a pool function over the token embeddings
pooling_model = models.Pooling(word_embedding_model.get_word_embedding_dimension())

## Join steps 1 and 2 using the modules argument
model = SentenceTransformer(modules=[word_embedding_model, pooling_model])

from sentence_transformers import InputExample

from datasets import load_dataset

dataset_id = "embedding-data/QQP_triplets"
# dataset_id = "embedding-data/sentence-compression"

dataset = load_dataset(dataset_id)

train_examples = []
train_data = dataset['train']['set']
# For agility we only 1/2 of our available data
n_examples = dataset['train'].num_rows // 2

for i in range(10):
  example = train_data[i]
  train_examples.append(InputExample(texts=[example['query'], example['pos'][0]]))

from torch.utils.data import DataLoader

train_dataloader = DataLoader(train_examples, shuffle=True, batch_size=2)

from sentence_transformers import losses

train_loss = losses.MultipleNegativesRankingLoss(model=model)

num_epochs = 10

warmup_steps = int(len(train_dataloader) * num_epochs * 0.1) #10% of train data

model.fit(train_objectives=[(train_dataloader, train_loss)],epochs=num_epochs,warmup_steps=2)
"""
from sentence_transformers import SentenceTransformer, losses, InputExample
from torch.utils.data import DataLoader

model="embeddings/all-mpnet-base-v2"
modelST = SentenceTransformer(model)

train_loss = losses.MultipleNegativesRankingLoss(model=modelST)
train_loss = losses.MultipleNegativesRankingLoss(model=model)

queries=["reportar un bache en mi comunidad",
                "¿Como reporto un bacheo en mi comunidad?",
@@ -119,15 +62,15 @@ queries=["reportar un bache en mi comunidad",

train_examples = []
for q in queries:
    train_examples.append(InputExample(texts=[ 'Reportar un bacheo',q]))
    train_examples.append(InputExample(texts=[q, 'Reportar un bacheo']))

print(train_examples)
train_dataloader = DataLoader(train_examples, shuffle=True, batch_size=2)
train_dataloader = DataLoader(train_examples, shuffle=True, batch_size=5)
print(train_dataloader)
num_epochs = 2

warmup_steps = int(len(train_dataloader) * num_epochs * 0.1) #10% of train data

modelST.fit(train_objectives=[(train_dataloader, train_loss)],epochs=num_epochs,warmup_steps=2)
save_path = './Finetuning/%s/'%(model)
# Save the model
modelST.save(save_path)
modelST.fit(train_objectives=[(train_dataloader, train_loss)],
            epochs=num_epochs,
            warmup_steps=warmup_steps)
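A quick way to check what the fine-tuning above actually changed is to compare the base checkpoint against the saved one on a query/copy pair from the training data. This is only a sketch, not part of the diff; both paths are assumptions taken from the script (the `embeddings/all-mpnet-base-v2` base model and the `./Finetuning/embeddings/all-mpnet-base-v2/` save_path) and must exist locally:

# Sketch: query-to-copy cosine similarity before and after fine-tuning.
# Paths are assumptions based on the script above; adjust to your layout.
from sentence_transformers import SentenceTransformer, util

base = SentenceTransformer("embeddings/all-mpnet-base-v2")
tuned = SentenceTransformer("./Finetuning/embeddings/all-mpnet-base-v2/")

query = "¿Como reporto un bacheo en mi comunidad?"
copy = "Reportar un bacheo"

for label, m in [("base", base), ("fine-tuned", tuned)]:
    q_emb, c_emb = m.encode([query, copy], convert_to_tensor=True)
    print(label, util.cos_sim(q_emb, c_emb).item())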

main.py (84 changed lines)
@@ -11,7 +11,7 @@ from langchain.document_loaders.recursive_url_loader import RecursiveUrlLoader
from langchain.document_loaders import UnstructuredURLLoader
from langchain.document_loaders.csv_loader import CSVLoader
#from langchain import  LLMChain
from pydantic import BaseModel
from langchain.pydantic_v1 import BaseModel
from langchain.schema.embeddings import Embeddings
from langchain.document_loaders import DataFrameLoader
from langchain.embeddings import HuggingFaceEmbeddings
@@ -25,9 +25,6 @@ from nltk.corpus import stopwords
import re
model="embeddings/all-mpnet-base-v2"
entrenamiento="V0.0"

class CustomEmbedding(Embeddings, BaseModel,):
    """embedding model with preprocessing"""
    def _get_embedding(self,text) -> List[float]:
@@ -95,6 +92,14 @@ def loadmodelEmb(model_name = "embeddings/all-MiniLM-L6-v2",model_kwargs = {'dev
    st = SentenceTransformer(model_name)
    return st

#emb=loadmodelEmb()
CUSTOM_PATH = "/angela"
app = FastAPI()


@app.get("/")
def read_main():
    return {"message": "This is your main app"}

def loadCopysAndData(pathsqlite="motor.sqlite"):
    con = sqlite3.connect(pathsqlite)
@@ -127,10 +132,6 @@ db=makeFaissdb(documents,"Copies3",emb2)
db2=makeFaissdb(documents2,"Intencionality3",emb2)
#db3=makeFaissdb(documents2,"nameshf",hf)


def FinderDbs(query,dbs,filtred=False,th=1.2):
    AllData={}
    for dbt in dbs:
@@ -147,7 +148,7 @@ def FinderDbs(query,dbs,filtred=False,th=1.2):
    if filtred:
        filtredData={}
        for row in AllData.keys():
            if AllData[row]["d"]<th:
            if AllData[row]["d"]<1.2:
                filtredData[row]=AllData[row]
        filtredData=dict(sorted(filtredData.items(), key=lambda item: item[1]["d"]))
        return  filtredData,filtredData.keys()
@@ -157,34 +158,65 @@ def FinderDbs(query,dbs,filtred=False,th=1.2):
        AllData=dict(sorted(AllData.items(), key=lambda item: item[1]["d"]))
        return  AllData,AllData.keys()

app = FastAPI()

@app.get("/")
def read_main():
    return {"message": "This is your main app"}

class Response(BaseModel):
    query: str


filtred=False
@app.post("/angela/")
def calculate_api(response: Response):
    print(response.query)
    query = response.query
def QARequest(Pregunta,filtred=False):
    query = Pregunta
    AllData=FinderDbs(query,[db2],filtred)
    versionL="_".join([model,entrenamiento])
    if AllData:

        import markdown
        AllData = list(AllData)
        #lista = "<div style='border-style = solid;border-width:1px;border-radius:10px'>"
        lista = ""
        dis=[]
        id=[]
        for k,i in enumerate(AllData[0].items()):
            titulo = f"<div style='border-style = solid;border-width:1px;border-radius:10px;margin:14px;padding:14px'><h2>Respuesta {k+1}</h2>"
            to_append = markdown.markdown(i[1]['page_content'])
            dis.append(str(i[1]['d']))
            id.append(i[0])
    return {"ids": id,"DC":dis,"modelo":versionL}

            #print("NNNN",i,k)
            lista = lista + titulo + to_append + '</div>'
            #lista.append('<br>')
        #lista = lista + '</div>'

        AllData[0] = lista


    return id, dis,versionL



with gr.Blocks() as demo:
    gr.Image("logo.jpg",height=100)
    gr.Markdown("Esta es la busqueda que hace el usuario")
    Pregunta = gr.Textbox(label="Pregunta")
    #Pregunta = re.sub(r"(@\[A-Za-z0-9]+)|([^0-9A-Za-z \t])|(\w+:\/\/\S+)|^rt|http.+?", "", Pregunta)
    #Pregunta=Pregunta.strip().lower()

    filtred=gr.Checkbox(label="filtrado")

    gr.Markdown("Respuestas para orca desde los copys")
    Respuesta = gr.Textbox(label="Respuesta")
    id = gr.Textbox(label="id")
    metrica=gr.Textbox(label="metrica")

    version = gr.Textbox(label="version")
    # id2 = gr.Textbox(label="id2")
    # metrica2=gr.Textbox(label="metrica2")
    # gr.Markdown("Respuestas para hf desde los names")
    # Respuesta3 = gr.Textbox(label="Respuesta3")
    # id3 = gr.Textbox(label="id3")
    # metrica3=gr.Textbox(label="metrica3")
    Enviar_btn = gr.Button("Responder")

    Enviar_btn.click(fn=QARequest, inputs=[Pregunta,filtred], outputs=[id,metrica,version], api_name="api_angela")

#demo.launch(root_path="angela")

gradio_app = gr.routes.App.create_app(demo)

app.mount(CUSTOM_PATH, gradio_app)

#app = demo.mount_gradio_app(app, io, path=CUSTOM_PATH)
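The main.py change above mounts the Gradio Blocks UI into FastAPI with gr.routes.App.create_app plus app.mount. A minimal, self-contained sketch of the same pattern using the documented gr.mount_gradio_app helper follows; the /angela path and the Pregunta/Respuesta labels mirror the diff, while the echo handler is only a placeholder standing in for QARequest:

# Minimal sketch: serve a Gradio Blocks UI under a FastAPI sub-path.
# Placeholder handler only -- the real app wires the button to QARequest.
import gradio as gr
from fastapi import FastAPI

app = FastAPI()

@app.get("/")
def read_main():
    return {"message": "This is your main app"}

def echo(pregunta: str) -> str:
    return pregunta  # stand-in for QARequest

with gr.Blocks() as demo:
    pregunta = gr.Textbox(label="Pregunta")
    respuesta = gr.Textbox(label="Respuesta")
    gr.Button("Responder").click(fn=echo, inputs=pregunta, outputs=respuesta)

app = gr.mount_gradio_app(app, demo, path="/angela")
# run with: uvicorn main:app --reload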
@@ -143,8 +143,7 @@ def FinderDbs(query,dbs,filtred=False,th=1.2):
if args.models=="All":
    models=["all-MiniLM-L12-v2","paraphrase-MiniLM-L3-v2" , "all-MiniLM-L6-v2","all-mpnet-base-v2","multi-qa-mpnet-base-dot-v1"]
else:
    models=["embeddings/all-mpnet-base-v2","Finetuning/embeddings/all-mpnet-base-v2"]

    models=["all-mpnet-base-v2"]
queries_bacheo=["Quiero reportar un bacheo",
                "reportar un bache en mi comunidad",
                "¿Como reporto un bacheo en mi comunidad?",
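FinderDbs is only partially visible in these hunks, but both files filter FAISS hits by a distance threshold (hardcoded to 1.2 in the main.py change). A hedged sketch of that filtering step with LangChain's FAISS vector store, assuming a db built like the Intencionality3 index above (smaller distances mean closer matches):

# Sketch of the distance-threshold filtering that FinderDbs appears to perform;
# the full function body is not shown in this comparison.
# `db` is assumed to be a langchain FAISS store; th=1.2 mirrors the hardcoded value.
def filter_by_distance(db, query, th=1.2, k=5):
    hits = db.similarity_search_with_score(query, k=k)  # [(Document, distance), ...]
    kept = [(doc, d) for doc, d in hits if d < th]
    return sorted(kept, key=lambda pair: pair[1])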