Endpoint de Google Cloud

2023-10-27 21:23:54 -05:00 · 2023-10-27 21:23:54 -05:00 · cd30ded760
parent eaea723d26
commit cd30ded760
2 changed files with 38 additions and 1 deletions
--- a/example/Prueba1.flac
+++ b/example/Prueba1.flac
--- a/main.py
+++ b/main.py
@ -1,5 +1,8 @@
 import whisper
 import time
+from google.cloud import speech_v1
+from google.cloud.speech_v1 import types
+import json
 from fastapi import FastAPI
 from pydantic import BaseModel
 app = FastAPI()
@ -15,5 +18,39 @@ model = whisper.load_model("medium")
@app.post("/voice2txt/")
 def calculate_api(response: Response):
+    """Transcribe the audio file at ``response.path`` with the Whisper model.
+
+    Returns a dict with the transcript under "message" and the elapsed
+    transcription time in seconds under "time".
+    """
    path = response.path
+    # perf_counter() is monotonic, so the measured duration cannot be skewed
+    # by system clock adjustments the way time.time() can.
+    started = time.perf_counter()
    result = model.transcribe(path)["text"]
-    return {"message": result}
+    return {"message": result, "time": time.perf_counter() - started}
+
+
+@app.post("/voice2txtGoogle/")
+def calculate_api_g(response: Response):
+    """Transcribe the audio file at ``response.path`` via transcribe_ogg_audio.
+
+    Returns a dict with the transcript under "message" and the elapsed
+    transcription time in seconds under "time".
+    """
+    path = response.path
+    # perf_counter() is monotonic, so the measured duration cannot be skewed
+    # by system clock adjustments the way time.time() can.
+    started = time.perf_counter()
+    result = transcribe_ogg_audio(path)
+    return {"message": result, "time": time.perf_counter() - started}
+
+
+import os
+
+# Fall back to the local application-default credentials file only when the
+# environment has not already configured GOOGLE_APPLICATION_CREDENTIALS, so a
+# deployment-provided value is never overwritten by this developer path.
+os.environ.setdefault(
+    'GOOGLE_APPLICATION_CREDENTIALS',
+    "/home/mario/.config/gcloud/application_default_credentials.json",
+)
+
+def transcribe_ogg_audio(audio_file_path, language_code='es-US'):
+    """Transcribe a local audio file with Google Cloud Speech-to-Text.
+
+    Despite the function name, the request is configured for FLAC-encoded
+    audio.
+
+    Args:
+        audio_file_path: Path of the audio file to read and send.
+        language_code: Language tag passed to the recognizer.
+
+    Returns:
+        The top transcript of every returned result, joined with single
+        spaces, or an empty string when the service returns no results.
+    """
+    client = speech_v1.SpeechClient()
+
+    with open(audio_file_path, 'rb') as audio_file:
+        content = audio_file.read()
+
+    audio = speech_v1.RecognitionAudio(content=content)
+    # sample_rate_hertz is intentionally omitted: it is optional for FLAC,
+    # where the rate is read from the file header, and a hard-coded value
+    # breaks files recorded at any other rate.
+    config = speech_v1.RecognitionConfig(
+        encoding=types.RecognitionConfig.AudioEncoding.FLAC,
+        language_code=language_code,
+    )
+
+    response = client.recognize(config=config, audio=audio)
+
+    # Each result covers a consecutive portion of the audio; keep the best
+    # alternative of every portion instead of only the first one, and skip
+    # results without alternatives so silence does not raise IndexError.
+    transcripts = (
+        result.alternatives[0].transcript.strip()
+        for result in response.results
+        if result.alternatives
+    )
+    return " ".join(transcripts)