diff --git a/example/Prueba1.flac b/example/Prueba1.flac
new file mode 100644
index 0000000..20e99c0
Binary files /dev/null and b/example/Prueba1.flac differ
diff --git a/main.py b/main.py
index 6a776f7..4eda9dd 100644
--- a/main.py
+++ b/main.py
@@ -1,5 +1,9 @@
 import whisper
 import time
+import os
+
+from google.cloud import speech_v1
+from google.cloud.speech_v1 import types
 from fastapi import FastAPI
 from pydantic import BaseModel
 app = FastAPI()
@@ -15,5 +18,46 @@ model = whisper.load_model("medium")
 @app.post("/voice2txt/")
 def calculate_api(response: Response):
     path = response.path
+    t = time.time()
     result = model.transcribe(path)["text"]
-    return {"message": result}
\ No newline at end of file
+    return {"message": result, "time": time.time() - t}
+
+
+@app.post("/voice2txtGoogle/")
+def calculate_api_g(response: Response):
+    """Transcribe the file at response.path via Google Speech-to-Text, timing the call."""
+    path = response.path
+    t = time.time()
+    result = transcribe_ogg_audio(path)
+    return {"message": result, "time": time.time() - t}
+
+
+# NOTE(review): hard-coded, user-specific credential path — prefer configuring
+# GOOGLE_APPLICATION_CREDENTIALS in the deployment environment instead.
+os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = "/home/mario/.config/gcloud/application_default_credentials.json"
+
+
+def transcribe_ogg_audio(audio_file_path, language_code='es-US'):
+    """Transcribe a FLAC audio file with Google Cloud Speech-to-Text.
+
+    Despite the legacy name, the request is configured for FLAC input
+    (see the encoding below), matching the .flac example files.
+    """
+    client = speech_v1.SpeechClient()
+
+    with open(audio_file_path, 'rb') as audio_file:
+        content = audio_file.read()
+
+    audio = speech_v1.RecognitionAudio(content=content)
+    config = speech_v1.RecognitionConfig(
+        encoding=types.RecognitionConfig.AudioEncoding.FLAC,  # FLAC, not OGG
+        sample_rate_hertz=44100,  # must match the sample rate of the input file
+        language_code=language_code,
+    )
+
+    response = client.recognize(config=config, audio=audio)
+
+    # Empty results (e.g. pure silence) would make indexing raise IndexError.
+    if not response.results:
+        return ""
+    return response.results[0].alternatives[0].transcript