"""FastAPI speech-to-text service.

Endpoints:
    GET  /                 -- health check.
    POST /voice2txt/       -- transcribe a local audio file with OpenAI Whisper.
    POST /voice2txtGoogle/ -- transcribe a local audio file with Google Cloud Speech.

Both POST endpoints take a JSON body ``{"path": "<audio file path>"}`` and
return ``{"message": <transcript>, "time": <elapsed seconds>}``.
"""

import json
import os
import time

import whisper
from fastapi import FastAPI
from google.cloud import speech_v1
from google.cloud.speech_v1 import types
from pydantic import BaseModel

# Google credentials must be in the environment before SpeechClient() is built.
# NOTE(review): hard-coded user path — prefer configuring this outside the code.
# ``setdefault`` keeps any value already set in the real environment.
os.environ.setdefault(
    "GOOGLE_APPLICATION_CREDENTIALS",
    "/home/mario/.config/gcloud/application_default_credentials.json",
)

app = FastAPI()

# Loaded once at import time: Whisper model loading is expensive, so it is
# shared across all requests rather than reloaded per call.
model = whisper.load_model("medium")


class Response(BaseModel):
    """Request body for the transcription endpoints."""

    # Filesystem path of the audio file to transcribe.
    path: str


@app.get("/")
def read_main():
    """Health-check endpoint; returns a fixed OK message."""
    return {"message": "Ok"}


@app.post("/voice2txt/")
def calculate_api(response: Response):
    """Transcribe ``response.path`` with the local Whisper model.

    Returns the transcript plus the wall-clock seconds the call took.
    """
    path = response.path
    t = time.time()
    result = model.transcribe(path)["text"]
    return {"message": result, "time": time.time() - t}


@app.post("/voice2txtGoogle/")
def calculate_api_g(response: Response):
    """Transcribe ``response.path`` with Google Cloud Speech-to-Text.

    Returns the transcript plus the wall-clock seconds the call took.
    """
    path = response.path
    t = time.time()
    result = transcribe_ogg_audio(path)
    return {"message": result, "time": time.time() - t}


def transcribe_ogg_audio(audio_file_path, language_code="es-US"):
    """Send an audio file to Google Cloud Speech-to-Text and return the transcript.

    Args:
        audio_file_path: Path to the audio file on disk.
        language_code: BCP-47 language tag (default ``"es-US"``).

    Returns:
        The top alternative of the first recognition result, or ``""`` when the
        service returns no results (the original code raised IndexError here).
    """
    client = speech_v1.SpeechClient()

    with open(audio_file_path, "rb") as audio_file:
        content = audio_file.read()

    audio = speech_v1.RecognitionAudio(content=content)
    config = speech_v1.RecognitionConfig(
        # NOTE(review): the function name and the original comment say OGG, but
        # the config sends FLAC. Kept FLAC to preserve behavior; if the inputs
        # really are OGG/Opus files this should be AudioEncoding.OGG_OPUS with a
        # supported sample rate (8000/12000/16000/24000/48000 Hz) — confirm
        # against the actual audio being uploaded.
        encoding=types.RecognitionConfig.AudioEncoding.FLAC,
        sample_rate_hertz=44100,  # must match the audio file's actual rate
        language_code=language_code,
    )

    response = client.recognize(config=config, audio=audio)
    # Guard against empty results (e.g. silence / unrecognizable audio) instead
    # of crashing with IndexError.
    if not response.results:
        return ""
    return response.results[0].alternatives[0].transcript