import os
import time

from fastapi import FastAPI
from google.cloud import speech_v1
from pydantic import BaseModel

# Credentials for the Google Cloud Speech-to-Text client.
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "token.json"

app = FastAPI()


@app.get("/")
def read_main():
    return {"message": "Ok"}


class Response(BaseModel):
    """Request body for the transcription endpoints."""
    path: str
    ratehertz: int
    encoding: str


# Local Whisper variant, kept for reference:
# import whisper
# model = whisper.load_model("medium")
#
# @app.post("/voice2txt/")
# def calculate_api(response: Response):
#     t = time.time()
#     result = model.transcribe(response.path)["text"]
#     return {"message": result, "time": time.time() - t}


@app.post("/voice2txtGoogle/")
def calculate_api_g(response: Response):
    t = time.time()
    result = transcribe_audio(
        audio_file_path=response.path,
        ratehertz=response.ratehertz,
        encoding=response.encoding,
    )
    return {"message": result, "time": time.time() - t}


def transcribe_audio(audio_file_path, language_code="es-US", ratehertz=48000, encoding="OGG"):
    """Transcribe a local audio file with Google Cloud Speech-to-Text."""
    client = speech_v1.SpeechClient()

    # Map the request's encoding string to the API enum; anything other
    # than "WEBM" falls back to OGG_OPUS, matching the original behavior.
    if encoding == "WEBM":
        conf_encoding = speech_v1.RecognitionConfig.AudioEncoding.WEBM_OPUS
    else:
        conf_encoding = speech_v1.RecognitionConfig.AudioEncoding.OGG_OPUS

    with open(audio_file_path, "rb") as audio_file:
        content = audio_file.read()

    audio = speech_v1.RecognitionAudio(content=content)
    config = speech_v1.RecognitionConfig(
        encoding=conf_encoding,
        sample_rate_hertz=ratehertz,
        language_code=language_code,
    )

    response = client.recognize(config=config, audio=audio)
    if not response.results:
        return ""  # No speech was recognized in the audio.
    return response.results[0].alternatives[0].transcript
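
# --- Usage sketch (all names below are assumptions, not part of the file) ---
# Assuming this module is saved as main.py, it can be served with uvicorn:
#
#   uvicorn main:app --reload
#
# and exercised with a POST request; "sample.ogg" here is a hypothetical
# 48 kHz OGG/Opus file, and localhost:8000 is uvicorn's default address:
#
#   import requests
#   r = requests.post(
#       "http://localhost:8000/voice2txtGoogle/",
#       json={"path": "sample.ogg", "ratehertz": 48000, "encoding": "OGG"},
#   )
#   print(r.json())  # e.g. {"message": "<transcript>", "time": <seconds>}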