65 lines
1.8 KiB
Python
65 lines
1.8 KiB
Python
#import whisper
|
|
import time
|
|
from google.cloud import speech_v1
|
|
from google.cloud.speech_v1 import types
|
|
import json
|
|
from fastapi import FastAPI
|
|
from pydantic import BaseModel
|
|
app = FastAPI()
|
|
|
|
@app.get("/")
def read_main():
    """Liveness probe: answer GET / with a fixed JSON payload."""
    payload = {"message": "Ok"}
    return payload
|
|
class Response(BaseModel):
    """Body of a transcription request (used as the input model of the POST route)."""

    # Path of the audio file to transcribe; it is opened by the server process.
    path: str
    # Sample rate of the audio, forwarded to the recognizer as sample_rate_hertz.
    ratehertz: int
    # Encoding label; transcribe_audio distinguishes "OGG" and "WEBM".
    encoding: str
|
|
|
|
|
|
#model = whisper.load_model("medium")
|
|
#@app.post("/voice2txt/")
|
|
#def calculate_api(response: Response):
|
|
#path = response.path
|
|
#t=time.time()
|
|
#result = model.transcribe(path)["text"]
|
|
#return {"message": result,"time":time.time()-t}
|
|
|
|
|
|
@app.post("/voice2txtGoogle/")
def calculate_api_g(response: Response):
    """Transcribe the audio file named in the request body and time the call.

    Args:
        response: The parsed request body (despite its name). Its ``path``,
            ``ratehertz`` and ``encoding`` fields are forwarded unchanged to
            ``transcribe_audio``.

    Returns:
        A dict with ``"message"`` (the transcript returned by
        ``transcribe_audio``) and ``"time"`` (elapsed seconds as a float).
    """
    # NOTE(review): ``response.path`` comes straight from the request and is
    # opened as-is by transcribe_audio; if this endpoint is reachable by
    # untrusted clients, restrict it to an allowed directory.

    # perf_counter is monotonic, so the reported duration cannot be skewed
    # (or go negative) when the system clock is adjusted mid-request.
    started = time.perf_counter()
    result = transcribe_audio(
        audio_file_path=response.path,
        ratehertz=response.ratehertz,
        encoding=response.encoding,
    )
    return {"message": result, "time": time.perf_counter() - started}
|
|
|
|
|
|
#import argparse
|
|
|
|
import os
|
|
|
|
# Tell the Google client libraries which credentials file to load. The path is
# relative, so it is resolved against the process working directory. This
# assignment unconditionally replaces any value already present in the
# environment.
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "token.json"
|
|
|
|
def transcribe_audio(audio_file_path, language_code="es-US", ratehertz=48000, encoding="OGG"):
    """Send a local audio file to Google Cloud Speech-to-Text and return its transcript.

    Args:
        audio_file_path: Path of the audio file; it is read fully into memory
            and uploaded inline with the request.
        language_code: Language tag passed to the recognizer.
        ratehertz: Sample rate, passed to the recognizer as ``sample_rate_hertz``.
        encoding: ``"WEBM"`` selects ``WEBM_OPUS``; ``"OGG"`` and any other
            value select ``OGG_OPUS``.

    Returns:
        The transcript of the first alternative of the first result, or an
        empty string when the service returns no result or no alternative.

    Raises:
        OSError: If the audio file cannot be opened or read.
    """
    client = speech_v1.SpeechClient()

    audio_encodings = types.RecognitionConfig.AudioEncoding
    if encoding == "WEBM":
        conf_encoding = audio_encodings.WEBM_OPUS
    else:
        # "OGG" and every unrecognized label share the same fallback.
        conf_encoding = audio_encodings.OGG_OPUS

    with open(audio_file_path, "rb") as audio_file:
        content = audio_file.read()

    audio = speech_v1.RecognitionAudio(content=content)
    config = speech_v1.RecognitionConfig(
        encoding=conf_encoding,
        sample_rate_hertz=ratehertz,
        language_code=language_code,
    )

    response = client.recognize(config=config, audio=audio)

    # The result list can be empty (e.g. nothing was recognized); indexing it
    # blindly would raise IndexError, so report "no transcript" instead.
    results = list(response.results)
    if not results:
        return ""
    alternatives = list(results[0].alternatives)
    if not alternatives:
        return ""
    # Only the first result's top alternative is returned, as before.
    return alternatives[0].transcript
|
|
#results.alternatives[0].transcript
|