57 lines
1.5 KiB
Python
57 lines
1.5 KiB
Python
import whisper
|
|
import time
|
|
from google.cloud import speech_v1
|
|
from google.cloud.speech_v1 import types
|
|
import json
|
|
from fastapi import FastAPI
|
|
from pydantic import BaseModel
|
|
# Application instance; the @app.get / @app.post decorators below register
# their handlers on it.
app = FastAPI()
|
|
|
|
@app.get("/")
def read_main():
    """Answer GET / with a fixed ``{"message": "Ok"}`` payload."""
    payload = {"message": "Ok"}
    return payload
|
|
class Response(BaseModel):
    """Body accepted by the transcription endpoints.

    Despite the name, this model describes the incoming request payload,
    not an HTTP response.
    """

    # Path of the audio file to transcribe. The handlers pass it unchanged to
    # the transcription backends, so it is resolved on the server side.
    path: str
|
|
|
|
|
|
# Load the "medium" Whisper model once at import time; the /voice2txt/ handler
# reuses this module-level instance for every request.
model = whisper.load_model("medium")
|
|
@app.post("/voice2txt/")
def calculate_api(response: Response):
    """Transcribe the audio file at ``response.path`` with the local Whisper model.

    Args:
        response: Request body; ``response.path`` is passed to
            ``model.transcribe`` unchanged.

    Returns:
        A dict with ``"message"`` (the transcribed text) and ``"time"``
        (elapsed seconds spent transcribing, as a float).
    """
    # NOTE(review): the path comes straight from the client and is read on the
    # server without validation -- confirm that callers are trusted.
    # perf_counter() is monotonic, so the measured duration cannot be skewed
    # by wall-clock adjustments the way time.time() can.
    started = time.perf_counter()
    text = model.transcribe(response.path)["text"]
    return {"message": text, "time": time.perf_counter() - started}
|
|
|
|
|
|
@app.post("/voice2txtGoogle/")
def calculate_api_g(response: Response):
    """Transcribe the audio file at ``response.path`` via ``transcribe_ogg_audio``.

    Args:
        response: Request body; ``response.path`` is passed to
            ``transcribe_ogg_audio`` unchanged.

    Returns:
        A dict with ``"message"`` (the transcribed text) and ``"time"``
        (elapsed seconds spent transcribing, as a float).
    """
    # NOTE(review): the path comes straight from the client and is read on the
    # server without validation -- confirm that callers are trusted.
    # perf_counter() is monotonic, so the measured duration cannot be skewed
    # by wall-clock adjustments the way time.time() can.
    started = time.perf_counter()
    text = transcribe_ogg_audio(response.path)
    return {"message": text, "time": time.perf_counter() - started}
|
|
|
|
|
|
#import argparse
|
|
|
|
import os
|
|
|
|
# Set the credentials location for this process. The path is relative, so it
# resolves against the working directory, and the assignment overrides any
# value already present in the environment.
# NOTE(review): presumably read by speech_v1.SpeechClient() when it is
# constructed -- confirm against the Google auth documentation.
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = "token.json"
|
|
|
|
def transcribe_ogg_audio(audio_file_path, language_code='es-US', sample_rate_hertz=48000):
    """Transcribe an OGG/Opus audio file with Google Cloud Speech-to-Text.

    Args:
        audio_file_path: Path of the audio file; it is read fully into memory
            and sent inline with the request.
        language_code: Language tag passed to the recognizer.
        sample_rate_hertz: Sample rate declared for the audio. Defaults to
            48000, the value previously hard-coded; pass another value to
            match the actual file.

    Returns:
        The top-ranked transcript of every returned result, joined with single
        spaces, or an empty string when the service returns no results.

    Raises:
        OSError: If the audio file cannot be opened or read.
    """
    client = speech_v1.SpeechClient()

    with open(audio_file_path, 'rb') as audio_file:
        content = audio_file.read()

    audio = speech_v1.RecognitionAudio(content=content)
    config = speech_v1.RecognitionConfig(
        encoding=types.RecognitionConfig.AudioEncoding.OGG_OPUS,
        sample_rate_hertz=sample_rate_hertz,
        language_code=language_code,
    )

    response = client.recognize(config=config, audio=audio)

    # Each result covers a sequential portion of the audio, so keep them all
    # rather than only the first. Results without alternatives are skipped,
    # which also turns an empty response into "" instead of an IndexError.
    pieces = [
        result.alternatives[0].transcript.strip()
        for result in response.results
        if result.alternatives
    ]
    return " ".join(piece for piece in pieces if piece)
|
|
#results.alternatives[0].transcript
|