This commit is contained in:
Mario Gonzalez Gil 2023-11-27 17:06:34 +01:00
parent b94be5a668
commit 4514592453
2 changed files with 15 additions and 13 deletions

20
main.py
View File

@ -12,6 +12,8 @@ def read_main():
return {"message": "Ok"}
class Response(BaseModel):
    """Request body accepted by the transcription endpoints.

    ``ratehertz`` and ``encoding`` have defaults so that clients which send
    only ``path`` still pass validation. The defaults are the same values
    ``transcribe_audio`` uses for its own parameters.
    """

    # Path of the audio file to transcribe.
    path: str
    # Sample rate in hertz, forwarded to the recognizer configuration.
    ratehertz: int = 48000
    # Encoding label; "OGG" and "WEBM" are the values handled downstream.
    encoding: str = "OGG"
# Load the Whisper "medium" model and keep it in the module-level name `model`.
model = whisper.load_model("medium")
@ -26,8 +28,10 @@ def calculate_api(response: Response):
@app.post("/voice2txtGoogle/")
def calculate_api_g(response: Response):
    """Transcribe the audio file named in the request and time the call.

    Args:
        response: Request body carrying ``path``, ``ratehertz`` and
            ``encoding``.

    Returns:
        A dict with the value returned by ``transcribe_audio`` under
        ``"message"`` and the elapsed seconds under ``"time"``.
    """
    # perf_counter is monotonic, so the elapsed value cannot go negative if
    # the wall clock is adjusted mid-request.
    started = time.perf_counter()
    # One transcription call only: the older transcribe_ogg_audio(path) call
    # was superseded and its result was overwritten immediately.
    result = transcribe_audio(
        audio_file_path=response.path,
        ratehertz=response.ratehertz,
        encoding=response.encoding,
    )
    return {"message": result, "time": time.perf_counter() - started}
@ -37,16 +41,20 @@ import os
# Set the GOOGLE_APPLICATION_CREDENTIALS environment variable to "token.json".
# NOTE(review): the path is relative, so it presumably resolves against the
# process working directory — confirm for the deployment.
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = "token.json"
def transcribe_ogg_audio(audio_file_path, language_code='es-US'):
def transcribe_audio(audio_file_path, language_code='es-US',ratehertz=48000,encoding="OGG"):
client = speech_v1.SpeechClient()
if encoding=="OGG":
confEncoding=types.RecognitionConfig.AudioEncoding.OGG_OPUS
elif encoding=="WEBM":
confEncoding=types.RecognitionConfig.AudioEncoding.WEBM_OPUS
else:
confEncoding=types.RecognitionConfig.AudioEncoding.OGG_OPUS
with open(audio_file_path, 'rb') as audio_file:
content = audio_file.read()
audio = speech_v1.RecognitionAudio(content=content)
config = speech_v1.RecognitionConfig(
encoding=types.RecognitionConfig.AudioEncoding.OGG_OPUS,#.FLAC, # Use OGG encoding
sample_rate_hertz=48000, # Update this to match your audio file
encoding=confEncoding,#.FLAC, # Use OGG encoding
sample_rate_hertz=ratehertz,
language_code=language_code,
)

View File

@ -109,13 +109,7 @@ tabulate==0.9.0
tensorboardX==2.6.2.2
threadpoolctl==3.2.0
tiktoken==0.3.3
tokenizers==0.14.1
torch==2.1.0+cpu
torch-audiomentations==0.11.0
torch-pitch-shift==1.2.4
torchaudio==2.1.0+cpu
torchmetrics==1.2.0
torchvision==0.16.0+cpu
tokenizer
tqdm==4.66.1
transformers==4.34.1
triton==2.0.0