This commit is contained in:
parent
b94be5a668
commit
4514592453
20
main.py
20
main.py
|
@@ -12,6 +12,8 @@ def read_main():
|
||||||
return {"message": "Ok"}
|
return {"message": "Ok"}
|
||||||
class Response(BaseModel):
|
class Response(BaseModel):
|
||||||
path: str
|
path: str
|
||||||
|
ratehertz:int
|
||||||
|
encoding:str
|
||||||
|
|
||||||
|
|
||||||
model = whisper.load_model("medium")
|
model = whisper.load_model("medium")
|
||||||
|
@@ -26,8 +28,10 @@ def calculate_api(response: Response):
|
||||||
@app.post("/voice2txtGoogle/")
|
@app.post("/voice2txtGoogle/")
|
||||||
def calculate_api_g(response: Response):
|
def calculate_api_g(response: Response):
|
||||||
path = response.path
|
path = response.path
|
||||||
|
ratehertz = response.ratehertz
|
||||||
|
encoding = response.encoding
|
||||||
t=time.time()
|
t=time.time()
|
||||||
result = transcribe_ogg_audio(path)
|
result = transcribe_audio(audio_file_path=path,ratehertz=ratehertz,encoding=encoding)
|
||||||
return {"message": result,"time":time.time()-t}
|
return {"message": result,"time":time.time()-t}
|
||||||
|
|
||||||
|
|
||||||
|
@@ -37,16 +41,20 @@ import os
|
||||||
|
|
||||||
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = "token.json"
|
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = "token.json"
|
||||||
|
|
||||||
def transcribe_ogg_audio(audio_file_path, language_code='es-US'):
|
def transcribe_audio(audio_file_path, language_code='es-US',ratehertz=48000,encoding="OGG"):
|
||||||
client = speech_v1.SpeechClient()
|
client = speech_v1.SpeechClient()
|
||||||
|
if encoding=="OGG":
|
||||||
|
confEncoding=types.RecognitionConfig.AudioEncoding.OGG_OPUS
|
||||||
|
elif encoding=="WEBM":
|
||||||
|
confEncoding=types.RecognitionConfig.AudioEncoding.WEBM_OPUS
|
||||||
|
else:
|
||||||
|
confEncoding=types.RecognitionConfig.AudioEncoding.OGG_OPUS
|
||||||
with open(audio_file_path, 'rb') as audio_file:
|
with open(audio_file_path, 'rb') as audio_file:
|
||||||
content = audio_file.read()
|
content = audio_file.read()
|
||||||
|
|
||||||
audio = speech_v1.RecognitionAudio(content=content)
|
audio = speech_v1.RecognitionAudio(content=content)
|
||||||
config = speech_v1.RecognitionConfig(
|
config = speech_v1.RecognitionConfig(
|
||||||
encoding=types.RecognitionConfig.AudioEncoding.OGG_OPUS,#.FLAC, # Use OGG encoding
|
encoding=confEncoding,#.FLAC, # Use OGG encoding
|
||||||
sample_rate_hertz=48000, # Update this to match your audio file
|
sample_rate_hertz=ratehertz,
|
||||||
language_code=language_code,
|
language_code=language_code,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
|
@@ -109,13 +109,7 @@ tabulate==0.9.0
|
||||||
tensorboardX==2.6.2.2
|
tensorboardX==2.6.2.2
|
||||||
threadpoolctl==3.2.0
|
threadpoolctl==3.2.0
|
||||||
tiktoken==0.3.3
|
tiktoken==0.3.3
|
||||||
tokenizers==0.14.1
|
tokenizer
|
||||||
torch==2.1.0+cpu
|
|
||||||
torch-audiomentations==0.11.0
|
|
||||||
torch-pitch-shift==1.2.4
|
|
||||||
torchaudio==2.1.0+cpu
|
|
||||||
torchmetrics==1.2.0
|
|
||||||
torchvision==0.16.0+cpu
|
|
||||||
tqdm==4.66.1
|
tqdm==4.66.1
|
||||||
transformers==4.34.1
|
transformers==4.34.1
|
||||||
triton==2.0.0
|
triton==2.0.0
|
||||||
|
|
Loading…
Reference in New Issue