import whisper

# Load the base multilingual Whisper model.
model = whisper.load_model("base")

# Load the audio file and pad/trim it to the model's 30-second window.
audio = whisper.pad_or_trim(whisper.load_audio("audio.mp3"))

# Compute the log-Mel spectrogram and move it to the model's device.
mel = whisper.log_mel_spectrogram(audio).to(model.device)

# Identify the spoken language from the spectrogram probabilities.
_, probs = model.detect_language(mel)
detected = max(probs, key=probs.get)
print(f"Detected language: {detected}")

# Decode with default options and print the recognized text.
result = whisper.decode(model, mel, whisper.DecodingOptions())
print(result.text)
import whisper
from whisper.utils import get_writer

# Module-level model shared by the helpers below; 'small' trades accuracy
# for load time/VRAM versus the larger checkpoints.
model = whisper.load_model('small')
def get_transcribe(audio: str, language: str = 'en'):
    """Transcribe an audio file with the module-level Whisper model.

    :param audio: path to the audio file to transcribe.
    :param language: ISO language code hint passed to the model.
    :return: whisper's transcription result dict (segments, text, ...).
    """
    # verbose=True streams per-segment progress to stdout while decoding.
    options = {'audio': audio, 'language': language, 'verbose': True}
    return model.transcribe(**options)
- ?
def save_file(results, format='tsv', output_dir='output/'):
    """Persist a transcription result to disk via whisper's writer utilities.

    :param results: result dict returned by ``model.transcribe``.
    :param format: output format accepted by ``whisper.utils.get_writer``
        (e.g. 'tsv', 'txt', 'srt', 'vtt', 'json', 'all'). NOTE(review): the
        name shadows the builtin ``format``; kept unchanged so existing
        keyword callers keep working.
    :param output_dir: directory the writer saves into. New parameter with
        the previous hard-coded value as default, so the call sites are
        unaffected but the destination is now configurable.
    """
    writer = get_writer(format, output_dir)
    # The writer derives the final filename extension from the format.
    writer(results, f'transcribe.{format}')
- ?
def get_language():
    """Prompt the user for the song's language code.

    Pressing Enter without typing anything selects English ('en').
    :return: the chosen language code string.
    """
    language_input = input("input the song language[default->enter]\n"
                           "(英语->en、中文->zh、德语->de、西班牙语->es、法语->fr、日语->ja、.....):")
    # Fall back to English when the user just hits Enter (empty string).
    chosen = language_input if language_input else 'en'
    print(f"model language is {chosen}")
    return chosen
- ?
- ?
if __name__ == "__main__":
    # Ask for the audio path first, then the language (same order as before).
    audio_path = input("please input your music path:")
    result = get_transcribe(audio=audio_path, language=get_language())
    print('-' * 50)
    print(result.get('text', ''))
    # Write the transcript in each supported format ('tsv' is the default).
    for fmt in ('tsv', 'txt', 'srt'):
        save_file(result, fmt)