使用IBM Watson语音到文本API的语音识别

# credentials from IBM Cloud Service for Speech to text WATSON_API_KEY = '{API}' WATSON_STT_URL = '{URL}' ## Implementing the Speech To Text module import os import json import pandas as pd from ibm_watson import SpeechToTextV1 from ibm_cloud_sdk_core.authenticators import IAMAuthenticator def watson_batch_stt(filename: str, lang: str, encoding: str) -> str: authenticator = IAMAuthenticator(WATSON_API_KEY) speech_to_text = SpeechToTextV1(authenticator=authenticator) speech_to_text.set_service_url(WATSON_STT_URL) with open(filename, 'rb') as audio_file: response = speech_to_text.recognize( audio=audio_file, content_type='audio/{}'.format(os.path.splitext(filename)[1][1:]), model=lang + '_NarrowbandModel', max_alternatives=0, speaker_labels=True, inactivity_timeout=-1, ).get_result() return response SpeechToTextResults = watson_batch_stt('FILENAME.wav','en-US','UTF-8') print(json.dumps(SpeechToTextResults)) jsonconvo = json.dumps(SpeechToTextResults) jsonconvo = json.loads(jsonconvo) speakers=pd.DataFrame(jsonconvo['speaker_labels']).loc[:,['from','speaker','to']] convo=pd.DataFrame(jsonconvo['results'][0]['alternatives'][0]['timestamps']) speakers=speakers.join(convo) ChangeSpeaker=speakers.loc[speakers['speaker'].shift()!=speakers['speaker']].index Transcript=pd.DataFrame(columns=['from','to','speaker','transcript']) for counter in range(0,len(ChangeSpeaker)): print(counter) currentindex=ChangeSpeaker[counter] try: nextIndex=ChangeSpeaker[counter+1]-1 temp=speakers.loc[currentindex:nextIndex,:] except: temp=speakers.loc[currentindex:,:] Transcript=Transcript.append(pd.DataFrame([[temp.head(1)['from'].values[0],temp.tail(1)['to'].values[0],temp.head(1)['speaker'].values[0],temp[0].tolist()]],columns=['from','to','speaker','transcript'])) print(Transcript)

{ "result_index": 0, "results": [ { "final": true, "alternatives": [ { "transcript": "thanks for calling Company Name %HESITATION this is Ross ", "confidence": 0.73, "timestamps": [ [ "thanks", 0.71, 1.0 ], [ "for", 1.06, 1.23 ], [ "calling", 1.23, 1.68 ], [ "Company", 1.68, 1.95 ], [ "Name", 1.98, 2.35 ], [ "%HESITATION", 2.35, 2.61 ], [ "this", 2.61, 2.82 ], [ "is", 2.82, 2.94 ], [ "Ross", 2.94, 3.33 ] ] } ] }, { "final": true, "alternatives": [ { "transcript": "yes Ross %HESITATION I have a new puppy eight eight weeks old ", "confidence": 0.88, "timestamps": [ [ "yes", 5.42, 5.78 ], [ "Ross", 5.78, 6.13 ], [ "%HESITATION", 6.23, 6.66 ], [ "I", 6.66, 6.8 ], [ "have", 6.8, 6.99 ], [ "a", 6.99, 7.05 ], [ "new", 7.05, 7.21 ], [ "puppy", 7.21, 7.84 ], [ "eight", 8.31, 8.63 ], [ "eight", 8.77, 8.98 ], [ "weeks", 8.98, 9.27 ], [ "old", 9.27, 9.63 ] ] } ] }, { "final": true, "alternatives": [ { "transcript": "%HESITATION it's %HESITATION Australian and miniature Australian shepherd doodle necks ", "confidence": 0.77, "timestamps": [ [ "%HESITATION", 10.62, 11.29 ], [ "it's", 11.82, 12.04 ], [ "%HESITATION", 12.04, 12.37 ], [ "Australian", 12.45, 13.22 ], [ "and", 13.25, 13.43 ], [ "miniature", 13.43, 13.89 ], [ "Australian", 13.89, 14.49 ], [ "shepherd", 14.49, 15.13 ], [ "doodle", 15.65, 16.01 ], [ "necks", 16.01, 16.62 ] ] } ] },

from to speaker transcript 0 0.71 6.13 0 [thanks, for, calling, Company Name, %HESITATION,... 0 6.23 28.85 1 [nan, nan, nan, nan, nan, nan, nan, nan, nan, ...

2条回答

网友

1楼 · 编辑于 2024-09-19 23:42:20

试试这个，它会有用的。谢谢你的其他代码

data = []
for alts in res['results']:
    for i in alts['alternatives']:
        data.extend(i['timestamps'])
convo = pd.DataFrame(data)

网友

2楼 · 编辑于 2024-09-19 23:42:20

就我所知，你只是从结果中选取了第一个元素-

convo=pd.DataFrame(jsonconvo['results'][0]['alternatives'][0]['timestamps'])

你需要所有的结果

您得到的nan是由索引不存在的字段引起的not a number错误

相关问题更多 >

编程相关推荐

热门问题

热门文章