Azure语音到文本WebSocket[Python]

2024-10-02 14:27:42 发布

您现在位置:Python中文网/ 问答频道 /正文

我遇到的问题是:在 on_open 事件监听器中发送 speech.config 消息(及其消息头)时,会出现下面捕获到的错误,WebSocket 连接似乎无法成功建立。我是按照 Microsoft 的官方指南操作的。请问可以在连接建立之后再发送这些消息头吗?

非常感谢你的帮助!

这是我的代码:

import speech_recognition as sr
import json
import requests
import ssl
import urllib3
import xmltodict
import time
import websocket
import thread
import uuid
from googletrans import Translator
from urllib import urlencode
from auth import AzureAuthClient
import datetime
import struct
import pyaudio
import sys
import StringIO
import platform
import os

def get_wave_header(frame_rate):
    """
    Generate WAV header that precedes actual audio data sent to the speech translation service.

    The header describes single-channel audio with 2 bytes per sample. Both the
    RIFF chunk size and the data chunk size are written as 0 (presumably because
    the length of a live stream is not known up front -- confirm with the service
    documentation).

    The header is packed with a single ``struct.pack`` call on byte literals so
    the result is a real binary string on both Python 2 and Python 3.

    :param frame_rate: Sampling frequency (8000 for 8kHz or 16000 for 16kHz).
    :return: binary string (46 bytes)
    :raises ValueError: if frame_rate is neither 8000 nor 16000.
    """
    if frame_rate not in (8000, 16000):
        raise ValueError("Sampling frequency, frame_rate, should be 8000 or 16000.")

    nchannels = 1
    bytes_per_sample = 2

    # '<' selects little-endian with no padding, so the fields are laid out
    # back to back exactly in the order listed below.
    return struct.pack(
        '<4sL4s4sLHHLLHHH4sL',
        b'RIFF', 0,                                 # RIFF tag, chunk size (0)
        b'WAVE',
        b'fmt ', 18,                                # fmt tag, fmt chunk size
        0x0001,                                     # format tag (0x0001 = PCM)
        nchannels,
        frame_rate,
        frame_rate * nchannels * bytes_per_sample,  # bytes per second
        nchannels * bytes_per_sample,               # block alignment
        bytes_per_sample * 8,                       # bits per sample
        0,                                          # extension size
        b'data', 0                                  # data tag, data size (0)
    )


def get_wave_empty():
    """
    Return a block of zero bytes: 40 zero-valued 32-bit words, 160 bytes in total.

    Presumably meant as a short stretch of silent audio -- confirm against the
    caller, since nothing in this file uses it yet.

    The value is built as a bytes literal so it is a real binary string on both
    Python 2 and Python 3.

    :return: binary string of 160 zero bytes
    """
    word_count = 40     # number of 32-bit words
    word_size = 4       # bytes per word, matching struct format '<L'
    return b'\x00' * (word_count * word_size)


def on_open(ws):
    """
    WebSocketApp ``on_open`` callback: send the speech.config message, then
    start a background thread that listens on the microphone.

    The Speech Service WebSocket protocol frames a text message like an HTTP
    message: header lines in ``name: value`` form separated by CRLF, then an
    empty line, then the body. The message is therefore assembled with
    ``\\r\\n`` separators and a blank line before the JSON payload.

    :param ws: the open WebSocketApp connection the message is sent on.
    """
    timestamp = datetime.datetime.utcnow().isoformat() + 'Z'
    speech_config_body = {
        "context": {
            "system": {
                "version": "2.0.12341",
            },
            "os": {
                "platform": "Linux",
                "name": "Debian",
                "version": "2.14324324"
            },
            "device": {
                "manufacturer": "Contoso",
                "model": "Fabrikan",
                "version": "7.341"
            }
        },
    }

    speech_config_headers = [
        'Path: speech.config',
        'X-Timestamp: ' + timestamp,
        'Content-Type: application/json; charset=utf-8',
    ]
    # Headers joined by CRLF, then an empty line, then the JSON body.
    message = (
        '\r\n'.join(speech_config_headers)
        + '\r\n\r\n'
        + json.dumps(speech_config_body)
    )
    print(message)
    # Sent as a native string so websocket-client transmits a text frame.
    ws.send(message)

    # TODO: the captured audio is only printed below; it is not streamed to
    # the service yet.
    def run(*args):
        print("Websocket Running")
        try:
            while True:
                r = sr.Recognizer()
                with sr.Microphone() as source:
                    print("Say something!")
                    audio = r.listen(source)
                    print("After Say")
                    print(audio)
        finally:
            # The loop only ends through an exception; close the socket then
            # so run_forever() does not keep a dead session open.
            ws.close()

    thread.start_new_thread(run, ())


def on_data(ws, message, message_type, fin):
    """
    WebSocketApp ``on_data`` callback: print a marker line followed by the
    received payload.

    :param ws: the connection the data arrived on (unused).
    :param message: the received payload; printed as-is.
    :param message_type: frame type reported by the library (unused).
    :param fin: continuation flag reported by the library (unused).
    """
    for line in ("on_data", message):
        print(line)

def on_error(ws, error):
    """
    WebSocketApp ``on_error`` callback: print a marker line followed by the
    error that was reported.

    :param ws: the connection the error belongs to (unused).
    :param error: the error object or message; printed as-is.
    """
    for line in ("Websocket Error", error):
        print(line)

def on_close(ws, close_status_code=None, close_msg=None):
    """
    WebSocketApp ``on_close`` callback: report that the socket was closed.

    Newer websocket-client releases call this callback with the close status
    code and close message as extra arguments. Both are optional here so the
    callback works whether or not the library passes them.

    :param ws: the connection that was closed (unused).
    :param close_status_code: close status code, if the library supplies one.
    :param close_msg: close reason text, if the library supplies one.
    """
    print("Web Socket is closed")
    if close_status_code is not None or close_msg is not None:
        # The close code and reason are the quickest hint at why the service
        # dropped the connection.
        print("close status: %s, reason: %s" % (close_status_code, close_msg))

if __name__ == "__main__":
    # Script entry point: obtain an access token, open the recognition
    # WebSocket and hand control to the callbacks defined above.

    # Certificate verification is switched off further down, so silence the
    # matching urllib3 warning.
    urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
    # Log every WebSocket frame; useful while debugging the handshake.
    websocket.enableTrace(True)

    # recognize speech using Microsoft Bing Voice Recognition
    BING_KEY = "confidential key"  # Microsoft Bing Voice Recognition API keys 32-character lowercase hexadecimal strings
    # NOTE(review): AzureAuthClient comes from the local auth module and
    # presumably exchanges the key for a bearer token -- confirm there.
    auth_client = AzureAuthClient(BING_KEY)

    language = "en-US"
    client_trace_id = uuid.uuid4().hex
    print("client_trace_id: "+client_trace_id)
    timestap = datetime.datetime.utcnow().isoformat()+'Z'
    print(timestap)
    # The recognition language is taken from `language` so the two cannot
    # drift apart.
    azureHost = (
        "wss://speech.platform.bing.com/speech/recognition/conversation/"
        "cognitiveservices/v1?language=" + language
    )
    print("Ready to start web Socket")

    ws_client = websocket.WebSocketApp(
        azureHost,
        header=[
            'Authorization: Bearer '+ auth_client.get_access_token(),
            'X-ConnectionId: ' + client_trace_id
        ],
        on_open=on_open,
        on_data=on_data,
        on_error=on_error,
        on_close=on_close
    )
    # NOTE(review): ssl.CERT_NONE disables TLS certificate verification, which
    # exposes the bearer token to man-in-the-middle attacks. Left unchanged
    # here; drop the sslopt override unless there is a specific reason for it.
    ws_client.run_forever(sslopt={"cert_reqs": ssl.CERT_NONE})

Error Message from my CentOS


Tags: import, client, auth, output, data, ws, on, def