如何解决使用来自服务器的数据而不是文件来转录 Microsoft azure 语音 SDK
我正在尝试将数据发送到 Azure 语音 SDK 进行转录。我希望它从 python 文件接收数据,放入缓冲区,然后连续转录。 我正在使用 azure 语音 SDK 中的此示例。
#include <stdio.h>
#include <stdarg.h>
#include <stdlib.h>
#define infinity 1000
/* A graph vertex for the Dijkstra demo.  edges[i] and weights[i] are parallel
 * arrays of length edge_count describing the adjacency list. */
struct Node {
    unsigned char value;     /* printable label, e.g. 'a' */
    int visited;             /* set to 1 once dijkstra() has expanded this vertex */
    int distance;            /* best-known distance from the start; `infinity` = unreached */
    int edge_count;          /* number of entries in edges[] and weights[] */
    /* NOTE: originally declared as `int* weights,weight_assign_index,freed;` —
     * only `weights` was a pointer.  Split one-per-line for clarity; the
     * member types and layout are unchanged. */
    int *weights;            /* weights[i] is the weight of the edge to edges[i] */
    int weight_assign_index; /* next free slot, used by assign_weight() */
    int freed;               /* guard so deinit_graph() handles each node once */
    struct Node **edges;     /* adjacent vertices */
};
typedef struct Node Node;
/* Allocate and initialise a vertex with room for `edge_count` edges.
 * Exits the process on allocation failure instead of dereferencing NULL
 * (the original did not check any of the three malloc() results). */
Node* init_node(const unsigned char value, const int edge_count) {
    Node* node = malloc(sizeof(Node));
    if (!node) {
        fprintf(stderr, "init_node: out of memory\n");
        exit(EXIT_FAILURE);
    }
    node->value = value;
    node->visited = 0;
    node->distance = infinity; /* "not reached yet" sentinel */
    node->edge_count = edge_count;
    node->weights = malloc(edge_count * sizeof(int));
    node->edges = malloc(edge_count * sizeof(Node*));
    if (!node->weights || !node->edges) {
        fprintf(stderr, "init_node: out of memory\n");
        exit(EXIT_FAILURE);
    }
    node->weight_assign_index = 0;
    node->freed = 0;
    return node;
}
/* Copy `amount` Node* arguments from the variadic list into node->edges.
 * The caller is responsible for passing exactly node->edge_count pointers. */
void assign_edges(Node* node, const int amount, ...) {
    va_list args;
    va_start(args, amount);
    int index = 0;
    while (index < amount) {
        node->edges[index] = va_arg(args, Node*);
        index++;
    }
    va_end(args);
}
/* Record the weight of the (undirected) edge between node_1 and node_2 in
 * both adjacency lists.
 *
 * Fix: the original stored the weight at a running counter
 * (weight_assign_index) instead of at the index of the matched edge, so the
 * weights silently ended up attached to the wrong edges unless the calls were
 * made in exactly edge-list order; it also bumped node_2's counter based on a
 * match found only in node_1's list.  Writing to the matched index in each
 * list is order-independent and keeps weights[i] paired with edges[i]. */
void assign_weight(Node* node_1, Node* node_2, const int weight) {
    for (int i = 0; i < node_1->edge_count; i++)
        if (node_1->edges[i] == node_2)
            node_1->weights[i] = weight;
    for (int i = 0; i < node_2->edge_count; i++)
        if (node_2->edges[i] == node_1)
            node_2->weights[i] = weight;
}
/* Gather every node reachable from `node` exactly once, using the `freed`
 * flag as the visited marker.  Grows *list geometrically as needed. */
static void collect_reachable(Node* node, Node*** list, int* count, int* cap) {
    if (node->freed)
        return;
    node->freed = 1;
    if (*count == *cap) {
        *cap *= 2;
        Node** grown = realloc(*list, (size_t)*cap * sizeof(Node*));
        if (!grown) {
            fprintf(stderr, "deinit_graph: out of memory\n");
            exit(EXIT_FAILURE);
        }
        *list = grown;
    }
    (*list)[(*count)++] = node;
    for (int i = 0; i < node->edge_count; i++)
        collect_reachable(node->edges[i], list, count, cap);
}

/* Free the whole graph reachable from `node`.
 *
 * Fix: the original freed weights[] and edges[] but never the Node structs
 * themselves, leaking every vertex.  A naive free(node) inside the recursion
 * would be a use-after-free, because neighbours later read node->freed while
 * walking their own edge lists — so we traverse first (marking via `freed`)
 * and only then release all memory in a second pass. */
void deinit_graph(Node* node) {
    int count = 0, cap = 8;
    Node** list = malloc((size_t)cap * sizeof(Node*));
    if (!list)
        return; /* best effort: nothing freed, but no crash */
    collect_reachable(node, &list, &count, &cap);
    for (int i = 0; i < count; i++) {
        free(list[i]->weights);
        free(list[i]->edges);
        free(list[i]);
    }
    free(list);
}
/* Greedy shortest-path step: relax the unvisited neighbours of `current`,
 * then recurse into the nearest one until `goal` is the nearest.
 *
 * NOTE(review): this is not full Dijkstra — it only ever moves to a
 * neighbour of the current vertex rather than the globally nearest
 * unvisited vertex, so on some graphs it can dead-end or report a
 * non-optimal distance.  Preserved as-is apart from the UB fix below.
 *
 * Fix: the original used an uninitialised stack Node as the "no candidate
 * yet" sentinel (reading its uninitialised `value` in printf) and, when every
 * neighbour was already visited, recursed into that garbage sentinel —
 * undefined behaviour.  A NULL sentinel plus an explicit dead-end return
 * removes both; output on the normal path is unchanged. */
void dijkstra(Node* current, Node* goal) {
    Node* closest = NULL;
    printf("Current: %c\n", current->value);
    for (int i = 0; i < current->edge_count; i++) {
        Node* neighbor = current->edges[i];
        if (!neighbor->visited) {
            printf("New neighbor: %c\n", neighbor->value);
            const int tentative_distance = current->distance + current->weights[i];
            if (tentative_distance < neighbor->distance)
                neighbor->distance = tentative_distance;
            if (closest == NULL || neighbor->distance < closest->distance)
                closest = neighbor;
        }
    }
    current->visited = 1;
    if (closest == NULL) {
        /* every neighbour already visited: stop instead of recursing into garbage */
        printf("Dead end at %c: no unvisited neighbor\n", current->value);
        return;
    }
    printf("%c,closest,distance of %d\n", closest->value, closest->distance);
    if (closest == goal)
        printf("Shortest distance is %d\n", closest->distance);
    else
        dijkstra(closest, goal);
}
int main() {
Node
*a = init_node('a',2),*b = init_node('b',3),*c = init_node('c',*d = init_node('d',*e = init_node('e',*f = init_node('f',*g = init_node('g',*h = init_node('h',2);
assign_edges(a,2,e,b);
assign_edges(b,3,a,f,c);
assign_edges(c,b,h,d);
assign_edges(d,c,g);
assign_edges(e,g);
assign_edges(f,h);
assign_edges(g,d);
assign_edges(h,c);
assign_weight(a,2);
assign_weight(a,4);
assign_weight(b,1);
assign_weight(b,1);
assign_weight(f,1);
assign_weight(h,1);
assign_weight(c,d,2);
assign_weight(d,g,1);
assign_weight(g,1);
e -> distance = 0;
dijkstra(e,f);
deinit_graph(a);
}
我不想从音频文件“callback = WavFileReaderCallback(weatherfilename)”中获取数据,而是从另一个通过以下方式发送数据的 python 文件中获取数据:
def speech_recognition_with_pull_stream():
    """gives an example how to use a pull audio stream to recognize speech from a custom audio
    source

    Fix: the pasted version of this sample had lost all indentation and could
    not be parsed; the structure below is restored to the official Azure
    Speech SDK sample layout with no behavioral changes.  Relies on the
    module-level names ``speechsdk``, ``speech_key``, ``service_region`` and
    ``weatherfilename`` defined elsewhere in the file.
    """
    class WavFileReaderCallback(speechsdk.audio.PullAudioInputStreamCallback):
        """Example class that implements the Pull Audio Stream interface to recognize speech from
        an audio file"""

        def __init__(self, filename: str):
            super().__init__()
            # mode=None on a filename opens the WAV for reading ('rb')
            self._file_h = wave.open(filename, mode=None)
            self.sample_width = self._file_h.getsampwidth()
            # the stream format declared below requires 16 kHz / 16-bit / mono PCM
            assert self._file_h.getnchannels() == 1
            assert self._file_h.getsampwidth() == 2
            assert self._file_h.getframerate() == 16000
            assert self._file_h.getcomptype() == 'NONE'

        def read(self, buffer: memoryview) -> int:
            """read callback function

            Fills ``buffer`` with raw PCM bytes and returns the number of
            bytes written; returning 0 (at EOF) signals end-of-stream to the
            SDK.
            """
            size = buffer.nbytes
            frames = self._file_h.readframes(size // self.sample_width)
            buffer[:len(frames)] = frames
            return len(frames)

        def close(self):
            """close callback function"""
            self._file_h.close()

    speech_config = speechsdk.SpeechConfig(subscription=speech_key, region=service_region)

    # specify the audio format
    wave_format = speechsdk.audio.AudioStreamFormat(samples_per_second=16000, bits_per_sample=16,
                                                    channels=1)

    # setup the audio stream
    callback = WavFileReaderCallback(weatherfilename)
    stream = speechsdk.audio.PullAudioInputStream(callback, wave_format)
    audio_config = speechsdk.audio.AudioConfig(stream=stream)

    # instantiate the speech recognizer with pull stream input
    speech_recognizer = speechsdk.SpeechRecognizer(speech_config=speech_config,
                                                   audio_config=audio_config)

    done = False

    def stop_cb(evt):
        """callback that signals to stop continuous recognition upon receiving an event `evt`"""
        print('CLOSING on {}'.format(evt))
        nonlocal done
        done = True

    # Connect callbacks to the events fired by the speech recognizer
    speech_recognizer.recognizing.connect(lambda evt: print('RECOGNIZING: {}'.format(evt)))
    speech_recognizer.recognized.connect(lambda evt: print('RECOGNIZED: {}'.format(evt)))
    speech_recognizer.session_started.connect(lambda evt: print('SESSION STARTED: {}'.format(evt)))
    speech_recognizer.session_stopped.connect(lambda evt: print('SESSION STOPPED {}'.format(evt)))
    speech_recognizer.canceled.connect(lambda evt: print('CANCELED {}'.format(evt)))

    # stop continuous recognition on either session stopped or canceled events
    speech_recognizer.session_stopped.connect(stop_cb)
    speech_recognizer.canceled.connect(stop_cb)

    # Start continuous speech recognition
    speech_recognizer.start_continuous_recognition()
    while not done:
        time.sleep(.5)
    speech_recognizer.stop_continuous_recognition()
如何将这些数据放入缓冲区中以便语音 SDK 进行转录。请指导,谢谢。
解决方法
我是语音 SDK 团队的 Darren。请查看 SpeechSDK GitHub 存储库上的 Speech_recognition_with_push_stream Python 示例: https://github.com/Azure-Samples/cognitive-services-speech-sdk/blob/054b4783de9d52f28109c435bf90e073513fec97/samples/python/console/speech_sample.py#L417
我认为这就是您要找的。
根据您的数据可用性模型,另一种可能是 speech_recognition_with_pull_stream: https://github.com/Azure-Samples/cognitive-services-speech-sdk/blob/054b4783de9d52f28109c435bf90e073513fec97/samples/python/console/speech_sample.py#L346
如果您需要进一步的帮助,请随时打开 GitHub 问题: https://github.com/Azure-Samples/cognitive-services-speech-sdk/issues
谢谢,
达伦
版权声明:本文内容由互联网用户自发贡献,该文观点与技术仅代表作者本人。本站仅提供信息存储空间服务,不拥有所有权,不承担相关法律责任。如发现本站有涉嫌侵权/违法违规的内容, 请发送邮件至 dio@foxmail.com 举报,一经查实,本站将立刻删除。