當前位置：首頁 >

语音识别1：音频信号采集、并存入 wav文件

發布時間：2025/3/21 44 豆豆

生活随笔收集整理的這篇文章主要介紹了语音识别1：音频信号采集、并存入 wav文件，小編覺得挺不錯的，現在分享給大家，幫大家做個參考。

1 說明

語音識別的基本操作是：1）能夠錄音 2）能夠將錄音文件存儲。以下將列出此操作，并逐條語句講解。

2 音頻錄制代碼

import wave

import pyaudio
from tqdm import tqdm


def record_audio(wave_out_path, record_second):
    """Record audio from a PyAudio input stream into a WAV file.

    The stream is read in blocking mode, one buffer at a time, and every
    buffer is appended to the WAV file as soon as it has been read.

    Args:
        wave_out_path: Path of the WAV file to create (opened in 'wb' mode).
        record_second: Recording length in seconds.
    """
    CHUNK = 1024               # frames read from the stream per call
    FORMAT = pyaudio.paInt16   # sample format handed to PyAudio
    CHANNELS = 2
    RATE = 44100               # sampling rate in Hz

    p = pyaudio.PyAudio()
    try:
        stream = p.open(format=FORMAT,
                        channels=CHANNELS,
                        rate=RATE,
                        input=True,
                        frames_per_buffer=CHUNK)
        try:
            # The context manager closes the WAV file even if a read fails.
            with wave.open(wave_out_path, 'wb') as wf:
                wf.setnchannels(CHANNELS)
                wf.setsampwidth(p.get_sample_size(FORMAT))
                wf.setframerate(RATE)

                print("* recording")
                # RATE / CHUNK buffers make up one second of audio.
                for _ in tqdm(range(0, int(RATE / CHUNK * record_second))):
                    wf.writeframes(stream.read(CHUNK))
                print("* done recording")
        finally:
            # Release the stream whether or not recording succeeded.
            stream.stop_stream()
            stream.close()
    finally:
        p.terminate()


record_audio("output.wav", record_second=4)

注解：

1）包含對象說明

import pyaudio            # 是個操作類，可以生成pyaudio對象，對stream進行操作
import wave               # 是對聲波信號進行處理的對象
from tqdm import tqdm     # 是一個進度條

2）函數說明

def record_audio(wave_out_path, record_second):   # WAV file path, recording length in seconds
    """Record audio from a PyAudio input stream into a WAV file.

    Args:
        wave_out_path: Path of the WAV file to create (opened in 'wb' mode).
        record_second: Recording length in seconds.
    """
    CHUNK = 1024               # buffer size: frames read from the stream per call
    FORMAT = pyaudio.paInt16   # sample amplitude stored as a 2-byte integer
                               # (paInt24 or paInt32 are alternatives)
    CHANNELS = 2               # two channels (stereo)
    RATE = 44100               # sampling rate
    p = pyaudio.PyAudio()      # create the PyAudio object used for all operations

    try:
        # Create a stream object from the parameters above.
        stream = p.open(format=FORMAT,
                        channels=CHANNELS,
                        rate=RATE,
                        input=True,
                        frames_per_buffer=CHUNK)
        try:
            # Open the WAV file; the file object is wf. The context manager
            # closes the file even if a read fails part-way through.
            with wave.open(wave_out_path, 'wb') as wf:
                wf.setnchannels(CHANNELS)                   # pass CHANNELS on to the file object
                wf.setsampwidth(p.get_sample_size(FORMAT))  # get_sample_size(FORMAT) gives the byte width of FORMAT
                wf.setframerate(RATE)                       # set the sampling rate of the file
                print("* recording")
                # RATE / CHUNK is the number of buffers needed per second.
                for i in tqdm(range(0, int(RATE / CHUNK * record_second))):
                    data = stream.read(CHUNK)   # read one buffer, held temporarily in data
                    wf.writeframes(data)        # write data to disk
                print("* done recording")
        finally:
            stream.stop_stream()   # stop the stream
            stream.close()         # close the stream
    finally:
        p.terminate()              # terminate the PyAudio session


record_audio("output.wav", record_second=4)   # main program entry point

3 專業的寫法

import wave

import pyaudio


class Recorder(object):
    """A recorder class for recording audio to a WAV file.

    Records in mono by default.
    """

    def __init__(self, channels=1, rate=44100, frames_per_buffer=1024):
        """Store the stream settings used for every file this recorder opens.

        Args:
            channels: Number of audio channels.
            rate: Sampling rate.
            frames_per_buffer: Frames read from the stream per buffer.
        """
        self.channels = channels
        self.rate = rate
        self.frames_per_buffer = frames_per_buffer

    def open(self, fname, mode='wb'):
        """Return a RecordingFile for *fname* that uses this recorder's settings."""
        return RecordingFile(fname, mode, self.channels, self.rate,
                             self.frames_per_buffer)


class RecordingFile(object):
    """A WAV file that is filled from a PyAudio input stream.

    Usable as a context manager; leaving the ``with`` block calls close().
    """

    def __init__(self, fname, mode, channels, rate, frames_per_buffer):
        """Create the PyAudio instance and open the WAV file for writing.

        Args:
            fname: Path of the WAV file.
            mode: Mode passed to ``wave.open``.
            channels: Number of audio channels.
            rate: Sampling rate.
            frames_per_buffer: Frames read from the stream per buffer.
        """
        self.fname = fname
        self.mode = mode
        self.channels = channels
        self.rate = rate
        self.frames_per_buffer = frames_per_buffer
        # Set before anything can fail so close() never sees a missing attribute.
        self._stream = None
        self._pa = pyaudio.PyAudio()
        try:
            self.wavefile = self._prepare_file(self.fname, self.mode)
        except Exception:
            # Do not leak the PyAudio instance when the file cannot be opened.
            self._pa.terminate()
            raise

    def __enter__(self):
        return self

    def __exit__(self, exception, value, traceback):
        self.close()

    def record(self, duration):
        """Record for *duration* seconds, blocking until finished."""
        # Release any stream left over from an earlier recording call.
        self._close_stream()
        # Use a stream with no callback function in blocking mode
        self._stream = self._pa.open(format=pyaudio.paInt16,
                                     channels=self.channels,
                                     rate=self.rate,
                                     input=True,
                                     frames_per_buffer=self.frames_per_buffer)
        for _ in range(int(self.rate / self.frames_per_buffer * duration)):
            audio = self._stream.read(self.frames_per_buffer)
            self.wavefile.writeframes(audio)
        return None

    def start_recording(self):
        """Start recording through a stream callback and return self."""
        # Release any stream left over from an earlier recording call.
        self._close_stream()
        # Use a stream with a callback in non-blocking mode
        self._stream = self._pa.open(format=pyaudio.paInt16,
                                     channels=self.channels,
                                     rate=self.rate,
                                     input=True,
                                     frames_per_buffer=self.frames_per_buffer,
                                     stream_callback=self.get_callback())
        self._stream.start_stream()
        return self

    def stop_recording(self):
        """Stop the current stream, if one exists, and return self."""
        if self._stream is not None:
            self._stream.stop_stream()
        return self

    def get_callback(self):
        """Return a stream callback that appends each buffer to the WAV file."""
        def callback(in_data, frame_count, time_info, status):
            self.wavefile.writeframes(in_data)
            return in_data, pyaudio.paContinue
        return callback

    def close(self):
        """Close the stream (if any), terminate PyAudio and close the WAV file.

        Each step runs even when an earlier one raises.
        """
        try:
            self._close_stream()
        finally:
            try:
                self._pa.terminate()
            finally:
                self.wavefile.close()

    def _close_stream(self):
        """Close the current stream, if one is open, and forget it."""
        if self._stream is not None:
            stream, self._stream = self._stream, None
            stream.close()

    def _prepare_file(self, fname, mode='wb'):
        """Open *fname* with ``wave.open`` and set its channel, width and rate."""
        wavefile = wave.open(fname, mode)
        wavefile.setnchannels(self.channels)
        wavefile.setsampwidth(self._pa.get_sample_size(pyaudio.paInt16))
        wavefile.setframerate(self.rate)
        return wavefile


if __name__ == "__main__":
    myRecord = Recorder()
    # The context manager guarantees close() runs even if recording fails.
    with myRecord.open("./myvoice.wav") as myXR:
        myXR.record(4)
        myXR.start_recording()
        myXR.stop_recording()

總結

以上是生活随笔為你收集整理的语音识别1：音频信号采集、并存入 wav文件的全部內容，希望文章能夠幫你解決所遇到的問題。

如果覺得生活随笔網站內容還不錯，歡迎將生活随笔推薦給好友。

上一篇：语音识别2：CTC对齐的算法
下一篇：人工智能简史（Rockwell Anyo