通过脚本文件自动把长音频分割成短音频。
简介
这个脚本的功能是按照 TextGrid 文件中标注的时间区间,将长音频分割成对应的短音频,Linux 和 Windows 通用。使用方法:1. 将 wav 文件放入 wav 文件夹下;2. 将 TextGrid 文件放入 text 文件夹下;3. 执行 python3 main.py。执行完成后,text_output 和 wav_output 文件夹中会生成对应的数据。
音频分割脚本
侵删转自:https://github.com/hankerbiao/audio_split
from pydub import AudioSegment
import os, re
def get_second_part_wav(main_wav_path, start_time, end_time, part_wav_path):
    """
    Cut one segment out of an audio file and save it as a WAV file.

    Times are in milliseconds: pydub slices an AudioSegment by millisecond,
    and main() passes the TextGrid seconds multiplied by 1000.

    :param main_wav_path: path of the source audio file
    :param start_time: start of the segment in milliseconds (truncated to int)
    :param end_time: end of the segment in milliseconds (truncated to int)
    :param part_wav_path: path the extracted segment is written to
    :return: None
    """
    start_ms = int(start_time)
    end_ms = int(end_time)
    # from_file lets pydub/ffmpeg detect the input format; the previous
    # from_mp3 call declared the input as MP3 even though the script is
    # documented to read .wav files.
    sound = AudioSegment.from_file(main_wav_path)
    segment = sound[start_ms:end_ms]
    segment.export(part_wav_path, format="wav")
def is_number(uchar):
    """Return True if ``uchar`` is an ASCII digit ('0'-'9') or a dot, else False."""
    if uchar == '.':
        return True
    # Plain string comparison against the ASCII digit range.
    return '0' <= uchar <= '9'
def extract_num(string):
    """Extract the number embedded in ``string``.

    Every character accepted by is_number() (digits and dots) is kept, in
    order, and the concatenation is converted with float(). float() raises
    ValueError when that concatenation is not a valid number, e.g. when the
    string contains no digits at all.

    :param string: text containing a number, such as ``xmin = 1358.8925``
    :return: the extracted number as a float
    """
    numeric_chars = "".join(ch for ch in string if is_number(ch))
    return float(numeric_chars)
def extract_text(string):
    """Return the double-quoted text found in ``string``.

    The pattern is greedy, so when a line holds several quotes the result
    runs from the first quote to the last one. An empty string is returned
    when no quoted span is found.

    :param string: a line such as ``text = "sil"``
    :return: the text between the quotes, or ''
    """
    match = re.search('"(.*)"', string)
    if match is None:
        return ''
    return match.group(1)
def get_filename(filename):
    """Split a path into its file name without extension and its extension.

    :param filename: a file path
    :return: a two-item list ``[name_without_extension, extension]``
    """
    base_name = os.path.basename(filename)
    stem, suffix = os.path.splitext(base_name)
    return [stem, suffix]
def _parse_intervals(lines):
    """Return ``(xmin, xmax, text)`` for every interval in TextGrid ``lines``.

    An interval starts at an ``intervals [n]`` line and ends at its
    ``text = "..."`` line. Only ``xmin``/``xmax`` lines between those two are
    read, so the file-level and tier-level ``xmin``/``xmax`` headers are
    ignored regardless of how many tiers the file contains.

    :param lines: the lines of a TextGrid file
    :return: list of ``(xmin, xmax, text)`` tuples in file order
    :raises ValueError: if an interval's text line is reached before both
        of its xmin and xmax lines were seen
    """
    intervals = []
    bounds = None  # dict of the current interval's times; None outside an interval
    for line in lines:
        if 'intervals [' in line:
            bounds = {}
        elif bounds is None:
            # Header lines, or a text line that belongs to no interval.
            continue
        elif 'text = "' in line:
            if len(bounds) != 2:
                raise ValueError(
                    'interval has no xmin/xmax before its text line: ' + line.strip())
            intervals.append((bounds['xmin'], bounds['xmax'], extract_text(line)))
            bounds = None
        elif "xmin = " in line:
            bounds['xmin'] = extract_num(line)
        elif "xmax = " in line:
            bounds['xmax'] = extract_num(line)
    return intervals


def main():
    """Split every audio file in ``wav`` using the matching TextGrid in ``text``.

    Files of the two folders are paired by sorted order. For each pair, one
    ``<name>.wav<TAB><text>`` line per kept interval is written to a file in
    ``text_output``, and the corresponding audio segment is exported to
    ``wav_output/<audio name without .wav>/<name>.wav``. ``<name>`` is the
    1-based running index of the interval in the file, zero-padded to six
    digits, so the label file and the exported segments always agree.

    :raises ValueError: if the two input folders hold a different number of
        files
    """
    wav_path = "wav"
    text_path = "text"
    wav_output = "wav_output"
    text_output = "text_output"
    head = ""  # optional prefix for the generated segment names
    # A label line is "<6 digits>.wav\t<text>\n", i.e. 12 characters plus the
    # text, so this threshold drops intervals whose text has 7 characters or
    # fewer.
    min_label_len = 19

    wav_list = sorted(os.listdir(wav_path))
    text_list = sorted(os.listdir(text_path))
    if len(wav_list) != len(text_list):
        raise ValueError('the number of wav unmatch the number of text')

    os.makedirs(text_output, exist_ok=True)

    for wav_name, text_name in zip(wav_list, text_list):
        textfile = os.path.join(text_path, text_name)
        wavfile = os.path.join(wav_path, wav_name)
        with open(textfile, 'r', encoding="utf8") as f:
            intervals = _parse_intervals(f.readlines())

        # NOTE(review): the replacement is case-sensitive, so a file named
        # "*.TextGrid" keeps its name in text_output.
        text_outfile = os.path.join(text_output, text_name).replace('textGrid', 'txt')
        segments = []
        with open(text_outfile, 'w', encoding='utf-8') as fo:
            for index, (start, end, text) in enumerate(intervals, start=1):
                part_name = head + str(index).zfill(6) + ".wav"
                label = part_name + "\t" + text + '\n'
                if len(label) <= min_label_len:
                    continue
                fo.write(label)
                segments.append((part_name, start, end))

        if not segments:
            continue
        wav_outdir = os.path.join(wav_output, wav_name.replace('.wav', ''))
        os.makedirs(wav_outdir, exist_ok=True)
        for part_name, start, end in segments:
            # TextGrid times are in seconds; the slicing helper expects ms.
            s = start * 1000
            e = end * 1000
            print(s)
            get_second_part_wav(wavfile, s, e, os.path.join(wav_outdir, part_name))
# Run the splitter only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
TextGrid文件格式
File type = "ooTextFile"
Object class = "TextGrid"
xmin = xxxx.xxxx # 表示开始时间
xmax = xxxx.xxxx # 表示结束时间
tiers? <exists> # 这一行固定
size = 4 # 表示这个文件包含几个 item;item 也叫 tier,可以翻译为“层”
item []:
item [1]:
class = "IntervalTier"
name = "phone"
xmin = 1358.8925
xmax = 1422.5525
intervals: size = 104
intervals [1]:
xmin = 1358.8925
xmax = 1361.8925
text = "sil"
intervals [2]:
xmin = 1361.8925
xmax = 1362.0125
text = "R"
intervals [3]:
...
intervals [104]:
xmin = 1422.2325
xmax = 1422.5525
text = "sil"
item [2]:
class = "IntervalTier"
name = "word"
xmin = 1358.8925
xmax = 1422.5525
intervals: size = 3
intervals [1]:
xmin = 1358.8925
xmax = 1361.8925
text = "sp"