-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathaudioscanner.py
More file actions
73 lines (52 loc) · 2.37 KB
/
audioscanner.py
File metadata and controls
73 lines (52 loc) · 2.37 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
# open root folder
# go to child folder
# go to child folder
from curses import meta
import os
import csv
from os import listdir
from os.path import isfile, join
import audio_metadata
import re
import sys
def atoi(text):
    """Convert *text* to an int when it consists only of decimal digits.

    Args:
        text: A string fragment, e.g. one piece of a split file name.

    Returns:
        ``int(text)`` if every character is a decimal digit; otherwise
        *text* itself, unchanged (this includes the empty string).
    """
    # str.isdigit() is also true for characters such as superscript digits
    # ("\u00b2") that int() rejects with ValueError; str.isdecimal() only
    # accepts characters that int() can actually parse.
    return int(text) if text.isdecimal() else text
def natural_keys(text):
    """Build a sort key that compares embedded numbers by value.

    *text* is split into alternating non-digit and digit runs and every
    piece is passed through ``atoi``, so "file2" sorts before "file10".

    Args:
        text: The string to derive a key from (typically a file name).

    Returns:
        A list holding the pieces of *text* as converted by ``atoi``.
    """
    pieces = re.split(r'(\d+)', text)
    return list(map(atoi, pieces))
OUTPUT_CSV = 'test_data.csv'
CSV_HEADER = ["filepath", "filesize", "audio_format", "bit_depth", "bitrate",
              "channels", "duration", "sample_rate", "transcript"]
IGNORED_NAME = '.DS_Store'


def _sorted_names(path):
    """Return the entries of *path* in natural order, without '.DS_Store'."""
    return [name for name in sorted(listdir(path), key=natural_keys)
            if name != IGNORED_NAME]


def _sorted_files(path):
    """Return the regular files directly inside *path*, in natural order."""
    return [name for name in _sorted_names(path) if isfile(join(path, name))]


def _read_transcripts(transcript_path):
    """Return the text of each file in *transcript_path*, in natural order."""
    transcripts = []
    for name in _sorted_files(transcript_path):
        with open(join(transcript_path, name), 'r', encoding='UTF8') as handle:
            transcripts.append(handle.read())
    return transcripts


def _metadata_row(metadata, transcript):
    """Build one CSV row (same column order as CSV_HEADER) for an audio file."""
    info = metadata.streaminfo
    return [metadata.filepath, metadata.filesize, info.audio_format,
            info.bit_depth, info.bitrate, info.channels, info.duration,
            info.sample_rate, transcript]


def main():
    """Scan the folder named on the command line and write 'test_data.csv'.

    ``sys.argv[1]`` names the parent folder (resolved against the current
    working directory). Every sub-directory of it is expected to contain
    audio files plus a ``Transcript`` directory; the n-th audio file (natural
    order) is paired with the n-th transcript file (natural order), and one
    CSV row of audio metadata plus transcript text is written per audio file.

    Raises:
        SystemExit: If no folder argument is given or it is not a directory.
        FileNotFoundError: If a sub-directory has no ``Transcript`` directory.
    """
    if len(sys.argv) < 2:
        sys.exit("usage: python3 audioscanner.py <parent_folder>")
    root = os.path.abspath(sys.argv[1])
    if not os.path.isdir(root):
        sys.exit("not a directory: {}".format(root))

    # Only directories can hold recordings; a stray file next to them would
    # otherwise make listdir() fail further down.
    folders = [name for name in _sorted_names(root)
               if os.path.isdir(join(root, name))]

    # A single handle for the whole run keeps every row in the same encoding
    # as the header; newline='' stops the csv module's line endings from
    # being translated again (blank rows on Windows).
    with open(OUTPUT_CSV, 'w', encoding='UTF8', newline='') as out:
        writer = csv.writer(out)
        writer.writerow(CSV_HEADER)

        for folder in folders:
            folder_path = join(root, folder)
            audio_files = _sorted_files(folder_path)
            transcripts = _read_transcripts(join(folder_path, "Transcript"))

            # Pairing is purely positional, so a count mismatch means some
            # rows may carry the wrong (or no) transcript: say so loudly.
            if len(audio_files) != len(transcripts):
                print("warning: {} has {} audio file(s) but {} transcript(s)"
                      .format(folder_path, len(audio_files), len(transcripts)),
                      file=sys.stderr)

            for index, audio_name in enumerate(audio_files):
                metadata = audio_metadata.load(join(folder_path, audio_name))
                # An audio file without a matching transcript gets an empty
                # transcript cell instead of aborting with IndexError.
                transcript = transcripts[index] if index < len(transcripts) else ""
                writer.writerow(_metadata_row(metadata, transcript))
                print("1 Record Added")

    print("All Done!")


if __name__ == "__main__":
    main()
# Run from the directory that contains the parent folder, passing the parent
# folder's name as the argument.
# Example: python3 audioscanner.py myfolder
# where myfolder contains the subfolders, such as AhmadAli.
# get all audio files
# get folder with transcripts