From 37602a3c2e543870ac5175b85c63d1a02e7e1639 Mon Sep 17 00:00:00 2001
From: jukaradayi
Date: Wed, 28 Feb 2018 10:35:04 +0000
Subject: [PATCH] correct rttm script, add lab script

---
 run-lab.sh  | 49 +++++++++++++++++++++++++++++++++++++++++++++++++
 run-rttm.sh |  3 +--
 2 files changed, 50 insertions(+), 2 deletions(-)
 create mode 100755 run-lab.sh

diff --git a/run-lab.sh b/run-lab.sh
new file mode 100755
index 0000000..1c5bb5d
--- /dev/null
+++ b/run-lab.sh
@@ -0,0 +1,49 @@
+#!/bin/bash
+#
+# Run script to generate HTK MFCC features, given a WAV audio file
+# then given a speech/nonspeech file (extension .lab), run DiarTK
+# (also known as ib_diarization_toolkit) to produce RTTM clustered
+# utterances and generated speaker IDs
+
+# Assumes 10ms frame size in .scp file; to change, edit line in htkconfig:
+# TARGETRATE = 100000.0
+
+
+numargs=3
+if [ "$#" -lt "$numargs" ]; then
+  echo "Usage: run-lab.sh ipfile labfile outdir"
+  echo ""
+  echo "ipfile : Audio input file in WAV format, extension .wav"
+  echo "labfile: speech/nonspeech file, one segment per line, format:"
+  echo "         tbeg tend label   (times in seconds, label 'speech' is kept)"
+  echo "produces output in folder outdir/"
+  exit 1  # wrong invocation must be visible to callers as a failure
+fi
+
+filename=$(basename "$1")
+basename="${filename%.*}"
+
+workdir=$3
+
+mkdir -p "$workdir"
+
+featfile=$workdir/$basename.fea
+scpfile=$workdir/$basename.scp
+
+# first-first convert the lab file to DiarTK's version of a .scp file
+# SCP format (one line per kept segment, start/end in frames):
+#   <basename>_<start>_<end>=<featfile>[<start>,<end>]
+# LAB format (field 1 = start, field 2 = end, both in seconds):
+#   tbeg tend label      -- only lines containing " speech" are converted
+# math: convert lab seconds to HTK (10ms default) frames = multiply by 100
+grep " speech" "$2" | awk -v base="$basename" -v feats="$featfile" '{begg=$1*100;endd=$2*100; print base "_" begg "_" endd "="feats "[" begg "," endd "]"}' > "$scpfile"
+
+# first generate HTK features
+HCopy -T 2 -C htkconfig "$1" "$featfile"
+
+# next run DiarTK
+scripts/run.diarizeme.sh "$featfile" "$scpfile" "$workdir" "$basename"
+
+# print results
+cat "$workdir/$basename.out"
+
diff --git a/run-rttm.sh b/run-rttm.sh
index 4c18715..86e8eca 100755
--- a/run-rttm.sh
+++ b/run-rttm.sh
@@ -36,9 +36,8 @@ scpfile=$workdir/$basename.scp
 # RTTM format:
 # Type file chan tbeg tdur ortho stype name conf Slat
 # math: convert RTTM seconds to HTK (10ms default) frames = multiply by 100
-grep LEXEME $2 | awk -v base="$basename" -v feats="$featfile" '{begg=$4*100;endd=($4+$5)*100; print base "_" begg "_" endd "="feats "[" begg "," endd "]"}' > $scpfile
+grep SPEAKER $2 | awk -v base="$basename" -v feats="$featfile" '{begg=$4*100;endd=($4+$5)*100; print base "_" begg "_" endd "="feats "[" begg "," endd "]"}' > $scpfile
 
-exit
 # first generate HTK features
 HCopy -T 2 -C htkconfig $1 $featfile
 