19 wrassp
19.1 Using Praat’s signal processing routines in the EMU-SDMS
19.1.1 To Formant (burg)
to SSFF files
The R code snippet below shows how to generate AsspDataObj
objects from scratch and place data from other sources into SSFF files. The snippet uses speakr, an R package to run Praat scripts from within R. The function praat2AsspDataObj()
runs a Praat script (generated by write_praat_script()
) that executes a Praat command on a given sound file, either To Formant (burg)
or To Pitch
. The Formant
and PitchTier
objects created with these commands are then converted to a comma-separated table with Down to Table
and Down to PitchTier
respectively. The generated table/tier is then read into R and the appropriate columns are placed into tracks of an AsspDataObj
object. The praat2AsspDataObj()
function can be viewed as a template, as it can be adapted to use other functions provided by Praat or even other external tools.
NOTE. These functions can be sourced into your R session directly with:
source("https://raw.githubusercontent.com/IPS-LMU/The-EMU-SDMS-Manual/master/R/praat2AsspDataObj.R")
######################################################################
# The script uses speakr::praat_run()
# The following will install speakr if not already installed
# (requireNamespace() only checks for availability; it does not attach)
if (!requireNamespace("speakr", quietly = TRUE)) {
  install.packages("speakr", dependencies = TRUE)
}
library(speakr) # this is currently necessary to find the Praat executable
#' Convert the output of a Praat procedure to an ASSP data object
#'
#' This function writes a temporary Praat script, runs it on a sound file to
#' create a Praat object (`"formant"` or `"pitch"`) and converts the result
#' into an ASSP data object which can be written by wrassp.
#'
#' @param path Path to the .wav file.
#' @param object Praat object to generate (`"formant"` or `"pitch"`).
#' @param time_step Praat argument: The time between the centres of consecutive analysis frames.
#' @param max_fm Formant argument: Maximum number of formants per frames.
#' @param ceiling Formant argument: The maximum frequency of the formant search range in Hz.
#' @param window Formant argument: The effective duration of the analysis window in seconds.
#' @param pre_emph Formant argument: The +3 dB point for an inverted low-pass filter with a slope of +6 dB/octave.
#' @param pitch_floor Pitch argument: Candidates below this frequency will not be recruited.
#' @param pitch_ceiling Pitch argument: Candidates above this frequency will be ignored.
#' @param pitch_units Pitch argument: Pitch unit as character (`"Hertz"` or `"semitones"`).
#' @param smooth Pitch argument: If `TRUE`, the pitch track is smoothed using `smooth_bw` as bandwidth.
#' @param smooth_bw Pitch argument: Smoothing bandwidth in Hz.
#' @return The `AsspDataObj` built by `praat_run_formant()` (tracks `fm` and
#'   `bw`) or by `praat_run_pitch()` (track `f0`).
praat2AsspDataObj <- function(
    path,
    object = "formant",
    time_step = 0.0,
    # formant settings
    max_fm = 5,
    ceiling = 5500.0,
    window = 0.025,
    pre_emph = 50.0,
    # pitch settings
    pitch_floor = 75.0,
    pitch_ceiling = 600.0,
    pitch_units = "Hertz",
    smooth = FALSE,
    smooth_bw = 10.0
) {
  # fail early on an unsupported object type, before any file is touched
  object <- match.arg(object, c("formant", "pitch"))

  # Praat script path
  tmpPraatScript <- file.path(tempdir(), "script.praat")
  # tmp Praat output path
  tmpPraatOut <- file.path(tempdir(), "out.table")

  # remove tmp files if they already exist
  unlink(tmpPraatScript)
  unlink(tmpPraatOut)

  # write Praat script to tmp location
  write_praat_script(object, tmpPraatScript)

  if (object == "formant") {
    ado <- praat_run_formant(
      tmpPraatScript,
      path,
      time_step,
      max_fm,
      ceiling,
      window,
      pre_emph,
      tmpPraatOut,
      columnNames = c("fm", "bw")
    )
  } else {
    # the pitch script additionally writes frame metadata to a second file
    tmpPitchInfo <- file.path(tempdir(), "pitchInfo.csv")
    unlink(tmpPitchInfo)

    ado <- praat_run_pitch(
      tmpPraatScript,
      path,
      time_step,
      pitch_floor,
      pitch_ceiling,
      pitch_units,
      smooth,
      smooth_bw,
      tmpPraatOut,
      tmpPitchInfo,
      columnNames = c("f0")
    )
  }

  ado
}
######################################################################
#
# function to run Praat formant script
#
# Runs the script at tmpPraatScript through speakr::praat_run(), reads the
# comma-separated table it writes to tmpPraatOut and returns an AsspDataObj
# with two INT16 tracks (formant frequencies and bandwidths) named by
# columnNames[1] and columnNames[2].
praat_run_formant <- function(tmpPraatScript,
                              path,
                              time_step,
                              max_fm,
                              ceiling,
                              window,
                              pre_emph,
                              tmpPraatOut,
                              columnNames) {
  # run Praat script
  speakr::praat_run(
    tmpPraatScript,
    path,
    time_step,
    max_fm,
    ceiling,
    window,
    pre_emph,
    tmpPraatOut
  )

  # get vals
  df <- read.csv(tmpPraatOut, stringsAsFactors = FALSE)
  df[df == "--undefined--"] <- 0

  # Convert column by column with as.integer(): a column that contained
  # "--undefined--" is read as character, and data.matrix() would replace
  # such a column by factor codes instead of the numeric values.
  as_int_matrix <- function(cols) {
    vals <- vapply(cols, as.integer, integer(nrow(cols)))
    matrix(vals, nrow = nrow(cols), ncol = length(cols))
  }
  # NOTE(review): the column positions assume the five-formant table layout
  # requested by the script (time, nformants, F1, B1, ..., F5, B5) - confirm
  # before using max_fm values other than 5
  fmVals <- as_int_matrix(df[, c(3, 5, 7, 9, 11)])
  bwVals <- as_int_matrix(df[, c(4, 6, 8, 10, 12)])

  # get start time
  startTime <- df[1, 1]

  # create AsspDataObj
  ado <- list()
  attr(ado, "trackFormats") <- c("INT16", "INT16")

  # with time_step == 0 the frame step is taken as a quarter of the window
  if (time_step == 0) {
    sR <- 1 / (0.25 * window)
  } else {
    sR <- 1 / time_step
  }
  attr(ado, "sampleRate") <- sR

  tmpObj <- wrassp::read.AsspDataObj(path)
  attr(ado, "origFreq") <- attr(tmpObj, "sampleRate")
  attr(ado, "startTime") <- startTime
  # attr(ado, "startRecord") = as.integer(1)
  attr(ado, "endRecord") <- as.integer(nrow(fmVals))
  class(ado) <- "AsspDataObj"
  wrassp::AsspFileFormat(ado) <- "SSFF"
  wrassp::AsspDataFormat(ado) <- as.integer(2) # == binary

  fmName <- columnNames[1]
  bwName <- columnNames[2]
  ado <- wrassp::addTrack(ado, fmName, fmVals, "INT16")
  ado <- wrassp::addTrack(ado, bwName, bwVals, "INT16")

  # add missing values at the start as Praat sometimes
  # has very late start values which causes issues
  # in the SSFF file format as this sets the startRecord
  # depending on the start time of the first sample
  if (startTime > 1 / sR) {
    nr_of_missing_samples <- floor(startTime / (1 / sR))

    missing_fm_vals <- matrix(0,
                              nrow = nr_of_missing_samples,
                              ncol = ncol(ado[[fmName]]))
    missing_bw_vals <- matrix(0,
                              nrow = nr_of_missing_samples,
                              ncol = ncol(ado[[bwName]]))

    # prepend values
    ado[[fmName]] <- rbind(missing_fm_vals, ado[[fmName]])
    ado[[bwName]] <- rbind(missing_bw_vals, ado[[bwName]])

    # fix start time
    attr(ado, "startTime") <- startTime - nr_of_missing_samples * (1 / sR)
  }

  # returned on every path, not only when padding was prepended
  ado
}
######################################################################
#
# function to run Praat pitch script
#
# Runs the script at tmpPraatScript through speakr::praat_run(), reads the
# frame metadata from tmpPitchInfo and the pitch table from tmpPraatOut and
# returns an AsspDataObj with a single INT16 track. The track is named by
# columnNames[1], or "f0" when columnNames is not supplied.
praat_run_pitch <- function(tmpPraatScript,
                            path,
                            time_step,
                            pitch_floor,
                            pitch_ceiling,
                            pitch_units,
                            smooth,
                            smooth_bw,
                            tmpPraatOut,
                            tmpPitchInfo,
                            columnNames) {
  # the script's boolean form field is passed as 1/0
  smooth <- if (smooth) 1 else 0

  trackName <- if (missing(columnNames)) "f0" else columnNames[1]

  # run Praat script
  speakr::praat_run(
    tmpPraatScript,
    path,
    time_step,
    pitch_floor,
    pitch_ceiling,
    pitch_units,
    smooth,
    smooth_bw,
    tmpPraatOut,
    tmpPitchInfo
  )

  pitchInfo <- read.csv(tmpPitchInfo, stringsAsFactors = FALSE)

  nframes <- pitchInfo$nframes[1]
  timestep <- pitchInfo$timestep[1]
  sR <- 1 / timestep
  start <- pitchInfo$start[1]

  # get vals (only frames for which the table holds a value)
  df_tmp <- read.csv(tmpPraatOut, stringsAsFactors = FALSE, sep = "\t")[, 2:3]

  # One value per frame, 0 where the table has no entry. Rows are placed by
  # frame index rather than by comparing times for exact equality, which is
  # fragile for floating point values that went through a text file.
  f0Vals <- integer(nframes)
  frameIdx <- round((df_tmp$Time - start) / timestep) + 1
  inRange <- !is.na(frameIdx) & frameIdx >= 1 & frameIdx <= nframes
  f0Vals[frameIdx[inRange]] <- as.integer(df_tmp$F0[inRange])

  # create AsspDataObj
  ado <- list()
  attr(ado, "trackFormats") <- c("INT16")
  attr(ado, "sampleRate") <- sR

  tmpObj <- wrassp::read.AsspDataObj(path)
  attr(ado, "origFreq") <- attr(tmpObj, "sampleRate")
  attr(ado, "startTime") <- start
  attr(ado, "endRecord") <- as.integer(nframes)
  class(ado) <- "AsspDataObj"
  wrassp::AsspFileFormat(ado) <- "SSFF"
  wrassp::AsspDataFormat(ado) <- as.integer(2)

  ado <- wrassp::addTrack(ado, trackName, f0Vals, "INT16")

  # prepend missing values as praat sometimes
  # starts fairly late
  if (start > 1 / sR) {
    nr_of_missing_samples <- floor(start / (1 / sR))

    missing_f0_vals <- matrix(0,
                              nrow = nr_of_missing_samples,
                              ncol = ncol(ado[[trackName]]))

    # prepend values
    ado[[trackName]] <- rbind(missing_f0_vals, ado[[trackName]])

    # fix start time
    attr(ado, "startTime") <- start - nr_of_missing_samples * (1 / sR)
    attr(ado, "startTime") <- round(attr(ado, "startTime"), 6)
  }

  ado
}
######################################################################
#
# write Praat script to tmp dir
#
# script:  which script text to write ("formant" or "pitch")
# tmp_out: path of the file to write
# Returns the written script text invisibly; stops with a message naming
# the offending value for any other script.
write_praat_script <- function(script, tmp_out) {
  text <- switch(
    script,
    formant = formant_text,
    pitch = pitch_text,
    stop(
      "unknown Praat script '", script,
      "' (expected \"formant\" or \"pitch\")",
      call. = FALSE
    )
  )

  # binary connection so that line endings stay "\n" on every platform
  con <- file(tmp_out, open = "wb")
  on.exit(close(con), add = TRUE)
  writeLines(text, con, sep = "\n")

  invisible(text)
}
######################################################################
#
# formant Praat script text
#
# The form fields are filled, in order, by the arguments that
# praat_run_formant() passes to speakr::praat_run(). The script saves the
# formant table as a comma-separated file at the path given in `out`.
formant_text <- "# To Formant (burg)
form To formants
text path ./
real time_step 0.0
integer max_fm 5
real ceiling 5500.0
real window 0.025
real pre_emph 50.0
text out ./
endform
sound = Read from file: path$
formant = To Formant (burg): time_step, max_fm, ceiling, window, pre_emph
table = Down to Table: \"no\", \"yes\", 6, \"no\", 3, \"yes\", 3, \"yes\"
Save as comma-separated file: out$
"
######################################################################
#
# pitch Praat script text
#
# The form fields are filled, in order, by the arguments that
# praat_run_pitch() passes to speakr::praat_run(). The script writes the
# frame metadata (nframes, timestep, start, end) as CSV to `pitchInfo` and
# the pitch values as a headerless spreadsheet file to `out`.
# Smoothing is applied when the `smooth` field is 1, i.e. when the R
# argument `smooth` is TRUE.
pitch_text <- "# To Pitch
form To pitch
text path ./
real time_step 0.0
real pitch_floor 75.0
real pitch_ceiling 600.0
word pitch_units Hertz
boolean smooth 0
real smooth_bw 10.0
text out ./
text pitchInfo ./
endform
sound = Read from file: path$
pitch = To Pitch: time_step, pitch_floor, pitch_ceiling
if smooth == 1
pitch = Smooth: smooth_bw
endif
nframes = Get number of frames
timestep = Get time step
start = Get time from frame number: 1
end = Get time from frame number: nframes
header$ = \"nframes,timestep,start,end\"
writeFileLine(pitchInfo$, header$)
line$ = \"'nframes','timestep','start','end'\"
appendFileLine(pitchInfo$, line$)
Down to PitchTier
Down to TableOfReal: pitch_units$
Save as headerless spreadsheet file: out$
"
The following code chunk shows how this function can be applied to .wav
files of an emuDB.
library(emuR)

# create demo data in tempdir()
create_emuRdemoData(tempdir())

# create path to demo database
path2ae <- file.path(tempdir(), "emuR_demoData", "ae_emuDB")

# list all .wav files in the ae emuDB
# (pattern is a regular expression, so the dot is escaped)
paths2wavFiles <- list.files(path2ae,
                             pattern = "\\.wav$",
                             recursive = TRUE,
                             full.names = TRUE)

# loop through files
## formants
for (fp in paths2wavFiles) {
  ado <- praat2AsspDataObj(fp, object = "formant")
  newPath <- paste0(tools::file_path_sans_ext(fp), ".praatFms")
  # print(paste0(fp, ' -> ', newPath)) # uncomment for simple log
  wrassp::write.AsspDataObj(ado, file = newPath)
}

## pitch
for (fp in paths2wavFiles) {
  ado <- praat2AsspDataObj(fp,
                           object = "pitch",
                           time_step = 0.00625,
                           pitch_floor = 60,
                           smooth = TRUE)
  newPath <- paste0(tools::file_path_sans_ext(fp), ".praatF0")
  # print(paste0(fp, ' -> ', newPath)) # uncomment for simple log
  wrassp::write.AsspDataObj(ado, file = newPath)
}

# load emuDB
# (verbose = FALSE is only set to avoid additional output in manual)
ae <- load_emuDB(path2ae, verbose = FALSE)

# add SSFF track definition
add_ssffTrackDefinition(ae,
                        name = "praatFms",
                        columnName = "fm",
                        fileExtension = "praatFms")
add_ssffTrackDefinition(ae,
                        name = "praatF0",
                        columnName = "f0",
                        fileExtension = "praatF0")

# test query + get_trackdata
sl <- query(ae, "Phonetic == n")

# (verbose = FALSE is only set to avoid additional output in manual)
td <- get_trackdata(ae,
                    sl,
                    ssffTrackName = "praatFms",
                    verbose = FALSE)
pt <- get_trackdata(ae,
                    sl,
                    ssffTrackName = "praatF0",
                    verbose = FALSE)
A few comments about synchronized F0 and Formant values:
- The
time_step
argument is "Time step (s) (standard value: 0.0) the measurement interval (frame duration), in seconds. If you supply 0, Praat will use a time step of 0.75 / (pitch floor), e.g. 0.01 seconds if the pitch floor is 75 Hz; in this example, Praat computes 100 pitch values per second." Here time_step
is set to 0.00625 (Seconds) when calculating pitch (as opposed to Praat’s default of 0.0) in order to keep it in line with Time step in “To Formant…,” because Time step in “To Formants…” is derived from window length (which in “To Formant…” defaults to 0.025) (window length/4 (–> Time step in “To Formant…” will usually be 0.00625)) - The
pitch_floor
argument is "Pitch floor (Hz) (standard value: 75 Hz): candidates below this frequency will not be recruited. This parameter determines the length of the analysis window: it will be 3 longest periods long, i.e., if the pitch floor is 75 Hz, the window will be 3/75 = 0.04 seconds long. Note that if you set the time step to zero, the analysis windows for consecutive measurements will overlap appreciably: Praat will always compute 4 pitch values within one window length, i.e., the degree of oversampling is 4." Importantly, this parameter is set NOT to Praat’s default 75 Hz, but to 60 Hz, again because of correspondence of window lengths between “To Pitch…” and “To Formants….” The actual window length in “To Formants…” will be twice as long as the value given in the “To Formants…” command, i.e. the default of 0.025 will result in a window length of 0.05. The same window length in “To Pitch…” can indirectly be achieved by using a pitch floor value of 60 Hz (given that 3/60 = 0.05). In most cases, differing window lengths will not affect the temporal position of the F0 and Formant values; however, due to problems near the edges, sometimes they will (and therefore result in non-synchronized F0 and Formant values). Due to rounding errors, F0 and Formant values still might be slightly asynchronous; to avoid this, praat2AsspDataObj()
rounds the start time with a precision of 0.001 ms (via round(attr(ado, "startTime"), 6)
at the very end). - The
pitch_ceiling
argument (default: 600) will not affect any of the other parameters.
19.2 Using OpenSMILE signal processing routines in the EMU-SDMS
NOTE: this function can be accessed directly as follows: source("https://raw.githubusercontent.com/IPS-LMU/The-EMU-SDMS-Manual/master/R/SMILExtract2AsspDataObj.R")
##' convert CSV output of SMILExtract to AsspDataObject
##'
##' Runs SMILExtract on a wav file, reads the semicolon-separated output and
##' places its columns (except frameIndex and frameTime) into REAL32 tracks.
##' @param path path to wav file
##' @param SMILExtractPath path to SMILExtract executable
##' @param configPath path to openSMILE config file
##' @param columsAsTracks if TRUE -> every column will be placed in its own track
##' if FALSE -> every column is placed into a single track called SMILExtractAll
##' @return an AsspDataObj holding the extracted tracks
SMILExtract2AsspDataObj <- function(path,
                                    SMILExtractPath,
                                    configPath,
                                    columsAsTracks = TRUE) {
  tmp1FileName <- "tmp.csv"
  tmp1FilePath <- file.path(tempdir(), tmp1FileName)

  # remove tmp file if it already exists
  unlink(tmp1FilePath)

  # File arguments are expanded and quoted so that "~" and paths containing
  # spaces reach SMILExtract intact; the executable part is passed on
  # unchanged.
  system(paste(SMILExtractPath,
               "-C", shQuote(path.expand(configPath)),
               "-I", shQuote(path.expand(path)),
               "-O", shQuote(tmp1FilePath)),
         ignore.stdout = TRUE,
         ignore.stderr = TRUE)

  if (!file.exists(tmp1FilePath)) {
    stop("SMILExtract did not produce an output file for ", path,
         call. = FALSE)
  }

  # get vals
  df <- suppressMessages(readr::read_delim(tmp1FilePath, delim = ";"))

  # extract + remove frameIndex/frameTime
  frameTime <- df$frameTime
  df$frameIndex <- NULL
  df$frameTime <- NULL

  df <- as.matrix(df)
  colNames <- colnames(df)

  if (length(frameTime) < 2) {
    stop("need at least two frames to derive the sample rate for ", path,
         call. = FALSE)
  }

  # get start time
  startTime <- frameTime[1]

  # create AsspDataObj
  ado <- list()

  # step size is the distance between the first two frames; this equals the
  # second frameTime whenever the first frame starts at 0
  attr(ado, "sampleRate") <- 1 / (frameTime[2] - frameTime[1])

  tmpObj <- wrassp::read.AsspDataObj(path)
  attr(ado, "origFreq") <- attr(tmpObj, "sampleRate")
  attr(ado, "startTime") <- startTime
  # attr(ado, "startRecord") = as.integer(1)
  attr(ado, "endRecord") <- as.integer(nrow(df))
  class(ado) <- "AsspDataObj"
  wrassp::AsspFileFormat(ado) <- "SSFF"
  wrassp::AsspDataFormat(ado) <- as.integer(2)

  if (columsAsTracks) {
    # add every column as new track
    attr(ado, "trackFormats") <- rep("REAL32", ncol(df))
    for (i in seq_len(ncol(df))) {
      ado <- wrassp::addTrack(ado,
                              trackname = colNames[i],
                              data = df[, i],
                              format = "REAL32")
    }
  } else {
    attr(ado, "trackFormats") <- "REAL32"
    ado <- wrassp::addTrack(ado,
                            trackname = "SMILExtractAll",
                            data = df,
                            format = "REAL32")
  }

  ado
}
How this function can be applied to wav files of an emuDB is shown below.
library(emuR)

# create demo data in tempdir()
create_emuRdemoData(tempdir())

# create path to demo database
path2ae <- file.path(tempdir(), "emuR_demoData", "ae_emuDB")

# list all .wav files in the ae emuDB
# (pattern is a regular expression, so the dot is escaped)
paths2wavFiles <- list.files(path2ae,
                             pattern = "\\.wav$",
                             recursive = TRUE,
                             full.names = TRUE)

# loop through files
for (fp in paths2wavFiles) {
  ado <- SMILExtract2AsspDataObj(
    fp,
    SMILExtractPath = "~/programs/opensmile-2.3.0/bin/SMILExtract",
    configPath = "~/programs/opensmile-2.3.0/config/demo/demo1_energy.conf"
  )
  # tools:: and wrassp:: are spelled out because library(emuR) is the only
  # package attached in this chunk
  newPath <- paste0(tools::file_path_sans_ext(fp), ".SMILExtract")
  # print(paste0(fp, ' -> ', newPath)) # uncomment for simple log
  wrassp::write.AsspDataObj(ado, file = newPath)
}

# load emuDB
# (verbose = FALSE is only set to avoid additional output in manual)
ae <- load_emuDB(path2ae, verbose = FALSE)

# add SSFF track definition
add_ssffTrackDefinition(ae,
                        name = "SMILExtract",
                        columnName = "pcm_LOGenergy",
                        fileExtension = "SMILExtract")

# test query + get_trackdata
sl <- query(ae, "Phonetic == n")

# (verbose = FALSE is only set to avoid additional output in manual)
td <- get_trackdata(ae,
                    sl,
                    ssffTrackName = "SMILExtract",
                    verbose = FALSE)

# test display
set_signalCanvasesOrder(ae,
                        perspectiveName = "default",
                        order = c("OSCI", "SPEC", "SMILExtract"))
# serve(ae) # uncomment to view in EMU-webApp