19 wrassp

19.1 Using Praat’s signal processing routines in the EMU-SDMS

19.1.1 To Formant (burg) to SSFF files

The R code snippet below shows how to generate AsspDataObj objects from scratch and place data from other sources into SSFF files. The snippet uses speakr, an R package to run Praat scripts from within R. The function praat2AsspDataObj() runs a Praat script (generated by write_praat_script()) that executes a Praat command on a given sound file, either To Formant (burg) or To Pitch. The Formant and Pitch objects created with these commands are then converted to tables with Down to Table and Down to PitchTier (followed by Down to TableOfReal), respectively. The generated table is then read into R and the appropriate columns are placed into tracks of an AsspDataObj object. praat2AsspDataObj() can be viewed as a template function, as it can be adapted to use other functions provided by Praat or even other external tools.

NOTE. These functions can be sourced into your R session directly with:

source("https://raw.githubusercontent.com/IPS-LMU/The-EMU-SDMS-Manual/master/R/praat2AsspDataObj.R")
######################################################################
# The script uses speakr::praat_run()
# The following will install speakr if not already installed
# requireNamespace() only checks whether speakr is available (it does not
# attach it); the package is attached by the library() call below.
if (!requireNamespace("speakr", quietly = TRUE)) {
  # `dependencies` (not `dependences`) is the install.packages() argument
  # that also pulls in the packages speakr needs
  install.packages("speakr", dependencies = TRUE)
}

library(speakr) # this is currently necessary to find the Praat executable

#' Convert the output of a Praat procedure to an ASSP data object
#'
#' This function runs a Praat script on a sound file to create a Praat object
#' (available outputs: `"formant"` and `"pitch"`) and converts the result into
#' an ASSP data object which can be written to disk by wrassp.
#'
#' The Praat script and its output are written to fixed file names inside
#' `tempdir()`; existing files with these names are removed first.
#'
#' @param path Path to the .wav file.
#' @param object Praat object to generate (`"formant"` or `"pitch"`).
#' @param time_step Praat argument: The time between the centres of consecutive analysis frames.
#' @param max_fm Formant argument: Maximum number of formants per frames.
#' @param ceiling Formant argument: The maximum frequency of the formant search range in Hz.
#' @param window Formant argument: The effective duration of the analysis window in seconds.
#' @param pre_emph Formant argument: The +3 dB point for an inverted low-pass filter with a slope of +6 dB/octave.
#' @param pitch_floor Pitch argument: Candidates below this frequency will not be recruited.
#' @param pitch_ceiling Pitch argument: Candidates above this frequency will be ignored.
#' @param pitch_units Pitch argument: Pitch unit as character (`"Hertz"` or `"semitones"`).
#' @param smooth Pitch argument: If `TRUE`, the pitch track is smoothed using `smooth_bw` as bandwidth.
#' @param smooth_bw Pitch argument: Smoothing bandwidth in Hz.
#' @return An `AsspDataObj` with the tracks `fm` and `bw` (for
#'   `object = "formant"`) or the track `f0` (for `object = "pitch"`).
praat2AsspDataObj <- function(
  path,
  object = "formant",
  time_step = 0.0,
  # formant settings
  max_fm = 5,
  ceiling = 5500.0,
  window = 0.025,
  pre_emph = 50.0,
  # pitch settings
  pitch_floor = 75.0,
  pitch_ceiling = 600.0,
  pitch_units = "Hertz",
  smooth = FALSE,
  smooth_bw = 10.0
) {

  # fail early and with a readable message instead of running into an
  # undefined result further down
  if (!is.character(object) || length(object) != 1L ||
      !object %in% c("formant", "pitch")) {
    stop("`object` must be either \"formant\" or \"pitch\"", call. = FALSE)
  }

  # Praat script path
  tmpPraatScript <- file.path(tempdir(), "script.praat")
  # tmp Praat output path
  tmpPraatOut <- file.path(tempdir(), "out.table")

  # remove tmp files if they already exist
  unlink(c(tmpPraatScript, tmpPraatOut))

  # write Praat script to tmp location
  write_praat_script(object, tmpPraatScript)

  if (object == "formant") {
    ado <- praat_run_formant(tmpPraatScript,
                             path,
                             time_step,
                             max_fm,
                             ceiling,
                             window,
                             pre_emph,
                             tmpPraatOut,
                             columnNames = c("fm", "bw"))
  } else {
    # the pitch script additionally reports frame count / timing in a
    # separate info file
    tmpPitchInfo <- file.path(tempdir(), "pitchInfo.csv")
    unlink(tmpPitchInfo)

    ado <- praat_run_pitch(tmpPraatScript,
                           path,
                           time_step,
                           pitch_floor,
                           pitch_ceiling,
                           pitch_units,
                           smooth,
                           smooth_bw,
                           tmpPraatOut,
                           tmpPitchInfo,
                           columnNames = c("f0"))
  }

  ado
}


######################################################################
#
# function to run Praat formant script
#
# Runs the Praat script `tmpPraatScript` on the sound file `path`, reads the
# comma-separated table the script writes to `tmpPraatOut` and returns an
# AsspDataObj (SSFF) with two INT16 tracks named columnNames[1] (formant
# frequencies) and columnNames[2] (bandwidths).
#
# The table is expected to hold the frame time in column 1 and, from column 3
# on, alternating formant frequency / bandwidth columns.
praat_run_formant <- function(tmpPraatScript,
                              path,
                              time_step,
                              max_fm,
                              ceiling,
                              window,
                              pre_emph,
                              tmpPraatOut,
                              columnNames) {
  # run Praat script
  speakr::praat_run(
    tmpPraatScript,
    path,
    time_step,
    max_fm,
    ceiling,
    window,
    pre_emph,
    tmpPraatOut
  )

  # get vals
  df <- read.csv(tmpPraatOut, stringsAsFactors = FALSE)
  df[df == "--undefined--"] <- 0

  if (ncol(df) < 4) {
    stop("Praat formant table has fewer than 4 columns: ", tmpPraatOut,
         call. = FALSE)
  }

  # derive the column positions from the table itself so that any number of
  # formants works (12 columns -> 3, 5, 7, 9, 11 and 4, 6, 8, 10, 12)
  fmCols <- seq(3, ncol(df), by = 2)
  bwCols <- seq(4, ncol(df), by = 2)

  # Columns containing '--undefined--' are read as character. Convert every
  # column with as.integer() directly: data.matrix() would turn character
  # columns into factor codes instead of the numbers they spell out.
  # matrix() keeps the frames x columns shape even for a single frame.
  as_int_matrix <- function(cols) {
    vals <- vapply(df[cols], function(v) as.integer(v), integer(nrow(df)))
    matrix(as.vector(vals), nrow = nrow(df), ncol = length(cols))
  }
  fmVals <- as_int_matrix(fmCols)
  bwVals <- as_int_matrix(bwCols)

  # get start time
  startTime <- df[1, 1]

  # create AsspDataObj
  ado <- list()

  attr(ado, "trackFormats") <- c("INT16", "INT16")

  # with time_step == 0 the frame rate is derived from a quarter of the
  # window length
  if (time_step == 0) {
    sR <- 1 / (0.25 * window)
  } else {
    sR <- 1 / time_step
  }

  attr(ado, "sampleRate") <- sR

  tmpObj <- wrassp::read.AsspDataObj(path)
  attr(ado, "origFreq") <- attr(tmpObj, "sampleRate")

  attr(ado, "startTime") <- startTime

  # attr(ado, "startRecord") = as.integer(1)

  attr(ado, "endRecord") <- as.integer(nrow(fmVals))

  class(ado) <- "AsspDataObj"

  wrassp::AsspFileFormat(ado) <- "SSFF"
  wrassp::AsspDataFormat(ado) <- as.integer(2) # == binary

  fmName <- columnNames[1]
  bwName <- columnNames[2]

  ado <- wrassp::addTrack(ado, fmName, fmVals, "INT16")

  ado <- wrassp::addTrack(ado, bwName, bwVals, "INT16")

  # add missing values at the start as Praat sometimes
  # has very late start values which causes issues
  # in the SSFF file format as this sets the startRecord
  # depending on the start time of the first sample
  if (startTime > 1 / sR) {
    nr_of_missing_samples <- floor(startTime / (1 / sR))

    missing_fm_vals <- matrix(0,
                              nrow = nr_of_missing_samples,
                              ncol = ncol(ado[[fmName]]))

    missing_bw_vals <- matrix(0,
                              nrow = nr_of_missing_samples,
                              ncol = ncol(ado[[bwName]]))

    # prepend values
    ado[[fmName]] <- rbind(missing_fm_vals, ado[[fmName]])
    ado[[bwName]] <- rbind(missing_bw_vals, ado[[bwName]])

    # fix start time
    attr(ado, "startTime") <- startTime - nr_of_missing_samples * (1 / sR)
  }

  # return the object in every case, not only when frames were prepended
  ado
}





######################################################################
#
# function to run Praat pitch script
#
# Runs the Praat script `tmpPraatScript` on the sound file `path`, reads the
# frame information the script writes to `tmpPitchInfo` (nframes, timestep,
# start, end) and the tab-separated Time/F0 table it writes to `tmpPraatOut`,
# and returns an AsspDataObj (SSFF) with one INT16 track named
# columnNames[1]. Frames that do not occur in the table keep the value 0.
praat_run_pitch <- function(tmpPraatScript,
                            path,
                            time_step,
                            pitch_floor,
                            pitch_ceiling,
                            pitch_units,
                            smooth,
                            smooth_bw,
                            tmpPraatOut,
                            tmpPitchInfo,
                            columnNames) {
  # the Praat form takes the boolean as 1 / 0
  smooth <- if (smooth) 1 else 0

  # run Praat script
  speakr::praat_run(
    tmpPraatScript,
    path,
    time_step,
    pitch_floor,
    pitch_ceiling,
    pitch_units,
    smooth,
    smooth_bw,
    tmpPraatOut,
    tmpPitchInfo
  )

  pitchInfo <- read.csv(tmpPitchInfo, stringsAsFactors = FALSE)

  nframes <- pitchInfo$nframes[1]
  timestep <- pitchInfo$timestep[1]
  sR <- 1 / timestep
  start <- pitchInfo$start[1]

  # get vals (columns 2 and 3 are Time and F0)
  df_tmp <- read.csv(tmpPraatOut, stringsAsFactors = FALSE, sep = "\t")[, 2:3]

  # one value per frame, 0 where the table has no entry.
  # Each table row is assigned to its frame by index rather than by testing
  # floating point times for exact equality, which can miss frames and then
  # misalign all following values.
  f0 <- numeric(nframes)
  frameIdx <- round((df_tmp$Time - start) / timestep) + 1
  inRange <- !is.na(frameIdx) & frameIdx >= 1 & frameIdx <= nframes
  f0[frameIdx[inRange]] <- df_tmp$F0[inRange]

  # create AsspDataObj
  ado <- list()

  attr(ado, "trackFormats") <- c("INT16")
  attr(ado, "sampleRate") <- sR

  tmpObj <- wrassp::read.AsspDataObj(path)
  attr(ado, "origFreq") <- attr(tmpObj, "sampleRate")

  attr(ado, "startTime") <- start
  attr(ado, "endRecord") <- as.integer(nframes)

  class(ado) <- "AsspDataObj"

  wrassp::AsspFileFormat(ado) <- "SSFF"
  wrassp::AsspDataFormat(ado) <- as.integer(2)

  # honour the track name handed in by the caller (as praat_run_formant does)
  f0Name <- columnNames[1]
  f0Vals <- as.integer(f0)
  ado <- wrassp::addTrack(ado, f0Name, f0Vals, "INT16")

  # prepend missing values as praat sometimes
  # starts fairly late
  if (start > 1 / sR) {
    nr_of_missing_samples <- floor(start / (1 / sR))

    missing_f0_vals <- matrix(0,
                              nrow = nr_of_missing_samples,
                              ncol = ncol(ado[[f0Name]]))

    # prepend values
    ado[[f0Name]] <- rbind(missing_f0_vals, ado[[f0Name]])

    # fix start time (rounded to 6 decimals to avoid rounding noise)
    attr(ado, "startTime") <- start - nr_of_missing_samples * (1 / sR)
    attr(ado, "startTime") <- round(attr(ado, "startTime"), 6)
  }

  ado
}


######################################################################
#
# write Praat script to tmp dir
#
# `script` selects the script text ("formant" or "pitch"); `tmp_out` is the
# file the text is written to. Any other value of `script` is an error.
write_praat_script <- function(script, tmp_out) {
  if (script == "formant") {
    readr::write_lines(formant_text, tmp_out)
  } else if (script == "pitch") {
    readr::write_lines(pitch_text, tmp_out)
  } else {
    # say what went wrong instead of stopping with an empty message
    stop("unknown Praat script \"", script,
         "\": expected \"formant\" or \"pitch\"",
         call. = FALSE)
  }
}

######################################################################
#
# formant Praat script text
#
# Praat script kept as an R string; write_praat_script() writes it to a file.
# The form block declares the arguments the script takes, in this order:
# path, time_step, max_fm, ceiling, window, pre_emph, out.
# The script reads the sound file at `path`, runs "To Formant (burg)" with
# the given settings, converts the result with "Down to Table" and saves
# that table as a comma-separated file at `out`.
formant_text = "# To Formant (burg)

form To formants
  text path ./
  real time_step 0.0
  integer max_fm 5
  real ceiling 5500.0
  real window 0.025
  real pre_emph 50.0
  text out ./
endform

sound = Read from file: path$

formant = To Formant (burg): time_step, max_fm, ceiling, window, pre_emph

table = Down to Table: \"no\", \"yes\", 6, \"no\", 3, \"yes\", 3, \"yes\"

Save as comma-separated file: out$

"


######################################################################
#
# pitch Praat script text
#
# Praat script kept as an R string; write_praat_script() writes it to a file.
# The form block declares the arguments the script takes, in this order:
# path, time_step, pitch_floor, pitch_ceiling, pitch_units, smooth,
# smooth_bw, out, pitchInfo.
# The script runs "To Pitch" on the sound file at `path`, smooths the result
# only when `smooth` is switched on (1), writes nframes/timestep/start/end as
# a small CSV to `pitchInfo` and saves the pitch values as a headerless
# spreadsheet file at `out`.
pitch_text <- "# To Pitch

form To pitch
  text path ./
  real time_step 0.0
  real pitch_floor 75.0
  real pitch_ceiling 600.0
  word pitch_units Hertz
  boolean smooth 0
  real smooth_bw 10.0
  text out ./
  text pitchInfo ./
endform

sound = Read from file: path$

pitch = To Pitch: time_step, pitch_floor, pitch_ceiling

if smooth == 1
  pitch = Smooth: smooth_bw
endif

nframes = Get number of frames
timestep = Get time step
start = Get time from frame number: 1
end = Get time from frame number: nframes

header$ = \"nframes,timestep,start,end\"
writeFileLine(pitchInfo$, header$)
line$ = \"'nframes','timestep','start','end'\"
appendFileLine(pitchInfo$, line$)

Down to PitchTier

Down to TableOfReal: pitch_units$

Save as headerless spreadsheet file: out$

"

The following code chunk shows how this function can be applied to .wav files of an emuDB.

library(emuR)

# create demo data in tempdir()
create_emuRdemoData(tempdir())

# create path to demo database
path2ae <- file.path(tempdir(), "emuR_demoData", "ae_emuDB")

# list all .wav files in the ae emuDB
# (`pattern` is a regular expression, not a glob: match a literal ".wav"
# at the end of the file name)
paths2wavFiles <- list.files(path2ae, pattern = "\\.wav$",
                             recursive = TRUE, full.names = TRUE)

# loop through files
## formants
for (fp in paths2wavFiles) {
  ado <- praat2AsspDataObj(fp, object = "formant")
  newPath <- paste0(tools::file_path_sans_ext(fp), ".praatFms")
  # print(paste0(fp, ' -> ', newPath)) # uncomment for simple log
  wrassp::write.AsspDataObj(ado, file = newPath)
}
## pitch
for (fp in paths2wavFiles) {
  ado <- praat2AsspDataObj(fp,
                           object = "pitch",
                           time_step = 0.00625,
                           pitch_floor = 60,
                           smooth = TRUE)
  newPath <- paste0(tools::file_path_sans_ext(fp), ".praatF0")
  # print(paste0(fp, ' -> ', newPath)) # uncomment for simple log
  wrassp::write.AsspDataObj(ado, file = newPath)
}

# load emuDB
# (verbose = FALSE is only set to avoid additional output in manual)
ae <- load_emuDB(path2ae, verbose = FALSE)

# add SSFF track definition
add_ssffTrackDefinition(ae,
                        name = "praatFms",
                        columnName = "fm",
                        fileExtension = "praatFms")

add_ssffTrackDefinition(ae,
                        name = "praatF0",
                        columnName = "f0",
                        fileExtension = "praatF0")

# test query + get_trackdata
sl <- query(ae, "Phonetic == n")

# (verbose = FALSE is only set to avoid additional output in manual)
td <- get_trackdata(ae,
                    sl,
                    ssffTrackName = "praatFms",
                    verbose = FALSE)

pt <- get_trackdata(ae,
                    sl,
                    ssffTrackName = "praatF0",
                    verbose = FALSE)

A few comments about synchronized F0 and Formant values:

  • The time_step argument is "Time step (s) (standard value: 0.0) the measurement interval (frame duration), in seconds. If you supply 0, Praat will use a time step of 0.75 / (pitch floor), e.g. 0.01 seconds if the pitch floor is 75 Hz; in this example, Praat computes 100 pitch values per second." Here time_step is set to 0.00625 seconds when calculating pitch (as opposed to Praat’s default of 0.0) in order to keep it in line with the time step of “To Formant…”. In “To Formant…” the time step is derived from the window length (which defaults to 0.025) as window length / 4, so it will usually be 0.00625.
  • The pitch_floor argument is "Pitch floor (Hz) (standard value: 75 Hz): candidates below this frequency will not be recruited. This parameter determines the length of the analysis window: it will be 3 longest periods long, i.e., if the pitch floor is 75 Hz, the window will be 3/75 = 0.04 seconds long. Note that if you set the time step to zero, the analysis windows for consecutive measurements will overlap appreciably: Praat will always compute 4 pitch values within one window length, i.e., the degree of oversampling is 4." Importantly, this parameter is NOT set to Praat’s default of 75 Hz but to 60 Hz, again so that the window lengths of “To Pitch…” and “To Formant…” correspond. The actual window length in “To Formant…” is twice as long as the value given in the “To Formant…” command, i.e. the default of 0.025 results in a window length of 0.05. The same window length can be achieved indirectly in “To Pitch…” by using a pitch floor of 60 Hz (given that 3/60 = 0.05). In most cases, differing window lengths will not affect the temporal position of the F0 and formant values; however, due to problems near the edges, sometimes they will (and therefore result in non-synchronized F0 and formant values). Due to rounding errors, F0 and formant values might still be slightly asynchronous; to avoid this, praat2AsspDataObj() rounds the start time of the pitch track with a precision of 0.001 ms (via round(attr(ado, "startTime"), 6) at the very end).
  • The pitch_ceiling argument (default: 600) will not affect any of the other parameters.

19.2 Using OpenSMILE signal processing routines in the EMU-SDMS

NOTE: this function can be accessed directly as follows: source("https://raw.githubusercontent.com/IPS-LMU/The-EMU-SDMS-Manual/master/R/SMILExtract2AsspDataObj.R")

##' convert CSV output of SMILExtract to AsspDataObject
##'
##' Runs SMILExtract on a wav file, reads the ';'-separated output it writes
##' to a temporary file and places the values into REAL32 tracks of an
##' AsspDataObj (file format SSFF). The columns frameIndex and frameTime are
##' not stored as tracks; frameTime provides the start time and sample rate.
##' @param path path to wav file
##' @param SMILExtractPath path to SMILExtract executable
##' @param configPath path to openSMILE config file
##' @param columsAsTracks if TRUE -> every column will be placed in its own track
##' if FALSE -> every column is placed into a single track called SMILExtractAll
##' @return an AsspDataObj holding the SMILExtract output
SMILExtract2AsspDataObj <- function(path,
                                    SMILExtractPath,
                                    configPath,
                                    columsAsTracks = TRUE){

  tmp1FilePath <- file.path(tempdir(), "tmp.csv")

  # remove tmp file if it already exists
  unlink(tmp1FilePath)

  # Quote every path so that spaces and shell metacharacters survive.
  # Quoting stops the shell from expanding a leading "~", hence the
  # explicit path.expand().
  system2(path.expand(SMILExtractPath),
          args = c("-C", shQuote(path.expand(configPath)),
                   "-I", shQuote(path.expand(path)),
                   "-O", shQuote(tmp1FilePath)),
          stdout = FALSE,
          stderr = FALSE)

  # SMILExtract's own output is discarded above, so report a failed run here
  if (!file.exists(tmp1FilePath)) {
    stop("SMILExtract did not write an output file; ",
         "check SMILExtractPath, configPath and path",
         call. = FALSE)
  }

  # get vals
  df <- suppressMessages(readr::read_delim(tmp1FilePath,
                                           delim = ";"))

  # extract + remove frameIndex/frameTime
  frameTime <- df$frameTime

  df$frameIndex <- NULL
  df$frameTime <- NULL

  df <- as.matrix(df)

  colNames <- colnames(df)

  if (length(frameTime) < 2) {
    stop("need at least two frames to determine the sample rate",
         call. = FALSE)
  }

  # get start time
  startTime <- frameTime[1]

  # create AsspDataObj
  ado <- list()

  # step size = distance between the first two frames (equals frameTime[2]
  # when the first frame is at 0)
  attr(ado, "sampleRate") <- 1 / (frameTime[2] - frameTime[1])

  tmpObj <- wrassp::read.AsspDataObj(path)
  attr(ado, "origFreq") <- attr(tmpObj, "sampleRate")

  attr(ado, "startTime") <- startTime

  # attr(ado, "startRecord") = as.integer(1)

  attr(ado, "endRecord") <- as.integer(nrow(df))

  class(ado) <- "AsspDataObj"

  wrassp::AsspFileFormat(ado) <- "SSFF"
  wrassp::AsspDataFormat(ado) <- as.integer(2)

  if (columsAsTracks) {
    # add every column as new track
    attr(ado, "trackFormats") <- rep("REAL32", ncol(df))
    # seq_len() so that a table without data columns adds no track
    for (i in seq_len(ncol(df))) {
      ado <- wrassp::addTrack(ado,
                              trackname = colNames[i],
                              data = df[, i],
                              format = "REAL32")
    }
  } else {
    # all columns in one multi-column track
    attr(ado, "trackFormats") <- "REAL32"
    ado <- wrassp::addTrack(ado,
                            trackname = "SMILExtractAll",
                            data = df,
                            format = "REAL32")
  }

  ado
}

How this function can be applied to wav files of an emuDB is shown below.

library(emuR)

# create demo data in tempdir()
create_emuRdemoData(tempdir())

# create path to demo database
path2ae <- file.path(tempdir(), "emuR_demoData", "ae_emuDB")

# list all .wav files in the ae emuDB
# (`pattern` is a regular expression, not a glob: match a literal ".wav"
# at the end of the file name)
paths2wavFiles <- list.files(path2ae,
                             pattern = "\\.wav$",
                             recursive = TRUE,
                             full.names = TRUE)

# loop through files
for (fp in paths2wavFiles) {
  ado <- SMILExtract2AsspDataObj(fp,
                                 SMILExtractPath = "~/programs/opensmile-2.3.0/bin/SMILExtract",
                                 configPath = "~/programs/opensmile-2.3.0/config/demo/demo1_energy.conf")
  # tools and wrassp are not attached by library(emuR), so their functions
  # are called with an explicit namespace
  newPath <- paste0(tools::file_path_sans_ext(fp), ".SMILExtract")
  # print(paste0(fp, ' -> ', newPath)) # uncomment for simple log
  wrassp::write.AsspDataObj(ado, file = newPath)
}

# load emuDB
# (verbose = FALSE is only set to avoid additional output in manual)
ae <- load_emuDB(path2ae, verbose = FALSE)

# add SSFF track definition
add_ssffTrackDefinition(ae,
                        name = "SMILExtract",
                        columnName = "pcm_LOGenergy",
                        fileExtension = "SMILExtract")

# test query + get_trackdata
sl <- query(ae, "Phonetic == n")
# (verbose = FALSE is only set to avoid additional output in manual)
td <- get_trackdata(ae,
                    sl,
                    ssffTrackName = "SMILExtract",
                    verbose = FALSE)

# test display
set_signalCanvasesOrder(ae,
                        perspectiveName = "default",
                        order = c("OSCI", "SPEC", "SMILExtract"))

# serve(ae) # uncomment to view in EMU-webApp