@inproceedings{Moritz1718_2016,
  year      = {2016},
  author    = {Moritz, Niko and Schröder, Jens and Goetze, Stefan and Anemüller, Jörn and Kollmeier, Birger},
  title     = {Acoustic Scene Classification using Time-Delay Neural Networks and Amplitude Modulation Filter Bank Features},
  booktitle = {Detection and Classification of Acoustic Scenes and Events},
  abstract  = {This paper presents a system for acoustic scene classification (SC) that is applied to data of the SC task of the DCASE’16 challenge (Task 1). The proposed method extracts acoustic features that employ a relatively long temporal context, i.e., amplitude modulation filter bank (AMFB) features, prior to detecting acoustic scenes with a neural network (NN) based classification approach. Recurrent neural networks (RNNs) are well suited to model long-term acoustic dependencies that are known to encode important information for SC tasks. However, RNNs require a relatively large amount of training data in comparison to feed-forward deep neural networks (DNNs). Hence, the present work uses the time-delay neural network (TDNN) approach, which enables analysis of long contextual information similar to RNNs but with training effort comparable to conventional DNNs. The proposed SC system attains a recognition accuracy of 76.5 %, which is 4.0 % higher than that of the DCASE’16 baseline system.}
}