diploma1.bib

@inproceedings{bu2010music,
  author       = {Bu, Jiajun and Tan, Shulong and Chen, Chun and Wang, Can and Wu, Hao and Zhang, Lijun and He, Xiaofei},
  title        = {{Music recommendation by unified hypergraph: combining social media information and music content}},
  booktitle    = {Proceedings of the international conference on Multimedia},
  pages        = {391--400},
  year         = {2010},
  organization = {ACM},
}
% NOTE(review): family/given name were swapped in the export; co-author Juang added from the known book record -- confirm against the edition cited.
@book{lawrence2008fundamentals,
  author    = {Rabiner, Lawrence and Juang, Biing-Hwang},
  title     = {{Fundamentals of Speech Recognition}},
  publisher = {Pearson Education India},
  year      = {2008},
}
@inproceedings{julier1997consistent,
  author       = {Julier, Simon J and Uhlmann, Jeffrey K},
  title        = {{Consistent debiased method for converting between polar and Cartesian coordinate systems}},
  booktitle    = {AeroSense'97},
  pages        = {110--121},
  year         = {1997},
  organization = {International Society for Optics and Photonics},
}
@inproceedings{susstrunk1999standard,
  author       = {S{\"u}sstrunk, Sabine and Buckley, Robert and Swen, Steve},
  title        = {{Standard RGB color spaces}},
  booktitle    = {Color and Imaging Conference},
  volume       = {1999},
  number       = {1},
  pages        = {127--134},
  year         = {1999},
  organization = {Society for Imaging Science and Technology},
}
@inproceedings{Sural2002,
  author    = {Sural, S. and Pramanik, S.},
  title     = {{Segmentation and histogram generation using the HSV color space for image retrieval}},
  booktitle = {Proceedings. International Conference on Image Processing},
  volume    = {2},
  pages     = {II-589--II-592},
  year      = {2002},
  publisher = {IEEE},
  doi       = {10.1109/ICIP.2002.1040019},
  isbn      = {0-7803-7622-6},
  issn      = {1522-4880},
  url       = {http://ieeexplore.ieee.org/articleDetails.jsp?arnumber=1040019},
  language  = {English},
  keywords  = {Content based retrieval,Feature extraction,HSV color space,Histograms,Image analysis,Image color analysis,Image retrieval,Image segmentation,Pixel,RGB color space,Smoothing methods,Visual perception,content based image retrieval,content-based retrieval,feature extraction,histogram generation,hue value,image colour analysis,image pixel,image retrieval,image segmentation,intensity value,object identification,pixel features extraction,saturation value,smoothing methods,statistical analysis,uniform color transition,visual databases,visual perception,window-based smoothing},
}
@inproceedings{chen2001music,
  author       = {Chen, Hung-Chen and Chen, Arbee L P},
  title        = {{A music recommendation system based on music data grouping and user interests}},
  booktitle    = {Proceedings of the tenth international conference on Information and knowledge management},
  pages        = {231--238},
  year         = {2001},
  organization = {ACM},
}
@incollection{JaeSikLee2006,
  author    = {Lee, Jae Sik and Lee, Jin Chun},
  title     = {{Music for My Mood: A Music Recommendation System Based on Context Reasoning}},
  booktitle = {Smart Sensing and Context},
  editor    = {Havinga, Paul and Lijding, Maria and Meratnia, Nirvana and Wegdam, Maarten},
  series    = {Lecture Notes in Computer Science},
  volume    = {4272},
  pages     = {190--203},
  year      = {2006},
  publisher = {Springer Berlin Heidelberg},
  address   = {Berlin, Heidelberg},
  doi       = {10.1007/11907503},
  isbn      = {978-3-540-47842-3},
  url       = {http://www.springerlink.com/index/10.1007/11907503},
}
@article{Li2007,
  author        = {Li, Yipeng and Wang, DeLiang},
  title         = {{Separation of Singing Voice From Music Accompaniment for Monaural Recordings}},
  journal       = {IEEE Transactions on Audio, Speech, and Language Processing},
  volume        = {15},
  number        = {4},
  pages         = {1475--1487},
  month         = may,
  year          = {2007},
  publisher     = {IEEE},
  doi           = {10.1109/TASL.2006.889789},
  issn          = {1558-7916},
  url           = {http://ieeexplore.ieee.org/articleDetails.jsp?arnumber=4156205},
  language      = {English},
  keywords      = {Auditory system,Automatic speech recognition,Cognitive science,Computer science,Humans,Instruments,Laboratories,Music information retrieval,Predominant pitch detection,Speech recognition,Time frequency analysis,audio recording,lyrics recognition,monaural recordings,music,music accompaniment,music information retrieval,predominant pitch detection,singer identification,singing voice detection,singing voice separation,sound separation,speech processing,speech recognition,time-frequency segments,track seperation},
  mendeley-tags = {track seperation},
}
@article{Gillet2008,
  author        = {Gillet, O. and Richard, G.},
  title         = {{Transcription and Separation of Drum Signals From Polyphonic Music}},
  journal       = {IEEE Transactions on Audio, Speech, and Language Processing},
  volume        = {16},
  number        = {3},
  pages         = {529--540},
  month         = mar,
  year          = {2008},
  publisher     = {IEEE},
  doi           = {10.1109/TASL.2007.914120},
  issn          = {1558-7916},
  url           = {http://ieeexplore.ieee.org/articleDetails.jsp?arnumber=4443887},
  language      = {English},
  keywords      = {Drum signals,Wiener filtering,Wiener filtering-based separation method,Wiener filters,audio signal processing,drum signals separation,drum signals transcription,drum track extraction,feature selection,filtering theory,fusion strategies,harmonic-noise decomposition,harmonic/noise decomposition,music,music transcription,musical instruments,polyphonic music,source separation,support vector machine (SVM),time-frequency analysis,time-frequency masking,track seperation},
  mendeley-tags = {track seperation},
}
@inproceedings{bogdanov2013essentia,
  author    = {Bogdanov, Dmitry and Wack, Nicolas and G{\'o}mez, Emilia and Gulati, Sankalp and Herrera, Perfecto and Mayor, Oscar and Roma, Gerard and Salamon, Justin and Zapata, Jos{\'e} R and Serra, Xavier},
  title     = {{Essentia: An Audio Analysis Library for Music Information Retrieval}},
  booktitle = {Proceedings of the International Conference on Music Information Retrieval (ISMIR)},
  pages     = {493--498},
  year      = {2013},
}
@inproceedings{saari2013role,
  author    = {Saari, Pasi and Eerola, Tuomas and Fazekas, Gy{\"o}rgy and Barthet, Mathieu and Lartillot, Olivier and Sandler, Mark B},
  title     = {{The Role of Audio and Tags in Music Mood Prediction: A Study Using Semantic Layer Projection}},
  booktitle = {Proceedings of the International Conference on Music Information Retrieval (ISMIR)},
  pages     = {201--206},
  year      = {2013},
}
@inproceedings{watson2012modeling,
  author    = {Watson, Diane and Mandryk, Regan L},
  title     = {{Modeling Musical Mood From Audio Features and Listening Context on an In-Situ Data Set}},
  booktitle = {Proceedings of the International Conference on Music Information Retrieval (ISMIR)},
  pages     = {31--36},
  year      = {2012},
}
@inproceedings{chu2010lamp,
  author       = {Chu, Wei-rong and Tsai, R. T.-H. and Wu, Ying-Sian and Wu, Hui-Hsin and Chen, Hung-Yi and Hsu, J. Y.-J.},
  title        = {{LAMP, a lyrics and audio mandopop dataset for music mood estimation: Dataset compilation, system construction, and testing}},
  booktitle    = {Technologies and Applications of Artificial Intelligence (TAAI), 2010 International Conference on},
  pages        = {53--59},
  year         = {2010},
  organization = {IEEE},
}
% NOTE(review): the export stored the author's own name as the school; the school below is taken from the author's known affiliation -- confirm against the library record.
% The "diplomsko delo" title suffix is moved into the type field so styles do not label this work a PhD thesis.
@phdthesis{pesek2012prepoznavanje,
  author = {Pesek, Matev{\v{z}}},
  title  = {{Prepoznavanje akordov s hierarhi{\v{c}}nim kompozicionalnim modelom}},
  type   = {Diplomsko delo},
  school = {Univerza v Ljubljani, Fakulteta za ra{\v{c}}unalni{\v{s}}tvo in informatiko},
  year   = {2012},
}
@article{la2001harmonic,
  author    = {{La Rue}, Jan},
  title     = {{Harmonic Rhythm in the Beethoven symphonies}},
  journal   = {The Journal of Musicology},
  volume    = {18},
  number    = {2},
  pages     = {221--248},
  year      = {2001},
  publisher = {JSTOR},
}
@article{terhardt1974pitch,
  author    = {Terhardt, Ernst},
  title     = {{Pitch, consonance, and harmony}},
  journal   = {The Journal of the Acoustical Society of America},
  volume    = {55},
  number    = {5},
  pages     = {1061--1069},
  year      = {1974},
  publisher = {Acoustical Society of America},
}
@inproceedings{zhu2005music,
  author       = {Zhu, Yongwei and Kankanhalli, Mohan S and Gao, Sheng},
  title        = {{Music key detection for musical audio}},
  booktitle    = {Multimedia Modelling Conference, 2005. MMM 2005. Proceedings of the 11th International},
  pages        = {30--37},
  year         = {2005},
  organization = {IEEE},
}
@inproceedings{gouyon2000classifying,
  author    = {Gouyon, F and Delerue, O and Pachet, F},
  title     = {{Classifying percussive sounds: a matter of zero-crossing rate?}},
  booktitle = {Proceedings of the COST G-6 Conference on Digital Audio Effects},
  year      = {2000},
}
@inproceedings{brossier2004real,
  author    = {Brossier, Paul and Bello, Juan Pablo and Plumbley, Mark D},
  title     = {{Real-time temporal segmentation of note objects in music signals}},
  booktitle = {Proceedings of the ICMC},
  year      = {2004},
}
@inproceedings{gouyon2001exploration,
  author    = {Gouyon, Fabien and Herrera, Perfecto},
  title     = {{Exploration of techniques for automatic labeling of audio drum tracks instruments}},
  booktitle = {Proceedings of MOSART: Workshop on Current Directions in Computer Music},
  year      = {2001},
}
% NOTE(review): the record had no journal and listed a university department as publisher, so it is re-typed as a thesis.
% The thesis level and parent university are presumed -- confirm against the institutional repository.
@mastersthesis{gunderson2007musical,
  author = {Gunderson, Steinar Heimdal},
  title  = {{Musical descriptors: An assessment of psychoacoustical models in the presence of lossy compression}},
  school = {Norges teknisk-naturvitenskapelige universitet, Institutt for elektronikk og telekommunikasjon},
  year   = {2007},
}
@phdthesis{bogdanov2013form,
  author   = {Bogdanov, Dmitry},
  title    = {{From music similarity to music recommendation: Computational approaches based on audio features and metadata}},
  school   = {Universitat Pompeu Fabra},
  address  = {Barcelona, Spain},
  pages    = {227},
  year     = {2013},
  keywords = { music information retrieval, music recommendation, music similarity, personalization, preference elicitation, recommender systems, user modeling, visualization,music discovery},
}
% NOTE(review): the "diplomsko delo" suffix and the personal-name publisher suggest a thesis record; consider a thesis entry type with the proper school once confirmed.
@book{lenko2009pomen,
  author    = {Lenko, Mira and Kogov{\v{s}}ek, Tina and Stankovi{\'c}, Peter},
  title     = {{Pomen glasbe v o\v{c}eh mladih: diplomsko delo}},
  publisher = {M. Lenko},
  year      = {2009},
}
% NOTE(review): the truncated chapter value "Echonest o" was dropped. The title read "Echonest of the past", which looks like a
% search-and-replace accident; it is restored to the presumed section title -- confirm against the book.
@incollection{Krause2012,
  author    = {Krause, Bernie},
  title     = {{Echoes of the Past}},
  booktitle = {The Great Animal Orchestra: Finding the Origins of Music in the World's Wild Places},
  pages     = {5--10},
  year      = {2012},
  publisher = {Hachette Digital, Inc.},
  address   = {London},
}
% NOTE(review): title and booktitle were identical, so this cites the whole edited volume; the three names are recorded as editors -- confirm.
@book{Wallin2001,
  editor    = {Wallin, Nils Lennart and Merker, Bj{\"o}rn and Brown, Steven},
  title     = {{The origins of music}},
  publisher = {MIT Press},
  year      = {2001},
}
@inproceedings{Pesek2013c,
  author    = {Pesek, Matev{\v{z}} and Guna, Jo{\v{z}}e and Leonardis, Ale{\v{s}} and Marolt, Matija},
  title     = {{Visualization of a deep architecture using the compositional hierarchical model}},
  booktitle = {Proceedings of the 4th International Conference World Usability Day Slovenia 2013},
  pages     = {56--59},
  year      = {2013},
}
@inproceedings{Woolhouse2006,
  author    = {Woolhouse, Matthew and Cross, Ian and Horton, Timothy},
  title     = {{The perception of non-adjacent harmonic relations}},
  booktitle = {Proceedings of International Conference on Music Perception and Cognition},
  address   = {Bologna},
  year      = {2006},
}
@inproceedings{Hinton1983,
  author    = {Hinton, Geoffrey E and Sejnowski, Terrence J},
  title     = {{Optimal Perceptual Inference}},
  booktitle = {Proceedings of the IEEE conference on Computer Vision and Pattern Recognition (CVPR)},
  pages     = {448--453},
  year      = {1983},
}
@book{Lerdahl1983,
  author    = {Lerdahl, Fred and Jackendoff, Ray},
  title     = {{A generative theory of tonal music}},
  publisher = {MIT Press},
  address   = {Cambridge},
  year      = {1983},
}
@inproceedings{ShigekiSagayama2004,
  author    = {Sagayama, Shigeki and Takahashi, Keigo},
  title     = {{Specmurt analysis: A piano-roll-visualization of polyphonic music signal by deconvolution of log-frequency spectrum}},
  booktitle = {ISCA Tutorial and Research Workshop on Statistical and Perceptual Audio Processing},
  address   = {Jeju, Korea},
  year      = {2004},
}
@article{schuller2010mister,
  author    = {Schuller, Bj{\"o}rn and Hage, Clemens and Schuller, Dagmar and Rigoll, Gerhard},
  title     = {{`Mister DJ, Cheer Me Up!': Musical and textual features for automatic mood classification}},
  journal   = {Journal of New Music Research},
  volume    = {39},
  number    = {1},
  pages     = {13--34},
  year      = {2010},
  publisher = {Taylor \& Francis},
}
% NOTE(review): the previous url was a session-specific "showciting" link on a CiteSeer mirror and was dropped; add a stable link if one is available.
@inproceedings{Kashino1995,
  author    = {Kashino, Kunio and Nakadai, Kazuhiro and Kinoshita, Tomoyoshi and Tanaka, Hidehiko},
  title     = {{Organization of Hierarchical Perceptual Sounds: Music Scene Analysis with Autonomous Processing Modules and a Quantitative Information Integration Mechanism}},
  booktitle = {International Joint Conference on Artificial Intelligence},
  address   = {Quebec},
  pages     = {158--164},
  year      = {1995},
}
@inproceedings{Pikrakis2013,
  author    = {Pikrakis, Aggelos},
  title     = {{A Deep Learning Approach to Rhythm Modelling with Applications}},
  booktitle = {6th International Workshop on Machine Learning and Music, held in conjunction with the European Conference on Machine Learning and Principles and Practice of Knowledge Discovery in Databases, ECML/PKDD 2013},
  pages     = {1--4},
  year      = {2013},
}
@book{Temperley2007,
  author    = {Temperley, David},
  title     = {{Music and probability}},
  publisher = {MIT Press},
  year      = {2007},
  pages     = {244},
}
@article{Foote1999,
  author  = {Foote, Jonathan},
  title   = {{An overview of audio information retrieval}},
  journal = {Multimedia Systems},
  volume  = {7},
  number  = {1},
  pages   = {2--10},
  year    = {1999},
  doi     = {10.1007/s005300050106},
  url     = {http://link.springer.com/article/10.1007/s005300050106},
}
@inproceedings{Mauch2008a,
  author    = {Mauch, Matthias and M{\"u}llensiefen, Daniel and Dixon, Simon and Wiggins, Geraint},
  title     = {{Can Statistical Language Models be Used for the Analysis of Harmonic Progressions?}},
  booktitle = {Proceedings of International Conference on Music Perception and Cognition},
  address   = {Sapporo},
  year      = {2008},
}
@inproceedings{schmidt2009projection,
  author    = {Schmidt, Erik M and Kim, Youngmoo E},
  title     = {{Projection of acoustic features to continuous valence-arousal mood labels via regression}},
  booktitle = {10th International Society for Music Information Retrieval Conference. ISMIR},
  year      = {2009},
}
% NOTE(review): the previous url was a Google Scholar search query, not a link to the work, and was dropped; add a stable link or DOI if available.
@article{Hassenzahl2003,
  author  = {Hassenzahl, M and Burmester, M and Koller, F},
  title   = {{AttrakDiff: A questionnaire to measure perceived hedonic and pragmatic quality}},
  journal = {Mensch \& Computer},
  year    = {2003},
}
@inproceedings{Pesek2013b,
  author    = {Pesek, Matev{\v{z}} and Poredo{\v{s}}, Mojca and Guna, Jo{\v{z}}e and Stojmenova, Emilija and Marolt, Matija},
  title     = {{Mood-dependent visual representation of audio recordings for music recommendation}},
  booktitle = {Proceedings of the 4th International Conference World Usability Day Slovenia 2013},
  address   = {Ljubljana},
  pages     = {53--55},
  year      = {2013},
}
% NOTE(review): first author Yeh added from the published record of this article; confirm against the DOI below. The citation key is unchanged.
@article{Roebel2010,
  author    = {Yeh, Chunghsin and Roebel, Axel and Rodet, Xavier},
  title     = {{Multiple Fundamental Frequency Estimation and Polyphony Inference of Polyphonic Music Signals}},
  journal   = {IEEE Transactions on Audio, Speech, and Language Processing},
  volume    = {18},
  number    = {6},
  pages     = {1116--1126},
  year      = {2010},
  publisher = {IEEE},
  doi       = {10.1109/TASL.2009.2030006},
  issn      = {1558-7916},
  url       = {http://ieeexplore.ieee.org/articleDetails.jsp?arnumber=5200519},
}
@article{Ellis2006,
  author  = {Ellis, Daniel P W and Poliner, Graham E},
  title   = {{Classification-based melody transcription}},
  journal = {Machine Learning},
  volume  = {65},
  number  = {2--3},
  pages   = {439--456},
  year    = {2006},
  doi     = {10.1007/s10994-006-8373-9},
  issn    = {0885-6125},
  url     = {http://link.springer.com/10.1007/s10994-006-8373-9},
}
@inproceedings{Pesek2013,
  author    = {Pesek, Matev{\v{z}} and Marolt, Matija},
  title     = {{Chord estimation using compositional hierarchical model}},
  booktitle = {6th International Workshop on Machine Learning and Music, held in conjunction with the European Conference on Machine Learning and Principles and Practice of Knowledge Discovery in Databases, ECML/PKDD 2013},
  year      = {2013},
}
@inproceedings{Scholz2009,
  author     = {Scholz, Ricardo and Vincent, Emmanuel and Bimbot, Frederic},
  title      = {{Robust modeling of musical chord sequences using probabilistic N-grams}},
  shorttitle = {Acoustics, Speech and Signal Processing, 2009. ICA},
  booktitle  = {Proceedings of International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
  pages      = {53--56},
  month      = apr,
  year       = {2009},
  publisher  = {IEEE},
  doi        = {10.1109/ICASSP.2009.4959518},
  isbn       = {978-1-4244-2353-8},
  issn       = {1520-6149},
  url        = {http://ieeexplore.ieee.org/lpdocs/epic03/wrapper.htm?arnumber=4959518},
  keywords   = {Dictionaries,Hidden Markov models,History,Labeling,Music,Music information retrieval,N-grams,Natural languages,Robustness,Smoothing methods,Testing,Training data,audio-to-symbolic music transcription,automatic style identification,computer-assisted composition,information retrieval,model selection,model smoothing,model smoothing techniques,musical chord sequences,polyphonic music retrieval,probabilistic N-grams,probabilistic modeling,probability,single chord labeling scheme},
  abstract   = {The modeling of music as a language is a core issue for a wide range of applications such as polyphonic music retrieval, automatic style identification, audio to symbolic music transcription and computer-assisted composition. In this paper, we focus on the modeling of chord sequences by probabilistic N-grams. Previous studies using these models have achieved limited success, due to overfitting and to the use of a single chord labeling scheme. We investigate these issues using model smoothing and selection techniques initially designed for spoken language modeling. This approach is evaluated over a set of songs by The Beatles, considering several chord labeling schemes. Initial results show that the accuracy of N-grams is increased but that additional improvements may still be achieved in the future using more advanced, possibly music-specific, smoothing techniques.},
}
@article{Tolonen2000,
  author  = {Tolonen, Tero and Karjalainen, Matti},
  title   = {{A Computationally Efficient Multipitch Analysis Model}},
  journal = {IEEE Transactions on Speech and Audio Processing},
  volume  = {8},
  number  = {6},
  pages   = {708--716},
  year    = {2000},
}
% NOTE(review): page range added from the published record of this article; confirm against the journal site.
@article{Bangor2009,
  author  = {Bangor, Aaron and Kortum, Philip and Miller, James},
  title   = {{Determining What Individual SUS Scores Mean: Adding an Adjective Rating Scale}},
  journal = {Journal of Usability Studies},
  volume  = {4},
  number  = {3},
  pages   = {114--123},
  year    = {2009},
}
@inproceedings{Farbook2010,
  author    = {Farbood, Morwaread},
  title     = {{Working memory and the perception of hierarchical tonal structures}},
  booktitle = {Proceedings of International Conference on Music Perception and Cognition},
  address   = {Seattle},
  year      = {2010},
}
@inproceedings{Humphrey2012a,
  author    = {Humphrey, Eric J and Cho, Taemin and Bello, Juan P},
  title     = {{Learning a Robust Tonnetz-Space Transform for Automatic Chord Recognition}},
  booktitle = {Acoustics, Speech and Signal Processing (ICASSP)},
  address   = {New York},
  pages     = {453--456},
  year      = {2012},
}
@inproceedings{Klapuri,
  author    = {Klapuri, Anssi P.},
  title     = {{A perceptually motivated multiple-F0 estimation method}},
  booktitle = {IEEE Workshop on Applications of Signal Processing to Audio and Acoustics},
  pages     = {291--294},
  year      = {2005},
  publisher = {IEEE},
  doi       = {10.1109/ASPAA.2005.1540227},
  isbn      = {0-7803-9154-3},
  url       = {http://ieeexplore.ieee.org/articleDetails.jsp?arnumber=1540227},
  keywords  = {Acoustic signal processing,Auditory system,Computational modeling,Computer peripherals,Frequency estimation,Humans,Multiple signal classification,Music,Signal analysis,Signal processing,acoustic signal detection,concurrent musical sounds,human auditory periphery,multiple-fundamental frequency estimation method,periodicity analysis mechanism,peripheral hearing model},
}
@inproceedings{MattiRyynanen2006,
  author    = {Ryyn{\"a}nen, Matti and Klapuri, Anssi},
  title     = {{Transcription of the singing melody in polyphonic music}},
  booktitle = {Proceedings of the International Conference on Music Information Retrieval (ISMIR)},
  pages     = {222--227},
  year      = {2006},
  url       = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.79.7724},
}
@article{Colibazzi2010,
  author   = {Colibazzi, Tiziano and Posner, Jonathan and Wang, Zhishun and Gorman, Daniel and Gerber, Andrew and Yu, Shan and Zhu, Hongtu and Kangarlu, Alayar and Duan, Yunsuo and Russell, James A and Peterson, Bradley S},
  title    = {{Neural systems subserving valence and arousal during the experience of induced emotions}},
  journal  = {Emotion},
  volume   = {10},
  number   = {3},
  pages    = {377--389},
  year     = {2010},
  doi      = {10.1037/a0018484},
  issn     = {1931-1516},
  pmid     = {20515226},
  url      = {http://www.ncbi.nlm.nih.gov/pubmed/20515226},
  keywords = {Adult,Amygdala,Amygdala: physiology,Arousal,Arousal: physiology,Brain,Brain: physiology,Caudate Nucleus,Caudate Nucleus: physiology,Cerebellar Cortex,Cerebellar Cortex: physiology,Emotions,Emotions: physiology,Female,Globus Pallidus,Globus Pallidus: physiology,Humans,Magnetic Resonance Imaging,Male,Parahippocampal Gyrus,Parahippocampal Gyrus: physiology,Thalamus,Thalamus: physiology,Young Adult},
  abstract = {The circumplex model of affect construes all emotions as linear combinations of 2 independent neurophysiological dimensions, valence and arousal. We used functional magnetic resonance imaging to identify the neural networks subserving valence and arousal, and we assessed, in 10 participants, the associations of the BOLD (blood oxygen level-dependent) response, an indirect index of neural activity, with ratings of valence and arousal during the emotional experiences induced by the presentation of evocative sentences. Unpleasant emotional experience was associated with increased BOLD signal intensities in the supplementary motor, anterior midcingulate, right dorsolateral prefrontal, occipito-temporal, inferior parietal, and cerebellar cortices. Highly arousing emotions were associated with increased BOLD signal intensities in the left thalamus, globus pallidus, caudate, parahippocampal gyrus, amygdala, premotor cortex, and cerebellar vermis. Separate analyses using a finite impulse response model confirmed these results and revealed that pleasant emotions engaged an additional network that included the midbrain, ventral striatum, and caudate nucleus, all portions of a reward circuit. These findings suggest the existence of distinct networks subserving the valence and arousal dimensions of emotions, with midline and medial temporal lobe structures mediating arousal and dorsal cortical areas and mesolimbic pathways mediating valence.},
}
@inproceedings{Boulanger-Lewandowski2011,
  author    = {Boulanger-Lewandowski, Nicolas and Vincent, Pascal and Bengio, Yoshua},
  title     = {{Energy-based Recurrent Neural Network for Multiple Fundamental Frequency Estimation}},
  booktitle = {Snowbird Learning workshop},
  year      = {2011},
}
% NOTE(review): 18 is the journal volume (it was stored in the number field); the issue number is taken from the published record -- confirm.
@article{McDermott2008,
  author  = {McDermott, Josh H and Oxenham, Andrew J},
  title   = {{Music perception, pitch and the auditory system}},
  journal = {Current Opinion in Neurobiology},
  volume  = {18},
  number  = {4},
  pages   = {452--463},
  year    = {2008},
}
% NOTE(review): 178 is the volume of this book series (it was stored in the number field) -- confirm against the PubMed record linked below.
@article{SJ2009,
  author  = {Morrison, Stephen J and Demorest, Stephen M},
  title   = {{Cultural constraints on music perception and cognition}},
  journal = {Progress in Brain Research},
  volume  = {178},
  pages   = {67--77},
  year    = {2009},
  url     = {http://www.ncbi.nlm.nih.gov/pubmed/19874962},
}
@book{Rosenblatt1962,
  author    = {Rosenblatt, Frank},
  title     = {{Principles of neurodynamics: perceptrons and the theory of brain mechanisms}},
  publisher = {Spartan Books},
  year      = {1962},
  pages     = {616},
  url       = {http://books.google.ca/books/about/Principles\_of\_neurodynamics.html?id=7FhRAAAAMAAJ\&pgis=1},
}
@inproceedings{schmidt2010prediction,
  author    = {Schmidt, Erik M and Kim, Youngmoo E},
  title     = {{Prediction of Time-varying Musical Mood Distributions from Audio}},
  booktitle = {Proceedings of the International Conference on Music Information Retrieval (ISMIR)},
  pages     = {465--470},
  year      = {2010},
}
@article{Paraskevopoulos2010,
  author  = {Paraskevopoulos, Evangelos and Tsapkini, Kyrana and Peretz, Isabelle},
  title   = {{Cultural aspects of music perception: Validation of a Greek version of the Montreal Battery of Evaluation of Amusias}},
  journal = {Journal of the International Neuropsychological Society},
  volume  = {16},
  number  = {4},
  pages   = {1--10},
  year    = {2010},
}
@inproceedings{Hamel2010,
  author    = {Hamel, Philippe and Eck, Douglas},
  title     = {{Learning Features from Music Audio with Deep Belief Networks}},
  booktitle = {Proceedings of the International Conference on Music Information Retrieval (ISMIR)},
  pages     = {339--344},
  year      = {2010},
}
@inproceedings{LardeurERHS09_ClassAudioProduction_ICASSP,
  author    = {Lardeur, M and Essid, S and Richard, G and Haller, M and Sikora, T},
  title     = {{Incorporating prior knowledge on the digital media creation process into audio classifiers}},
  booktitle = {Acoustics, Speech and Signal Processing, 2009. ICASSP 2009. IEEE International Conference on},
  pages     = {1653--1656},
  month     = apr,
  year      = {2009},
  doi       = {10.1109/ICASSP.2009.4959918},
  issn      = {1520-6149},
}
@article{Braun1999,
  author  = {Braun, Martin},
  title   = {{Auditory midbrain laminar structure appears adapted to f0 extraction: further evidence and implications of the double critical bandwidth}},
  journal = {Hearing Research},
  volume  = {129},
  pages   = {71--82},
  year    = {1999},
}
% NOTE(review): the publisher field previously repeated the journal name; it is replaced with the journal's publisher -- confirm.
@article{Bengio2009,
  author    = {Bengio, Yoshua},
  title     = {{Learning Deep Architectures for AI}},
  journal   = {Foundations and Trends{\textregistered} in Machine Learning},
  volume    = {2},
  number    = {1},
  pages     = {1--127},
  year      = {2009},
  publisher = {Now Publishers},
}
@inproceedings{Noland2006,
  author    = {Noland, Katy and Sandler, Mark},
  title     = {{Key Estimation Using a Hidden Markov Model}},
  booktitle = {Proceedings of the International Conference on Music Information Retrieval (ISMIR)},
  address   = {Victoria},
  year      = {2006},
}
@inproceedings{Boulanger-Lewandowski2013,
  author    = {Boulanger-Lewandowski, Nicolas and Bengio, Yoshua and Vincent, Pascal},
  title     = {{High-dimensional sequence transduction}},
  booktitle = {2013 IEEE International Conference on Acoustics, Speech and Signal Processing},
  pages     = {3178--3182},
  year      = {2013},
  publisher = {IEEE},
  doi       = {10.1109/ICASSP.2013.6638244},
  isbn      = {978-1-4799-0356-6},
  issn      = {1520-6149},
  url       = {http://ieeexplore.ieee.org/articleDetails.jsp?arnumber=6638244},
  keywords  = {Accuracy,Hidden Markov models,Noise,Recurrent neural networks,Sequence transduction,Smoothing methods,Training,Vectors,acoustic transducers,audio signal processing,error statistics,global distribution mode,high dimensional output sequence,high dimensional sequence transduction,music,musically plausible transcription,polyphonic audio music,polyphonic transcription,probabilistic model,probability,realistic output distribution,recurrent neural nets,recurrent neural network,restricted Boltzmann machine,symbolic notation,test error rate},
}
% NOTE(review): publisher (a required field for book entries) added from the known record of the 2004 edition -- confirm.
@book{Gelfand2004,
  author    = {Gelfand, Stanley A},
  title     = {{Hearing: An introduction to psychological and physiological acoustics}},
  publisher = {Marcel Dekker},
  year      = {2004},
  pages     = {312},
}
@incollection{Fidler2009,
  author    = {Fidler, Sanja and Boben, Marko and Leonardis, Ale{\v{s}}},
  title     = {{Learning Hierarchical Compositional Representations of Object Structure}},
  booktitle = {Object Categorization: Computer and Human Vision Perspectives},
  pages     = {196--215},
  year      = {2009},
  publisher = {Cambridge University Press},
}
@article{Melara1989,
  author  = {Melara, Robert D},
  title   = {{Dimensional Interaction Between Color and Pitch}},
  journal = {Journal of Experimental Psychology: Human Perception and Performance},
  volume  = {15},
  number  = {1},
  pages   = {69--79},
  year    = {1989},
}
@article{Amitay2006,
  author  = {Amitay, Sygal and Irwin, Amy and Moore, David R},
  title   = {{Discrimination learning induced by training with identical stimuli}},
  journal = {Nature Neuroscience},
  volume  = {9},
  number  = {11},
  pages   = {1446--1448},
  year    = {2006},
}
@article{Hart2006,
  author    = {Hart, S G},
  title     = {{NASA-Task Load Index (NASA-TLX); 20 Years Later}},
  journal   = {Proceedings of the Human Factors and Ergonomics Society Annual Meeting},
  volume    = {50},
  number    = {9},
  pages     = {904--908},
  year      = {2006},
  publisher = {SAGE Publications},
  doi       = {10.1177/154193120605000909},
  issn      = {1071-1813},
  url       = {http://pro.sagepub.com/content/50/9/904.abstract},
}
@inproceedings{Pesek2014b,
  author    = {Pesek, Matev{\v{z}} and Godec, Primo{\v{z}} and Poredo{\v{s}}, Mojca and Strle, Gregor and Guna, Jo{\v{z}}e and Stojmenova, Emilija and Poga{\v{c}}nik, Matev{\v{z}} and Marolt, Matija},
  title     = {{Capturing the Mood: Evaluation of the MoodStripe and MoodGraph Interfaces}},
  booktitle = {Management Information Systems in Multimedia Art, Education, Entertainment, and Culture (MIS-MEDIA), IEEE International Conference on Multimedia \& Expo (ICME)},
  pages     = {1--4},
  year      = {2014},
}
@article{Yu2011,
  author     = {Yu, Dong and Deng, Li},
  title      = {{Deep Learning and Its Applications to Signal and Information Processing [Exploratory DSP]}},
  shorttitle = {Signal Processing Magazine, IEEE},
  journal    = {IEEE Signal Processing Magazine},
  volume     = {28},
  number     = {1},
  pages      = {145--154},
  year       = {2011},
  doi        = {10.1109/MSP.2010.939038},
  issn       = {1053-5888},
  url        = {http://ieeexplore.ieee.org/lpdocs/epic03/wrapper.htm?arnumber=5670617},
  abstract   = {The purpose of this article is to introduce the readers to the emerging technologies enabled by deep learning and to review the research work conducted in this area that is of direct relevance to signal processing. We also point out, in our view, the future research directions that may attract interests of and require efforts from more signal processing researchers and practitioners in this emerging area for advancing signal and information processing technology and applications.},
}
% NOTE(review): the report number was truncated to "Technical Report TR-CS" in the keywords; the full number below is from the published record -- confirm.
@techreport{Gerhard,
  author        = {Gerhard, David},
  title         = {{Pitch Extraction and Fundamental Frequency: History and Current Techniques}},
  institution   = {University of Regina, Saskatchewan, Canada},
  address       = {Regina},
  number        = {TR-CS 2003-06},
  year          = {2003},
  url           = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.58.834},
  keywords      = {Technical Report TR-CS},
  mendeley-tags = {Technical Report TR-CS},
}
@inproceedings{Pesek2014,
  author    = {Pesek, Matev{\v{z}} and Godec, Primo{\v{z}}},
  title     = {{Introducing a Dataset of Emotional and Color Responses to Music}},
  booktitle = {Proceedings of the International Conference on Music Information Retrieval (ISMIR)},
  address   = {Taipei},
  year      = {2014},
}
@book{Klapuri2006,
  editor    = {Klapuri, Anssi and Davy, Manuel},
  title     = {{Signal Processing Methods for Music Transcription}},
  publisher = {Springer},
  address   = {New York},
  year      = {2006},
  pages     = {440},
  url       = {http://www.springer.com/engineering/signals/book/978-0-387-30667-4},
  keywords  = {Image and Speech Processing,Pattern Recognition,Signal,Signal Processing Methods for Music Transcription},
}
@inproceedings{Ni2012,
  author    = {Ni, Yizhao and McVicar, Matt and Santos-Rodriguez, Raul and De Bie, Tijl},
  title     = {{Using Hyper-genre Training to Explore Genre Information for Automatic Chord Estimation}},
  booktitle = {Proceedings of the International Conference on Music Information Retrieval (ISMIR)},
  address   = {Porto},
  pages     = {109--114},
  year      = {2012},
}
@inproceedings{Song2012,
  author    = {Song, Y and Dixon, S and Pearce, M},
  title     = {{A survey of music recommendation systems and future perspectives}},
  booktitle = {Proc. 9th Int. Symp. Computer Music Modelling and Retrieval (CMMR)},
  address   = {London},
  pages     = {395--410},
  year      = {2012},
}
@inproceedings{Mauch2010,
  author    = {Mauch, Matthias and Dixon, Simon},
  title     = {{Approximate Note Transcription For The Improved Identification Of Difficult Chords}},
  booktitle = {Proceedings of the International Conference on Music Information Retrieval (ISMIR)},
  address   = {Utrecht},
  year      = {2010},
}
@article{Felleman1991,
  author  = {Felleman, Daniel J and {Van Essen}, David C},
  title   = {{Distributed Hierarchical Processing in the Primate Cerebral Cortex}},
  journal = {Cerebral Cortex},
  volume  = {1},
  number  = {1},
  pages   = {1--47},
  year    = {1991},
}
@article{Meredith2002,
abstract = {In previous approaches to repetition discovery in music, the music to be analysed has been represented using strings. However, there are certain types of interesting musical repetitions that cannot be discovered using string algorithms. We propose a geometric approach to repetition discovery in which the music is represented as a multidimensional dataset. Certain types of interesting musical repetition that cannot be found using string algorithms can efficiently be found using algorithms that process multidimensional datasets. Our approach allows polyphonic music to be analysed as efficiently as monophonic music and it can be used to discover polyphonic repeated patterns ``with gaps'' in the timbre, dynamic and rhythmic structure of a passage as well as its pitch structure. We present two new algorithms: SIA and SIATEC. SIA computes all the maximal repeated patterns in a multidimensional dataset and SIATEC computes all the occurrences of all the maximal repeated patterns in a dataset. For a $k$-dimensional dataset of size $n$, the worst-case running time of SIA is $O(kn^2 \log_2 n)$ and the worst-case running time of SIATEC is $O(kn^3)$.},
author = {Meredith, David and Lemstr{\"o}m, Kjell and Wiggins, Geraint A},
doi = {10.1076/jnmr.31.4.321.14162},
issn = {0929-8215},
journal = {Journal of New Music Research},
month = dec,
number = {4},
pages = {321--345},
publisher = {Routledge},
title = {{Algorithms for discovering repeated patterns in multidimensional representations of polyphonic music}},
url = {http://www.tandfonline.com/doi/abs/10.1076/jnmr.31.4.321.14162},
volume = {31},
year = {2002}
}
@article{Remmington2000,
abstract = {The circumplex model of affect has been among the most widely studied representations of affect. Despite the considerable evidence cited in support of it, methods typically used to evaluate the model have substantial limitations. In this article, the authors attempt to correct past limitations by using a covariance structure model specifically designed to assess circumplex structure. This model was fit to 47 individual correlation matrices from published data sets. Analyses revealed that model fit was typically acceptable and that opposing affective states usually demonstrated strong negative correlations with one another. However, analyses also indicated substantial variability in both model fit and correlations among opposing affective states and suggested several characteristics of studies that partially accounted for this variability. Detailed examination of the locations of affective states for 10 of the correlation matrices with relatively optimal characteristics provided mixed support for the model.},
author = {Remmington, N A and Fabrigar, L R and Visser, P S},
issn = {0022-3514},
journal = {Journal of Personality and Social Psychology},
keywords = { Psychological, Statistical,Affect,Confounding Factors (Epidemiology),Data Interpretation,Humans,Models},
number = {2},
pages = {286--300},
pmid = {10948981},
title = {{Reexamining the circumplex model of affect}},
url = {http://www.ncbi.nlm.nih.gov/pubmed/10948981},
volume = {79},
year = {2000}
}
@article{turnbull2008semantic,
author = {Turnbull, Douglas and Barrington, Luke and Torres, David and Lanckriet, Gert},
journal = {IEEE Transactions on Audio, Speech, and Language Processing},
number = {2},
pages = {467--476},
publisher = {IEEE},
title = {{Semantic annotation and retrieval of music and sound effects}},
volume = {16},
year = {2008}
}
@article{DavidA.2004,
author = {Schwartz, David A. and Purves, Dale},
journal = {Hearing Research},
number = {1--2},
pages = {31--46},
title = {{Pitch is determined by naturally occurring periodic sounds}},
year = {2004}
}
@inproceedings{Sheh2003,
  author    = {Sheh, Alexander and Ellis, Daniel},
  title     = {{Chord segmentation and recognition using EM-trained hidden Markov models}},
  booktitle = {Proceedings of the International Conference on Music Information Retrieval (ISMIR)},
  address   = {Baltimore},
  pages     = {1--7},
  year      = {2003},
}
@misc{Smith1983,
  author = {Smith, Dave and Wood, Chet},
  title  = {{MIDI Musical Instrument Digital Interface Specification 1.0}},
  year   = {1983},
}
@book{Rokach2007,
  author    = {Rokach, Lior and Maimon, Oded Z},
  title     = {{Data Mining with Decision Trees: Theory and Applications}},
  publisher = {World Scientific Publishing},
  year      = {2007},
  pages     = {244},
}
@inproceedings{Schmidt2011,
  author     = {Schmidt, Erik M and Kim, Youngmoo E},
  title      = {{Learning emotion-based acoustic features with deep belief networks}},
  booktitle  = {2011 IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA)},
  publisher  = {IEEE},
  pages      = {65--68},
  year       = {2011},
  doi        = {10.1109/ASPAA.2011.6082328},
  isbn       = {978-1-4577-0693-6},
  issn       = {1931-1168},
  url        = {http://ieeexplore.ieee.org/lpdocs/epic03/wrapper.htm?arnumber=6082328},
  shorttitle = {Applications of Signal Processing to Audio and Aco},
  abstract   = {The medium of music has evolved specifically for the expression of emotions, and it is natural for us to organize music in terms of its emotional associations. But while such organization is a natural process for humans, quantifying it empirically proves to be a very difficult task, and as such no dominant feature representation for music emotion recognition has yet emerged. Much of the difficulty in developing emotion-based features is the ambiguity of the ground-truth. Even using the smallest time window, opinions on the emotion are bound to vary and reflect some disagreement between listeners. In previous work, we have modeled human response labels to music in the arousal-valence (A-V) representation of affect as a time-varying, stochastic distribution. Current methods for automatic detection of emotion in music seek performance increases by combining several feature domains (e.g. loudness, timbre, harmony, rhythm). Such work has focused largely in dimensionality reduction for minor classification performance gains, but has provided little insight into the relationship between audio and emotional associations. In this new work we seek to employ regression-based deep belief networks to learn features directly from magnitude spectra. While the system is applied to the specific problem of music emotion recognition, it could be easily applied to any regression-based audio feature learning problem.},
}
@incollection{abdi2007method,
address = {Thousand Oaks, CA, USA},
author = {Abdi, Herv{\'e}},
booktitle = {Encyclopedia of Measurement and Statistics},
editor = {Salkind, Neil J.},
publisher = {Sage},
title = {{The method of least squares}},
year = {2007}
}
@article{Emiya2010,
  author     = {Emiya, Valentin and Badeau, Roland and David, Bertrand},
  title      = {{Multipitch Estimation of Piano Sounds Using a New Probabilistic Spectral Smoothness Principle}},
  journal    = {IEEE Transactions on Audio, Speech, and Language Processing},
  volume     = {18},
  number     = {6},
  pages      = {1643--1654},
  year       = {2010},
  doi        = {10.1109/TASL.2009.2038819},
  issn       = {1558-7916},
  url        = {http://ieeexplore.ieee.org/lpdocs/epic03/wrapper.htm?arnumber=5356234},
  shorttitle = {Audio, Speech, and Language Processing, IEEE Trans},
  keywords   = {Acoustic signal analysis,MAPS,acoustic signal processing,audio processing,autoregressive processes,homemade database,moving-average model,multipitch estimation,multipitch estimation (MPE),musical instruments,overlapping overtones,piano,piano overtone series,piano recordings,piano sounds,probabilistic spectral smoothness principle,probability,smooth autoregressive model,smoothing methods,spectral analysis,spectral envelope modeling,spectral smoothness,transcription},
  abstract   = {A new method for the estimation of multiple concurrent pitches in piano recordings is presented. It addresses the issue of overlapping overtones by modeling the spectral envelope of the overtones of each note with a smooth autoregressive model. For the background noise, a moving-average model is used and the combination of both tends to eliminate harmonic and sub-harmonic erroneous pitch estimations. This leads to a complete generative spectral model for simultaneous piano notes, which also explicitly includes the typical deviation from exact harmonicity in a piano overtone series. The pitch set which maximizes an approximate likelihood is selected from among a restricted number of possible pitch combinations as the one. Tests have been conducted on a large homemade database called MAPS, composed of piano recordings from a real upright piano and from high-quality samples.},
}
@article{Oudre2011,
  author  = {Oudre, Laurent and Grenier, Yves and Fevotte, Cedric},
  title   = {{Chord Recognition by Fitting Rescaled Chroma Vectors to Chord Templates}},
  journal = {IEEE Transactions on Audio, Speech, and Language Processing},
  volume  = {19},
  number  = {7},
  pages   = {2222--2233},
  year    = {2011},
}
@inproceedings{Leonardis2007,
author = {Fidler, Sanja and Leonardis, Ale\v{s}},
booktitle = {IEEE Conference on Computer Vision and Pattern Recognition (CVPR)},
pages = {1--8},
title = {{Towards scalable representations of object categories: Learning a hierarchy of parts}},
year = {2007}
}
@comment{Duplicate definition of mauch:adt:2013 removed from this position.
BibTeX reports a repeated entry and keeps the first definition, and this copy
carried a title whose backslash-escaped braces printed as literal braces.
The remaining definition of the same key later in this file has the clean title.}
@incollection{Scherer2001,
  author    = {Scherer, K R and Zentner, M R},
  title     = {{Emotional effects of music: production rules}},
  booktitle = {Music and emotion},
  editor    = {Juslin, P N and Sloboda, J A},
  publisher = {Oxford University Press},
  address   = {New York},
  year      = {2001},
}
@article{Laskowski1979,
  author  = {Laskowski, Larry},
  title   = {{Heinrich Schenker: An Annotated Index to His Analyses of Musical Works}},
  journal = {Journal of Music Theory},
  volume  = {23},
  number  = {2},
  pages   = {304--307},
  year    = {1979},
}
@article{Lewis2013,
author = {Lewis, James R},
journal = {Interacting with Computers},
number = {4},
pages = {320--324},
title = {{Critical Review of `The Usability Metric for User Experience'}},
volume = {25},
year = {2013}
}
@article{Salamon2014,
abstract = {Melody extraction algorithms aim to produce a sequence of frequency values corresponding to the pitch of the dominant melody from a musical recording. Over the past decade, melody extraction has emerged as an active research topic, comprising a large variety of proposed algorithms spanning a wide range of techniques. This article provides an overview of these techniques, the applications for which melody extraction is useful, and the challenges that remain. We start with a discussion of ``melody'' from both musical and signal processing perspectives and provide a case study that interprets the output of a melody extraction algorithm for specific excerpts. We then provide a comprehensive comparative analysis of melody extraction algorithms based on the results of an international evaluation campaign. We discuss issues of algorithm design, evaluation, and applications that build upon melody extraction. Finally, we discuss some of the remaining challenges in melody extraction research in terms of algorithmic performance, development, and evaluation methodology.},
author = {Salamon, Justin and G{\'o}mez, Emilia and Ellis, Daniel P W and Richard, Ga{\"e}l},
doi = {10.1109/MSP.2013.2271648},
issn = {1053-5888},
journal = {IEEE Signal Processing Magazine},
number = {2},
pages = {118--134},
shorttitle = {Signal Processing Magazine, IEEE},
title = {{Melody Extraction from Polyphonic Music Signals: Approaches, applications, and challenges}},
url = {http://ieeexplore.ieee.org/lpdocs/epic03/wrapper.htm?arnumber=6739213},
volume = {31},
year = {2014}
}
@inproceedings{Gomez2004,
  author    = {Gomez, Emilia and Herrera, Perfecto},
  title     = {{Estimating the Tonality of Polyphonic Audio Files: Cognitive versus Machine Learning Modelling Strategies}},
  booktitle = {Proceedings of the International Conference on Music Information Retrieval (ISMIR)},
  address   = {Barcelona},
  pages     = {92--95},
  year      = {2004},
}
@inproceedings{AndrewJ.2010,
  author    = {Milne, Andrew J},
  title     = {{Tonal music theory: A psychoacoustic explanation?}},
  booktitle = {Proceedings of International Conference of Music Perception and Cognition},
  address   = {Seattle},
  year      = {2010},
}
@inproceedings{Smith2011,
address = {Miami},
author = {Smith, Jordan B L and Burgoyne, J Ashley and Fujinaga, Ichiro and {De Roure}, David and Downie, J Stephen},
booktitle = {Proceedings of the International Conference on Music Information Retrieval (ISMIR)},
pages = {555--560},
title = {{Design and Creation of a Large-scale Database of Structural Annotations}},
year = {2011}
}
@inproceedings{Lee2009,
  author    = {Lee, Honglak and Pham, Peter and Largman, Yan and Ng, Andrew Y},
  title     = {{Unsupervised feature learning for audio classification using convolutional deep belief networks}},
  booktitle = {Advances in Neural Information Processing Systems},
  pages     = {1096--1104},
  year      = {2009},
  url       = {http://papers.nips.cc/paper/3674-unsupervised-feature-learning-for-audio-classification-using-convolutional-deep-belief-networks},
}
@inproceedings{schmidt2011modeling,
author = {Schmidt, Erik M and Kim, Youngmoo E},
booktitle = {Proceedings of the International Conference on Music Information Retrieval (ISMIR)},
pages = {777--782},
title = {{Modeling Musical Emotion Dynamics with Conditional Random Fields}},
year = {2011}
}
@inproceedings{Laurier2009a,
  author    = {Laurier, Cyril and Lartillot, Olivier and Eerola, Tuomas and Toiviainen, Petri},
  title     = {{Exploring Relationships between Audio Features and Emotion in Music}},
  booktitle = {Proceedings of the Conference of European Society for the Cognitive Sciences of Music (ESCOM)},
  year      = {2009},
  url       = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.182.728},
}
@inproceedings{Schluter2013,
author = {Schl{\"u}ter, Jan and B{\"o}ck, Sebastian},
booktitle = {6th International Workshop on Machine Learning and Music, held in conjunction with the European Conference on Machine Learning and Principles and Practice of Knowledge Discovery in Databases, ECML/PKDD 2013},
title = {{Musical Onset Detection with Convolutional Neural Networks}},
year = {2013}
}
@inproceedings{Dessein2010,
  author    = {Dessein, A and Cont, A and Lemaitre, G},
  title     = {{Real-time polyphonic music transcription with non-negative matrix factorization and beta-divergence}},
  booktitle = {Proceedings of the International Conference on Music Information Retrieval (ISMIR)},
  pages     = {489--494},
  year      = {2010},
  url       = {http://hal.upmc.fr/hal-00708682},
}
@book{Bregman1990,
  author    = {Bregman, Albert S},
  title     = {{Auditory scene analysis - The perceptual organization of sound}},
  publisher = {MIT Press},
  year      = {1990},
  pages     = {773},
}
@inproceedings{Battenberg2012,
  author    = {Battenberg, Eric and Wessel, David},
  title     = {{Analyzing Drum Patterns using Conditional Deep Belief Networks}},
  booktitle = {Proceedings of the International Conference on Music Information Retrieval (ISMIR)},
  pages     = {37--42},
  year      = {2012},
}
@article{Balaguer-Ballester2009,
author = {Balaguer-Ballester, Emili and Clark, Nicolas R and Coath, Martin and Krumbholz, Katrin and Denham, Susan L},
journal = {PLoS Computational Biology},
number = {3},
pages = {1--15},
title = {{Understanding Pitch Perception as a Hierarchical Process with Top-Down Modulation}},
volume = {5},
year = {2009}
}
@article{Cooper2006,
author = {Cooper, Matthew and Foote, Jonathan and Pampalk, Elias and Tzanetakis, George},
journal = {Computer Music Journal},
number = {2},
pages = {42--62},
title = {{Visualization in Audio-Based Music Information Retrieval}},
volume = {30},
year = {2006}
}
@book{Palm1986,
  editor    = {Palm, G\"{u}nther and Aertsen, Ad},
  title     = {{Brain Theory}},
  publisher = {Springer Berlin Heidelberg},
  address   = {Berlin, Heidelberg},
  year      = {1986},
  doi       = {10.1007/978-3-642-70911-1},
  isbn      = {978-3-642-70913-5},
  url       = {http://www.springerlink.com/index/10.1007/978-3-642-70911-1},
}
@book{Werner2012,
address = {New York},
author = {Werner, Lynne A and Abdala, Carolina and Keefe, Douglas H and Eggermont, Jos J and Moore, Jean K and Buss, Emily and Hall, III, Joseph W and Grose, John H and Leibold, Lori J and Litovsky, Ruth Y and Panneton, Robin and Newman, Rochelle and Trainor, Laurel J and Unrau, Andrea and Eisenberg, Laurie S and Johnson, Karen C and Ambrose, Sophie E},
editor = {Jones, Mari Riess and Fay, Richard R and Popper, Arthur N},
pages = {284},
publisher = {Springer},
title = {{Human Auditory Development}},
year = {2012}
}
@inproceedings{Mauch2008,
address = {Philadelphia},
author = {Mauch, Matthias and Dixon, Simon},
booktitle = {Proceedings of the International Conference on Music Information Retrieval (ISMIR)},
pages = {45--50},
title = {{A Discrete Mixture Model for Chord Labelling}},
volume = {1},
year = {2008}
}
@inproceedings{mauch:adt:2013,
  author    = {Mauch, Matthias and Ewert, Sebastian},
  title     = {{The Audio Degradation Toolbox and its Application to Robustness Evaluation}},
  booktitle = {Proceedings of the 14th International Society for Music Information Retrieval Conference (ISMIR 2013)},
  pages     = {83--88},
  year      = {2013},
}
@article{Mohamed2010,
author = {Mohamed, Abdel-rahman and Dahl, George E and Hinton, Geoffrey},
journal = {IEEE Transactions on Audio, Speech, and Language Processing},
number = {1},
pages = {14--22},
title = {{Acoustic Modeling using Deep Belief Networks}},
volume = {20},
year = {2012}
}
@article{Beeli2007,
  author  = {Beeli, Gian and Esslen, Michaela and Jancke, Lutz},
  title   = {{Frequency Correlates in Grapheme-Color Synaesthesia}},
  journal = {Psychological Science},
  volume  = {18},
  number  = {9},
  pages   = {788--792},
  year    = {2007},
}
@inproceedings{Humphrey2012,
  author    = {Humphrey, Eric J and Bello, Juan P and LeCun, Yann},
  title     = {{Moving beyond feature design: deep architectures and automatic feature learning in music informatics}},
  booktitle = {Proceedings of the International Conference on Music Information Retrieval (ISMIR)},
  address   = {Porto},
  year      = {2012},
}
@article{Lee2008,
author = {Lee, Kyogu and Slaney, Malcolm},
journal = {IEEE Transactions on Audio, Speech, and Language Processing},
number = {2},
pages = {291--301},
title = {{Acoustic Chord Transcription and Key Extraction From Audio Using Key-Dependent HMMs Trained on Synthesized Audio}},
volume = {16},
year = {2008}
}
@inproceedings{Mauch2007,
  author    = {Mauch, Matthias and Dixon, Simon and Harte, Christopher},
  title     = {{Discovering Chord Idioms Through Beatles and Real Book Songs}},
  booktitle = {Proceedings of the International Conference on Music Information Retrieval (ISMIR)},
  address   = {Vienna},
  year      = {2007},
}
@article{Bengio2013,
abstract = {The success of machine learning algorithms generally depends on data representation, and we hypothesize that this is because different representations can entangle and hide more or less the different explanatory factors of variation behind the data. Although specific domain knowledge can be used to help design representations, learning with generic priors can also be used, and the quest for AI is motivating the design of more powerful representation-learning algorithms implementing such priors. This paper reviews recent work in the area of unsupervised feature learning and deep learning, covering advances in probabilistic models, autoencoders, manifold learning, and deep networks. This motivates longer term unanswered questions about the appropriate objectives for learning good representations, for computing representations (i.e., inference), and the geometrical connections between representation learning, density estimation, and manifold learning.},
author = {Bengio, Yoshua and Courville, Aaron and Vincent, Pascal},
doi = {10.1109/TPAMI.2013.50},
issn = {1939-3539},
journal = {IEEE Transactions on Pattern Analysis and Machine Intelligence},
keywords = {Algorithms,Artificial Intelligence,Artificial Intelligence: trends,Humans,Neural Networks (Computer)},
number = {8},
pages = {1798--1828},
pmid = {23787338},
shorttitle = {Pattern Analysis and Machine Intelligence, IEEE Tr},
title = {{Representation learning: a review and new perspectives}},
url = {http://www.ncbi.nlm.nih.gov/pubmed/23787338},
volume = {35},
year = {2013}
}
@phdthesis{Moorer1975,
author = {Moorer, James A},
pages = {171},
school = {Stanford University},
title = {{On the Segmentation and Analysis of Continuous Musical Sound by Digital Computer}},
url = {https://ccrma.stanford.edu/papers/segmentation-and-analysis-of-continuous-musical-sound-digital-computer},
year = {1975}
}
@inproceedings{Pesek2013a,
  author    = {Pesek, Matev\v{z} and Miheli\v{c}, France},
  title     = {{Hidden Markov model for chord estimation using compositional hierarchical model features}},
  booktitle = {Zbornik dvaindvajsete mednarodne Elektrotehni\v{s}ke in ra\v{c}unalni\v{s}ke konference},
  pages     = {145--148},
  year      = {2013},
}
@book{Rabiner1975,
  author    = {Rabiner, Lawrence R and Gold, Bernard},
  title     = {{Theory and application of digital signal processing}},
  publisher = {Prentice Hall},
  year      = {1975},
  pages     = {762},
}
@inproceedings{Papadopoulos2007,
author = {Papadopoulos, Helene and Peeters, Geoffroy},
booktitle = {International Workshop on Content-Based Multimedia Indexing (CBMI)},
pages = {53--60},
title = {{Large-Scale Study of Chord Estimation Algorithms Based on Chroma Representation and HMM}},
year = {2007}
}
@article{Eerola2010,
  author   = {Eerola, T and Vuoskoski, J K},
  title    = {{A comparison of the discrete and dimensional models of emotion in music}},
  journal  = {Psychology of Music},
  volume   = {39},
  number   = {1},
  pages    = {18--49},
  year     = {2010},
  doi      = {10.1177/0305735610362821},
  issn     = {0305-7356},
  url      = {http://pom.sagepub.com/content/39/1/18.abstract},
  abstract = {The primary aim of the present study was to systematically compare perceived emotions in music using two different theoretical frameworks: the discrete emotion model, and the dimensional model of affect. A secondary aim was to introduce a new, improved set of stimuli for the study of music-mediated emotions. A large pilot study established a set of 110 film music excerpts, half were moderately and highly representative examples of five discrete emotions (anger, fear, sadness, happiness and tenderness), and the other half moderate and high examples of the six extremes of three bipolar dimensions (valence, energy arousal and tension arousal). These excerpts were rated in a listening experiment by 116 non-musicians. All target emotions of highly representative examples in both conceptual sets were discriminated by self-ratings. Linear mapping techniques between the discrete and dimensional models revealed a high correspondence along two central dimensions that can be labelled as valence and arousal, and the three dimensions could be reduced to two without significantly reducing the goodness of fit. The major difference between the discrete and categorical models concerned the poorer resolution of the discrete model in characterizing emotionally ambiguous examples. The study offers systematically structured and rich stimulus material for exploring emotional processing.},
}
@article{McColl2001,
author = {McColl, E and Jacoby, A and Thomas, L and Soutter, J and Bamford, C and Steen, N and Thomas, R and Harvey, E and Garratt, A and Bond, J},
issn = {1366-5278},
journal = {Health technology assessment (Winchester, England)},
keywords = { Bibliographic,Benchmarking,Data Collection,Data Collection: methods,Databases,Great Britain,Health Care Surveys,Health Care Surveys: methods,Health Personnel,Questionnaires,Research Design},
number = {31},
pages = {1--256},
pmid = {11809125},
title = {{Design and use of questionnaires: a review of best practice applicable to surveys of health service staff and patients}},
url = {http://www.ncbi.nlm.nih.gov/pubmed/11809125},
volume = {5},
year = {2001}
}
@inproceedings{Bock2012,
  author    = {Bock, Sebastian and Krebs, Florian and Schedl, Markus},
  title     = {{Evaluating the online capabilities of onset detection methods}},
  booktitle = {Proceedings of the International Conference on Music Information Retrieval (ISMIR)},
  address   = {Porto},
  year      = {2012},
}
@book{Fastl2007,
author = {Fastl, Hugo and Zwicker, Eberhard},
pages = {416},
publisher = {Springer},
title = {{Psychoacoustics: Facts and models}},
year = {2007}
}
@inproceedings{Pesek,
address = {Dubrovnik},
author = {Pesek, Matev\v{z} and Leonardis, Ale\v{s} and Marolt, Matija},
booktitle = {International Conference on Systems, Signals and Image Processing (IWSSIP), 2014},
issn = {2157-8672},
keywords = {Estimation,Hidden Markov models,Silicon carbide,audio chord estimation,compositional hierarchical model,deep learning,stacking generalization},
pages = {107--110},
publisher = {IEEE},
title = {{Boosting audio chord estimation using multiple classifiers}},
url = {http://ieeexplore.ieee.org/articleDetails.jsp?arnumber=6837642},
year = {2014}
}
@inproceedings{Goodfellow2009,
author = {Goodfellow, Ian J and Le, Quoc V and Saxe, Andrew M and Lee, Honglak and Ng, Andrew Y},
booktitle = {Advances in Neural Information Processing Systems},
title = {{Measuring invariances in deep networks}},
year = {2009}
}
@inproceedings{XavierGlorot2011,
author = {Glorot, Xavier and Bordes, Antoine and Bengio, Yoshua},
booktitle = {ICML},
pages = {513--520},
title = {{Domain adaptation for large-scale sentiment classification: A deep learning approach}},
url = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.231.3442},
year = {2011}
}
@book{Albert2013,
abstract = {Measuring the User Experience was the first book that focused on how to quantify the user experience. Now in the second edition, the authors include new material on how recent technologies have made it easier and more effective to collect a broader range of data about the user experience. As more UX and web professionals need to justify their design decisions with solid, reliable data, Measuring the User Experience provides the quantitative analysis training that these professionals need. The second edition presents new metrics such as emotional engagement, personas, keystroke analysis, and net promoter score. It also examines how new technologies coming from neuro-marketing and online market research can refine user experience measurement, helping usability and user experience practitioners make business cases to stakeholders. The book also contains new research and updated examples, including tips on writing online survey questions, six new case studies, and examples using the most recent version of Excel. Learn which metrics to select for every case, including behavioral, physiological, emotional, aesthetic, gestural, verbal, and physical, as well as more specialized metrics such as eye-tracking and clickstream data. Find a vendor-neutral examination of how to measure the user experience with web sites, digital products, and virtually any other type of product or system. Discover in-depth global case studies showing how organizations have successfully used metrics and the information they revealed. Companion site, www.measuringux.com, includes articles, tools, spreadsheets, presentations, and other resources to help you effectively measure the user experience},
author = {Albert, William and Tullis, Thomas},
isbn = {0124157920},
pages = {320},
publisher = {Newnes},
title = {{Measuring the User Experience: Collecting, Analyzing, and Presenting Usability Metrics}},
url = {http://books.google.com/books?id=bPhLeMBLEkAC\&pgis=1},
year = {2013}
}
@article{Poliner2007,
author = {Poliner, Graham E and Ellis, Daniel P W and Ehmann, Andreas F and Gomez, Emilia and Streich, Sebastian and Ong, Beesuan},
doi = {10.1109/TASL.2006.889797},
issn = {1558-7916},
journal = {IEEE Transactions on Audio, Speech, and Language Processing},
number = {4},
pages = {1247--1256},
publisher = {IEEE},
title = {{Melody Transcription From Music Audio: Approaches and Evaluation}},
url = {http://ieeexplore.ieee.org/articleDetails.jsp?arnumber=4156215},
volume = {15},
year = {2007}
}
@article{Orio2006,
author = {Orio, Nicola},
journal = {Foundations and Trends{\textregistered} in Information Retrieval},
number = {1},
pages = {1--90},
title = {{Music Retrieval: A Tutorial and Review}},
volume = {1},
year = {2006}
}
@inproceedings{Bello2005,
  author    = {Bello, Juan P and Pickens, Jeremy},
  title     = {{A robust mid-level representation for harmonic content in music signals}},
  booktitle = {Proceedings of the International Conference on Music Information Retrieval (ISMIR)},
  address   = {London},
  pages     = {304--311},
  year      = {2005},
}
@article{Kumar2013,
  author   = {Kumar, Sukhbinder and Sedley, William and Barnes, Gareth R and Teki, Sundeep and Friston, Karl J and Griffiths, Timothy D},
  title    = {{A brain basis for musical hallucinations}},
  journal  = {Cortex},
  year     = {2013},
  url      = {http://www.sciencedirect.com/science/article/pii/S0010945213003080},
  keywords = {Auditory cortex,Beta oscillations,Gamma oscillations,Magnetoencephalography,Musical hallucinations,Predictive coding},
  abstract = {The physiological basis for musical hallucinations (MH) is not understood. One obstacle to understanding has been the lack of a method to manipulate the intensity of hallucination during the course of experiment. Residual inhibition, transient suppression of a phantom percept after the offset of a masking stimulus, has been used in the study of tinnitus. We report here a human subject whose MH were residually inhibited by short periods of music. Magnetoencephalography (MEG) allowed us to examine variation in the underlying oscillatory brain activity in different states. Source-space analysis capable of single-subject inference defined left-lateralised power increases, associated with stronger hallucinations, in the gamma band in left anterior superior temporal gyrus, and in the beta band in motor cortex and posteromedial cortex. The data indicate that these areas form a crucial network in the generation of MH, and are consistent with a model in which MH are generated by persistent reciprocal communication in a predictive coding hierarchy.},
}
@article{Conklin2010,
  author    = {Conklin, Darrell},
  title     = {{Discovery of distinctive patterns in music}},
  journal   = {Intelligent Data Analysis},
  volume    = {14},
  number    = {5},
  pages     = {547--554},
  publisher = {IOS Press},
  year      = {2010},
  issn      = {1088-467X},
  url       = {http://dl.acm.org/citation.cfm?id=1859240.1859243},
  keywords  = {Pattern discovery,anticorpus,chord sequences,distinctive pattern,folk songs,subsumption},
}
@article{Rabiner1989,
author = {Rabiner, Lawrence R},
journal = {Proceedings of the IEEE},
number = {2},
pages = {257--286},
title = {{A tutorial on hidden Markov models and selected applications in speech recognition}},
volume = {77},
year = {1989}
}
@inproceedings{Bengio2007,
  author    = {Bengio, Yoshua and Lamblin, Pascal and Popovici, Dan and Larochelle, Hugo},
  title     = {{Greedy Layer-Wise Training of Deep Networks}},
  booktitle = {Advances in Neural Information Processing Systems},
  pages     = {153--160},
  year      = {2007},
  publisher = {MIT Press},
}
@article{Laurier2009,
  author  = {Laurier, Cyril and Meyers, Owen and Serr{\`a}, Joan and Blech, Martin and Herrera, Perfecto and Serra, Xavier},
  title   = {{Indexing music by mood: design and integration of an automatic content-based annotator}},
  journal = {Multimedia Tools and Applications},
  volume  = {48},
  number  = {1},
  pages   = {161--184},
  year    = {2009},
  doi     = {10.1007/s11042-009-0360-2},
  issn    = {1380-7501},
  url     = {http://link.springer.com/10.1007/s11042-009-0360-2},
}
@incollection{Downie2010,
  author    = {Downie, J Stephen and Ehmann, Andreas F and Bay, Mert and Jones, M Cameron},
  title     = {{The Music Information Retrieval Evaluation eXchange: Some Observations and Insights}},
  booktitle = {Advances in Music Information Retrieval},
  editor    = {Wieczorkowska, A. A. and Ra{\'s}, Z. W.},
  pages     = {93--115},
  year      = {2010},
  publisher = {Springer-Verlag},
  address   = {Berlin},
}
@inproceedings{Pesek2014c,
  author    = {Pesek, Matev{\v{z}} and Leonardis, Ale{\v{s}} and Marolt, Matija},
  title     = {{A compositional hierarchical model for music information retrieval}},
  booktitle = {Proceedings of the International Conference on Music Information Retrieval (ISMIR)},
  year      = {2014},
  address   = {Taipei},
}
@article{Peretz2003,
  author  = {Peretz, Isabelle and Coltheart, Max},
  title   = {{Modularity of music processing}},
  journal = {Nature Neuroscience},
  volume  = {6},
  number  = {7},
  pages   = {688--691},
  year    = {2003},
}
@article{Pertusa2012,
  author  = {Pertusa, Antonio and I{\~n}esta, Jos{\'e} M},
  title   = {{Efficient methods for joint estimation of multiple fundamental frequencies in music signals}},
  journal = {EURASIP Journal on Advances in Signal Processing},
  volume  = {2012},
  number  = {1},
  pages   = {27},
  year    = {2012},
  doi     = {10.1186/1687-6180-2012-27},
  issn    = {1687-6180},
  url     = {http://asp.eurasipjournals.com/content/2012/1/27},
}
@article{Smith2006,
  author  = {Smith, Evan C and Lewicki, Michael S},
  title   = {{Efficient Auditory Coding}},
  journal = {Nature},
  volume  = {439},
  number  = {7079},
  pages   = {978--982},
  year    = {2006},
}
@inproceedings{Hochenbaum2009,
  author    = {Hochenbaum, Jordan and Vallis, Owen},
  title     = {{Bricktable: A Musical Tangible Multi-Touch Interface}},
  booktitle = {Berlin Open Conference},
  year      = {2009},
}
% NOTE(review): year changed 2006 -> 2002 to agree with the DOI and volume 7;
% 2006 appears to be the online re-publication date. Key kept for existing citations.
@article{Manfreda2006,
  author  = {Manfreda, Katja Lozar and Batagelj, Zenel and Vehovar, Vasja},
  title   = {{Design of Web Survey Questionnaires: Three Basic Experiments}},
  journal = {Journal of Computer-Mediated Communication},
  volume  = {7},
  number  = {3},
  year    = {2002},
  doi     = {10.1111/j.1083-6101.2002.tb00149.x},
  issn    = {1083-6101},
  url     = {http://doi.wiley.com/10.1111/j.1083-6101.2002.tb00149.x},
}
@book{Holzinger2008,
  editor    = {Holzinger, Andreas},
  title     = {{HCI and Usability for Education and Work}},
  series    = {Lecture Notes in Computer Science},
  volume    = {5298},
  year      = {2008},
  publisher = {Springer Berlin Heidelberg},
  address   = {Berlin, Heidelberg},
  doi       = {10.1007/978-3-540-89350-9},
  isbn      = {978-3-540-89349-3},
  issn      = {0302-9743},
  url       = {http://www.springerlink.com/index/10.1007/978-3-540-89350-9},
}
@article{Valdez1994,
  author  = {Valdez, Patricia and Mehrabian, Albert},
  title   = {{Effects of Color on Emotions}},
  journal = {Journal of Experimental Psychology: General},
  volume  = {123},
  number  = {4},
  pages   = {394--409},
  year    = {1994},
}
@book{juslin2001music,
  editor    = {Juslin, Patrik N and Sloboda, John A},
  title     = {{Music and emotion: Theory and research}},
  year      = {2001},
  publisher = {Oxford University Press},
}
@article{Levitin2005,
  author  = {Levitin, Daniel J and Rogers, Susan E},
  title   = {{Absolute pitch: perception, coding and controversies}},
  journal = {TRENDS in Cognitive Sciences},
  volume  = {9},
  number  = {1},
  pages   = {26--33},
  year    = {2005},
}
@article{Tzanetakis2002,
  author     = {Tzanetakis, G and Cook, P},
  title      = {{Musical genre classification of audio signals}},
  journal    = {IEEE Transactions on Speech and Audio Processing},
  volume     = {10},
  number     = {5},
  pages      = {293--302},
  year       = {2002},
  doi        = {10.1109/TSA.2002.800560},
  issn       = {1063-6676},
  url        = {http://ieeexplore.ieee.org/lpdocs/epic03/wrapper.htm?arnumber=1021072},
  shorttitle = {Speech and Audio Processing, IEEE Transactions on},
  keywords   = {Computer science,Cultural differences,Feature extraction,Humans,Instruments,Multiple signal classification,Music information retrieval,Pattern recognition,Signal analysis,Wavelet analysis,World Wide Web,audio signal processing,audio signals,automatic musical genre classification,content-based analysis,feature sets,genre hierarchies,harmonic content,human musical genre classification,information retrieval,instrumentation,music,music information retrieval systems,musical genre annotation,musical signals,pitch content,real-time frame-based classification,rhythmic content,rhythmic structure,signal classification,statistical analysis,statistical pattern recognition classifiers traini,timbral texture,whole file classification},
  abstract   = {Musical genres are categorical labels created by humans to characterize pieces of music. A musical genre is characterized by the common characteristics shared by its members. These characteristics typically are related to the instrumentation, rhythmic structure, and harmonic content of the music. Genre hierarchies are commonly used to structure the large collections of music available on the Web. Currently musical genre annotation is performed manually. Automatic musical genre classification can assist or replace the human user in this process and would be a valuable addition to music information retrieval systems. In addition, automatic musical genre classification provides a framework for developing and evaluating features for any type of content-based analysis of musical signals. In this paper, the automatic classification of audio signals into an hierarchy of musical genres is explored. More specifically, three feature sets for representing timbral texture, rhythmic content and pitch content are proposed. The performance and relative importance of the proposed features is investigated by training statistical pattern recognition classifiers using real-world audio collections. Both whole file and real-time frame-based classification schemes are described. Using the proposed feature sets, classification of 61\% for ten musical genres is achieved. This result is comparable to results reported for human musical genre classification.},
}
@inproceedings{Wu2013,
  author    = {Wu, B and Wun, S and Lee, C and Horner, A},
  title     = {{Spectral correlates in emotion labeling of sustained musical instrument tones}},
  booktitle = {Proceedings of the International Conference on Music Information Retrieval (ISMIR)},
  pages     = {415--421},
  year      = {2013},
}
@article{Papadopoulos2011,
  author  = {Papadopoulos, Helene and Peeters, Geoffroy},
  title   = {{Joint Estimation of Chords and Downbeats From an Audio Signal}},
  journal = {IEEE Transactions on Audio, Speech, and Language Processing},
  volume  = {19},
  number  = {1},
  pages   = {138--152},
  year    = {2011},
}
@article{Ekman1992,
  author  = {Ekman, Paul},
  title   = {{An argument for basic emotions}},
  journal = {Cognition and Emotion},
  volume  = {6},
  number  = {3--4},
  pages   = {169--200},
  year    = {1992},
}
@article{Barbancho2012,
  author    = {Barbancho, Ana M and Klapuri, Anssi and Tardon, Lorenzo J and Barbancho, Isabel},
  title     = {{Automatic Transcription of Guitar Chords and Fingering From Audio}},
  journal   = {IEEE Transactions on Audio, Speech, and Language Processing},
  volume    = {20},
  number    = {3},
  pages     = {915--921},
  year      = {2012},
  publisher = {IEEE},
  doi       = {10.1109/TASL.2011.2174227},
  issn      = {1558-7916},
  url       = {http://ieeexplore.ieee.org/articleDetails.jsp?arnumber=6064873},
}
@inproceedings{Smith2011,
  author    = {Smith, Jordan B L and Burgoyne, J Ashley and Fujinaga, Ichiro and {De Roure}, David and Downie, J Stephen},
  title     = {{Design and Creation of a Large-scale Database of Structural Annotations}},
  booktitle = {Proceedings of the International Conference on Music Information Retrieval (ISMIR)},
  pages     = {555--560},
  year      = {2011},
  address   = {Miami},
}
@inproceedings{Pesek2014a,
  author    = {Pesek, Matev{\v{z}} and Godec, Primo{\v{z}} and Poredo{\v{s}}, Mojca and Strle, Gregor and Guna, Jo{\v{z}}e and Stojmenova, Emilija and Poga{\v{c}}nik, Matev{\v{z}} and Marolt, Matija},
  title     = {{Gathering a dataset of multi-modal mood-dependent perceptual responses to music}},
  booktitle = {Proceedings of the EMPIRE workshop},
  year      = {2014},
}
@article{Maher1990,
  author    = {Maher, Robert C},
  title     = {{Evaluation of a Method for Separating Digitized Duet Signals}},
  journal   = {Journal of the Audio Engineering Society},
  volume    = {38},
  number    = {12},
  pages     = {956--979},
  month     = dec,
  year      = {1990},
  publisher = {Audio Engineering Society},
  url       = {http://www.aes.org/e-lib/browse.cfm?elib=6001},
}
@article{Clarkson1996,
  author  = {Clarkson, Marsha G and Martin, Rhonda L and Miciek, Sheridan G},
  title   = {{Infants' Perception of Pitch: Number of Harmonics}},
  journal = {Infant Behavior and Development},
  volume  = {19},
  number  = {2},
  pages   = {191--197},
  year    = {1996},
}
@article{Grindlay2011,
  author     = {Grindlay, Graham and Ellis, Daniel P W},
  title      = {{Transcribing Multi-Instrument Polyphonic Music With Hierarchical Eigeninstruments}},
  journal    = {IEEE Journal of Selected Topics in Signal Processing},
  volume     = {5},
  number     = {6},
  pages      = {1159--1169},
  year       = {2011},
  doi        = {10.1109/JSTSP.2011.2162395},
  issn       = {1932-4553},
  url        = {http://ieeexplore.ieee.org/lpdocs/epic03/wrapper.htm?arnumber=5957256},
  shorttitle = {Selected Topics in Signal Processing, IEEE Journal},
  keywords   = {Aerospace electronics,Eigeninstruments,Hidden Markov models,Instruments,Matrix decomposition,Probabilistic logic,Signal processing algorithms,Training,audio signal processing,eigenvalues and eigenfunctions,frame-level F-measures,hierarchical eigeninstruments,hierarchical mixture-of-subspaces design,multiinstrument polyphonic music,multiple polyphonic instrument sources,music,non-negative matrix factorization (NMF),nonnegative matrix factorization algorithm,polyphonic transcription,probabilistic model,probability,single-channel music recordings,subspace,transcription},
  abstract   = {This paper presents a general probabilistic model for transcribing single-channel music recordings containing multiple polyphonic instrument sources. The system requires no prior knowledge of the instruments present in the mixture (other than the number), although it can benefit from information about instrument type if available. In contrast to many existing polyphonic transcription systems, our approach explicitly models the individual instruments and is thereby able to assign detected notes to their respective sources. We use training instruments to learn a set of linear manifolds in model parameter space which are then used during transcription to constrain the properties of models fit to the target mixture. This leads to a hierarchical mixture-of-subspaces design which makes it possible to supply the system with prior knowledge at different levels of abstraction. The proposed technique is evaluated on both recorded and synthesized mixtures containing two, three, four, and five instruments each. We compare our approach in terms of transcription with (i.e., detected pitches must be associated with the correct instrument) and without source-assignment to another multi-instrument transcription system as well as a baseline non-negative matrix factorization (NMF) algorithm. For two-instrument mixtures evaluated with source-assignment, we obtain average frame-level F-measures of up to 0.52 in the completely blind transcription setting (i.e., no prior knowledge of the instruments in the mixture) and up to 0.67 if we assume knowledge of the basic instrument types. For transcription without source assignment, these numbers rise to 0.76 and 0.83, respectively.},
}
@article{Werbos1990,
  author     = {Werbos, P J},
  title      = {{Backpropagation through time: what it does and how to do it}},
  journal    = {Proceedings of the IEEE},
  volume     = {78},
  number     = {10},
  pages      = {1550--1560},
  year       = {1990},
  doi        = {10.1109/5.58337},
  issn       = {0018-9219},
  url        = {http://ieeexplore.ieee.org/lpdocs/epic03/wrapper.htm?arnumber=58337},
  shorttitle = {Proceedings of the IEEE},
  abstract   = {Basic backpropagation, which is a simple method now being widely used in areas like pattern recognition and fault diagnosis, is reviewed. The basic equations for backpropagation through time, and applications to areas like pattern recognition involving dynamic systems, systems identification, and control are discussed. Further extensions of this method, to deal with systems other than neural networks, systems involving simultaneous equations, or true recurrent networks, and other practical issues arising with the method are described. Pseudocode is provided to clarify the algorithms. The chain rule for ordered derivatives-the theorem which underlies backpropagation-is briefly discussed. The focus is on designing a simpler version of backpropagation which can be translated into computer code and applied directly by neutral network users},
}
@article{laurier2007audio,
  author    = {Laurier, Cyril and Herrera, Perfecto and Mandel, M and Ellis, D},
  title     = {{Audio music mood classification using support vector machine}},
  journal   = {Music Information Retrieval Evaluation eXchange (MIREX) extended abstract},
  year      = {2007},
  publisher = {Citeseer},
}
@inproceedings{Weninger2013,
  author    = {Weninger, Felix and Kirst, Christian and Schuller, Bj{\"o}rn and Bungartz, Hans-Joachim},
  title     = {{A discriminative approach to polyphonic piano note transcription using supervised non-negative matrix factorization}},
  booktitle = {Proceedings of International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
  pages     = {6--10},
  year      = {2013},
  address   = {Vancouver},
}
@article{Au2003,
  author     = {Au, Whitlow W L and Benoit-Bird, Kelly J},
  title      = {{Automatic gain control in the echolocation system of dolphins}},
  journal    = {Nature},
  volume     = {423},
  number     = {6942},
  pages      = {861--863},
  year       = {2003},
  doi        = {10.1038/nature01727},
  issn       = {0028-0836},
  pmid       = {12815429},
  url        = {http://dx.doi.org/10.1038/nature01727},
  shorttitle = {Nature},
  keywords   = {Acoustics,Animals,Chiroptera,Chiroptera: physiology,Dolphins,Dolphins: classification,Dolphins: physiology,Echolocation,Echolocation: physiology,Hearing,Hearing: physiology,Sound,Transportation,Transportation: instrumentation,Whales,Whales: physiology},
  abstract   = {In bats and technological sonars, the gain of the receiver is progressively increased with time after the transmission of a signal to compensate for acoustic propagation loss. The current understanding of dolphin echolocation indicates that automatic gain control is not a part of their sonar system. In order to test this understanding, we have performed field measurements of free-ranging echolocating dolphins. Here we show that dolphins do possess an automatic gain control mechanism, but that it is implemented in the transmission phase rather than the receiving phase of a sonar cycle. We find that the amplitude of the dolphins' echolocation signals are highly range dependent; this amplitude increases with increasing target range, R, in a 20 log(R) fashion to compensate for propagation loss. If the echolocation target is a fish school with many sound scatterers, the echoes from the school will remain nearly constant with range as the dolphin closes in on it. This characteristic has the same effect as time-varying gain in bats and technological sonar when considered from a sonar system perspective.},
}
@article{McCulloch1943,
  author  = {McCulloch, Warren S and Pitts, Walter},
  title   = {{A logical calculus of the ideas immanent in nervous activity}},
  journal = {The Bulletin of Mathematical Biophysics},
  volume  = {5},
  number  = {4},
  pages   = {115--133},
  month   = dec,
  year    = {1943},
  doi     = {10.1007/BF02478259},
  issn    = {0007-4985},
  url     = {http://link.springer.com/10.1007/BF02478259},
}
@inproceedings{Woelfer2012,
  author    = {Woelfer, Jill Palzkill},
  title     = {{The role of music in the lives of homeless young people}},
  booktitle = {Proceedings of the International Conference on Music Information Retrieval (ISMIR)},
  pages     = {367--372},
  year      = {2012},
  address   = {Porto},
  url       = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.294.892},
}
@article{Schedl2013,
  author  = {Schedl, Markus and Flexer, Arthur and Urbano, Juli{\'a}n},
  title   = {{The neglected user in music information retrieval research}},
  journal = {Journal of Intelligent Information Systems},
  volume  = {41},
  number  = {3},
  pages   = {523--539},
  year    = {2013},
  doi     = {10.1007/s10844-013-0247-6},
  issn    = {0925-9902},
  url     = {http://link.springer.com/10.1007/s10844-013-0247-6},
}
% NOTE(review): the export listed the handbook editors as chapter authors.
% Author, pages and publisher below are from memory of the published chapter; confirm against the book.
@incollection{dalgleish1999handbook,
  author    = {Ekman, Paul},
  title     = {{Basic Emotions}},
  booktitle = {Handbook of cognition and emotion},
  editor    = {Dalgleish, Tim and Power, Michael J},
  pages     = {45--60},
  year      = {1999},
  publisher = {John Wiley \& Sons},
}
@article{Downie2008,
  author  = {Downie, J Stephen},
  title   = {{The music information retrieval evaluation exchange (2005--2007): A window into music information retrieval research}},
  journal = {Acoustical Science and Technology},
  volume  = {29},
  number  = {4},
  pages   = {247--255},
  year    = {2008},
  url     = {https://www.jstage.jst.go.jp/article/ast/29/4/29\_4\_247/\_article},
}
@article{Mauch2010a,
  author  = {Mauch, Matthias and Dixon, Simon},
  title   = {{Simultaneous Estimation of Chords and Musical Context From Audio}},
  journal = {IEEE Transactions on Audio, Speech, and Language Processing},
  volume  = {18},
  number  = {6},
  pages   = {1280--1289},
  year    = {2010},
}
@article{Tirovolas2011,
  author  = {Tirovolas, Anna K and Levitin, Daniel J},
  title   = {{Music perception and cognition research from 1983 to 2010: a categorical and bibliometric analysis of empirical articles in Music Perception}},
  journal = {Music Perception: An Interdisciplinary Journal},
  volume  = {29},
  number  = {1},
  pages   = {23--36},
  year    = {2011},
}
@phdthesis{Maher1989,
  author = {Maher, Robert Crawford},
  title  = {{An Approach for the Separation of Voices in Composite Musical Signals}},
  school = {University of Illinois at Urbana-Champaign},
  pages  = {150},
  year   = {1989},
  url    = {http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.68.8532},
}
@inproceedings{Harte2005,
  author    = {Harte, Christopher and Sandler, Mark and Abdallah, Samer and Gomez, Emilia},
  title     = {{Symbolic representation of musical chords: A proposed syntax for text annotations}},
  booktitle = {Proceedings of the International Conference on Music Information Retrieval (ISMIR)},
  year      = {2005},
  address   = {London},
}
@article{Gabrielsson2002,
  author   = {Gabrielsson, Alf},
  title    = {{Emotion Perceived and Emotion Felt: Same or Different?}},
  journal  = {Musicae Scientiae},
  volume   = {5},
  number   = {1\_suppl},
  pages    = {123--147},
  month    = sep,
  year     = {2002},
  doi      = {10.1177/10298649020050S105},
  url      = {http://msx.sagepub.com/content/5/1\_suppl/123.short},
  abstract = {A distinction is made between emotion perception, that is, to perceive emotional expression in music without necessarily being affected oneself, and emotion induction, that is, listeners' emotional response to music. This distinction is not always observed, neither in everyday conversation about emotions, nor in scientific papers. Empirical studies of emotion perception are briefly reviewed with regard to listener agreement concerning expressed emotions, followed by a selective review of empirical studies on emotional response to music. Possible relationships between emotion perception and emotional response are discussed and exemplified: Positive relationship, negative relationship, no systematic relationship and no relationship. It is emphasised that both emotion perception and, especially, emotional response are dependent on an interplay between musical, personal, and situational factors. Some methodological questions and suggestions for further research are discussed.},
}
@article{Yu1983,
  author  = {Yu, Julie and Cooper, Harris},
  title   = {{A Quantitative Review of Research Design Effects on Response Rates to Questionnaires}},
  journal = {Journal of Marketing Research},
  volume  = {20},
  number  = {1},
  pages   = {36--44},
  year    = {1983},
}
@inproceedings{logan2000mel,
  author    = {Logan, Beth},
  title     = {{Mel Frequency Cepstral Coefficients for Music Modeling}},
  booktitle = {Proceedings of the International Symposium on Music Information Retrieval (ISMIR)},
  year      = {2000},
}
@article{Guna2014,
  author    = {Guna, Jo{\v{z}}e and Jakus, Grega and Poga{\v{c}}nik, Matev{\v{z}} and Toma{\v{z}}i{\v{c}}, Sa{\v{s}}o and Sodnik, Jaka},
  title     = {{An analysis of the precision and reliability of the leap motion sensor and its suitability for static and dynamic tracking}},
  journal   = {Sensors},
  volume    = {14},
  number    = {2},
  pages     = {3702--3720},
  year      = {2014},
  publisher = {Multidisciplinary Digital Publishing Institute},
  doi       = {10.3390/s140203702},
  issn      = {1424-8220},
  pmid      = {24566635},
  url       = {http://www.mdpi.com/1424-8220/14/2/3702},
  abstract  = {We present the results of an evaluation of the performance of the Leap Motion Controller with the aid of a professional, high-precision, fast motion tracking system. A set of static and dynamic measurements was performed with different numbers of tracking objects and configurations. For the static measurements, a plastic arm model simulating a human arm was used. A set of 37 reference locations was selected to cover the controller's sensory space. For the dynamic measurements, a special V-shaped tool, consisting of two tracking objects maintaining a constant distance between them, was created to simulate two human fingers. In the static scenario, the standard deviation was less than 0.5 mm. The linear correlation revealed a significant increase in the standard deviation when moving away from the controller. The results of the dynamic scenario revealed the inconsistent performance of the controller, with a significant drop in accuracy for samples taken more than 250 mm above the controller's surface. The Leap Motion Controller undoubtedly represents a revolutionary input device for gesture-based human-computer interaction; however, due to its rather limited sensory space and inconsistent sampling frequency, in its current configuration it cannot currently be used as a professional tracking system.},
}
@inproceedings{Schmidt2013,
  author    = {Schmidt, Erik M and Kim, Youngmoo E},
  title     = {{Learning Rhythm and Melody Features with Deep Belief Networks}},
  booktitle = {Proceedings of the International Conference on Music Information Retrieval (ISMIR)},
  pages     = {21--26},
  year      = {2013},
}
@article{DeCheveigne2002,
  author  = {de Cheveign{\'e}, Alain and Kawahara, Hideki},
  title   = {{YIN, a fundamental frequency estimator for speech and music}},
  journal = {Journal of the Acoustical Society of America},
  volume  = {111},
  number  = {4},
  pages   = {1917--1930},
  year    = {2002},
}
@inproceedings{Kim2010,
  author    = {Kim, Y E and Schmidt, E M and Migneco, R and Morton, B G and Richardson, P and Scott, J and Speck, J A and Turnbull, D},
  title     = {{Music emotion recognition: A state of the art review}},
  booktitle = {Proceedings of the International Conference on Music Information Retrieval (ISMIR)},
  pages     = {255--266},
  year      = {2010},
  address   = {Utrecht},
}
% NOTE(review): the export had volume = {I} and number = {15}; they are swapped here
% on the assumption that "I" was a misread issue "1". Confirm the issue number.
@article{Kral1996,
  author  = {Kral, A and Majernik, V},
  title   = {{On lateral inhibition in the auditory system}},
  journal = {General Physiology and Biophysics},
  volume  = {15},
  number  = {1},
  pages   = {109--127},
  year    = {1996},
}
@incollection{ben2010user,
  author    = {Ben-Hur, Asa and Weston, Jason},
  title     = {{A user's guide to support vector machines}},
  booktitle = {Data mining techniques for the life sciences},
  pages     = {223--239},
  year      = {2010},
  publisher = {Springer},
}
@article{Ryynanen2008,
  author    = {Ryyn{\"a}nen, Matti P and Klapuri, Anssi P},
  title     = {{Automatic Transcription of Melody, Bass Line, and Chords in Polyphonic Music}},
  journal   = {Computer Music Journal},
  volume    = {32},
  number    = {3},
  pages     = {72--86},
  month     = sep,
  year      = {2008},
  publisher = {MIT Press},
  doi       = {10.1162/comj.2008.32.3.72},
  issn      = {0148-9267},
  url       = {http://www.mitpressjournals.org/doi/abs/10.1162/comj.2008.32.3.72?journalCode=comj},
}
@article{Tkalcic2013,
  author     = {Tkalcic, Marko and Odic, Ante and Kosir, Andrej and Tasic, Jurij},
  title      = {{Affective Labeling in a Content-Based Recommender System for Images}},
  journal    = {IEEE Transactions on Multimedia},
  volume     = {15},
  number     = {2},
  pages      = {391--400},
  year       = {2013},
  doi        = {10.1109/TMM.2012.2229970},
  issn       = {1520-9210},
  url        = {http://ieeexplore.ieee.org/lpdocs/epic03/wrapper.htm?arnumber=6362231},
  shorttitle = {Multimedia, IEEE Transactions on},
  abstract   = {Affective labeling of multimedia content has proved to be useful in recommender systems. In this paper we present a methodology for the implicit acquisition of affective labels for images. It is based on an emotion detection technique that takes as input the video sequences of the users' facial expressions. It extracts Gabor low level features from the video frames and employs a k nearest neighbors machine learning technique to generate affective labels in the valence-arousal-dominance space. We performed a comparative study of the performance of a content-based recommender (CBR) system for images that uses three types of metadata to model the users and the items: (i) generic metadata, (ii) explicitly acquired affective labels and (iii) implicitly acquired affective labels with the proposed methodology. The results show that the CBR performs best when explicit labels are used. However, implicitly acquired labels yield a significantly better performance of the CBR than generic metadata while being an unobtrusive feedback tool.},
}
@inproceedings{panda2013multi,
  author    = {Panda, R and Malheiro, R and Rocha, B and Oliveira, A and Paiva, R P},
  title     = {{Multi-Modal Music Emotion Recognition: A New Dataset, Methodology and Comparative Analysis}},
  booktitle = {Proceedings of the International Symposium on Computer Music Multidisciplinary Research (CMMR)},
  year      = {2013},
}
@techreport{Rosenblatt1957,
  author      = {Rosenblatt, Frank},
  title       = {{The Perceptron - a perceiving and recognizing automaton}},
  institution = {Cornell Aeronautical Laboratory},
  number      = {85-460-1},
  year        = {1957},
}
@article{Holzapfel2012,
  author     = {Holzapfel, A and Davies, M E P and Zapata, J R and Oliveira, J L and Gouyon, F},
  title      = {{Selective Sampling for Beat Tracking Evaluation}},
  journal    = {IEEE Transactions on Audio, Speech, and Language Processing},
  volume     = {20},
  number     = {9},
  pages      = {2539--2548},
  month      = nov,
  year       = {2012},
  doi        = {10.1109/TASL.2012.2205244},
  issn       = {1558-7916},
  url        = {http://ieeexplore.ieee.org/lpdocs/epic03/wrapper.htm?arnumber=6220849},
  shorttitle = {Audio, Speech, and Language Processing, IEEE Trans},
  keywords   = {Accuracy,Beat tracking,Correlation,Electronic mail,Estimation,Europe,Histograms,Humans,beat sequences,beat tracking evaluation,evaluation,ground truth annotation,learning (artificial intelligence),machine learning method,music,music collection,music sample,musical properties,mutual agreement,selective sampling,signal sampling,tag analysis},
  abstract   = {In this paper, we propose a method that can identify challenging music samples for beat tracking without ground truth. Our method, motivated by the machine learning method ``selective sampling,'' is based on the measurement of mutual agreement between beat sequences. In calculating this mutual agreement we show the critical influence of different evaluation measures. Using our approach we demonstrate how to compile a new evaluation dataset comprised of difficult excerpts for beat tracking and examine this difficulty in the context of perceptual and musical properties. Based on tag analysis we indicate the musical properties where future advances in beat tracking research would be most profitable and where beat tracking is too difficult to be attempted. Finally, we demonstrate how our mutual agreement method can be used to improve beat tracking accuracy on large music collections.},
}
@article{DiRusso2001,
  author   = {{Di Russo}, Francesco and Spinelli, Donatella and Morrone, M. Concetta},
  title    = {{Automatic gain control contrast mechanisms are modulated by attention in humans: evidence from visual evoked potentials}},
  journal  = {Vision Research},
  volume   = {41},
  number   = {19},
  pages    = {2435--2447},
  year     = {2001},
  url      = {http://www.sciencedirect.com/science/article/pii/S0042698901001341},
  keywords = {Attention,Automatic gain control contrast mechanisms,Visual evoked potentials},
  abstract = {This study investigated the effect of attention on the contrast response curves of steady-state visual evoked potentials (VEPs) to counter-phased sinusoidal gratings. The 1 cyc/deg gratings were modulated either in luminance or chromaticity (equiluminant red-green). The luminance grating counter-phased at 9 Hz (to favour activation of the magno-cellular system), and the chromatic grating at 2.5 Hz (to favour activation of the parvo-cellular system). Attention was directed towards the gratings (displayed in the left visual field) by requiring subjects to detect and respond to randomly occurring changes in contrast. In the control condition, attention towards the grating was minimised by requiring subjects to detect a target letter amongst distracters briefly flashed in the contra-lateral visual field. Attention increased VEP amplitudes for both luminance and chromatic stimuli, more so at high than at low contrasts, increasing the slope of the contrast amplitude curves (over the non-saturating range of contrasts). The estimates of contrast threshold from extrapolation of amplitudes were unaffected by attention. Attention also changed the VEP phases, but only for luminance gratings, where it acted to reduce the magnitude of phase advance with contrast. Attention had no effect on the average phases for chromatic gratings. The results are consistent with the notion that attention acts on cortical gain control mechanisms, which are known to be different for the magno- and parvo-cellular systems.},
}
% NOTE(review): the export gave ISMIR as the venue, but ISMIR 2010 was held in Utrecht
% (see Kim2010). The SMC venue below is from memory; confirm against the proceedings.
@inproceedings{Wu2010,
  author    = {Wu, Ho-Hsiang and Bello, Juan P},
  title     = {{Audio-based music visualization for music structure analysis}},
  booktitle = {Proceedings of the Sound and Music Computing Conference (SMC)},
  year      = {2010},
  address   = {Barcelona},
}
@article{Brown1991,
  author  = {Brown, Judith},
  title   = {{Calculation of a constant Q spectral transform}},
  journal = {Journal of the Acoustical Society of America},
  volume  = {89},
  number  = {1},
  pages   = {425--434},
  year    = {1991},
}
@inproceedings{Muller2011,
  author    = {M{\"u}ller, Meinard and Ewert, Sebastian},
  title     = {{Chroma Toolbox: MATLAB Implementations for Extracting Variants of Chroma-Based Audio Features}},
  booktitle = {Proceedings of the International Conference on Music Information Retrieval (ISMIR)},
  pages     = {288--295},
  year      = {2011},
  address   = {Miami},
}
@article{Cortes1995,
  author    = {Cortes, Corinna and Vapnik, Vladimir},
  title     = {{Support-Vector Networks}},
  journal   = {Machine Learning},
  volume    = {20},
  number    = {3},
  pages     = {273--297},
  month     = sep,
  year      = {1995},
  publisher = {Kluwer Academic Publishers-Plenum Publishers},
  doi       = {10.1023/A:1022627411411},
  issn      = {1573-0565},
  url       = {http://link.springer.com/article/10.1023/A:1022627411411},
}
@inproceedings{Kavcic2013,
  author    = {Kav{\v{c}}i{\v{c}}, Alenka and Pesek, Matev{\v{z}} and Bohak, Ciril and Marolt, Matija},
  title     = {{Edoo: online match-making portal for educational content production}},
  booktitle = {Proceedings of the 1st Workshop on Defining a European Research Agenda on Information Systems and Management in eMedia Industries},
  pages     = {14--16},
  year      = {2013},
  address   = {Bled},
}
@article{Klapuri2004,
abstract = {The aim of this overview is to describe methods for the automatic transcription of Western polyphonic music. The transcription task is here understood as transforming an acoustic musical signal into a MIDI-like symbolic representation. Only pitched musical instruments are considered: recognizing the sounds of drum instruments is not discussed. The main emphasis is laid on estimating the multiple fundamental frequencies of several concurrent sounds. Various approaches to solve this problem are discussed, including methods that are based on modelling the human auditory periphery, methods that mimic the human auditory scene analysis function, signal model-based Bayesian inference methods, and data-adaptive methods. Another subproblem addressed is the rhythmic parsing of acoustic musical signals. From the transcription point of view, this amounts to the temporal segmentation of music signals at different time scales. The relationship between the two subproblems and the general structure of the transcription problem is discussed.},
author = {Klapuri, Anssi P},
doi = {10.1080/0929821042000317840},
issn = {0929-8215},
journal = {Journal of New Music Research},
month = sep,
number = {3},
pages = {269--282},
publisher = {Routledge},
title = {{Automatic Music Transcription as We Know it Today}},
url = {http://dx.doi.org/10.1080/0929821042000317840},
volume = {33},
year = {2004}
}
@phdthesis{Piszczalski1986,
author = {Piszczalski, Martin},
school = {University of Michigan},
title = {{A computational model of music transcription}},
url = {http://dl.acm.org/citation.cfm?id=15202},
year = {1986}
}
@inproceedings{Tkalcic2003,
abstract = {In this paper, we present an overview of colour spaces used in electrical engineering and image processing. We stress the importance of the perceptual, historical and applicational background that led to a colour space. The colour spaces presented are: RGB, opponent-colour spaces, phenomenal colour spaces, CMY, CMYK, TV colour spaces (YUV and YIQ), PhotoYCC, CIE XYZ, Lab and Luv colour spaces.},
author = {Tkal{\v{c}}i{\v{c}}, Marko and Tasi{\v{c}}, Jurij F},
booktitle = {The IEEE Region 8 EUROCON 2003. Computer as a Tool.},
doi = {10.1109/EURCON.2003.1248032},
isbn = {0-7803-7763-X},
pages = {304--308},
publisher = {IEEE},
shorttitle = {EUROCON 2003. Computer as a Tool. The IEEE Region},
title = {{Colour spaces: perceptual, historical and applicational background}},
url = {http://ieeexplore.ieee.org/lpdocs/epic03/wrapper.htm?arnumber=1248032},
volume = {1},
year = {2003}
}
@inproceedings{Donaldson2009,
author = {Donaldson, Justin and Lamere, Paul},
booktitle = {Proceedings of the International Conference on Music Information Retrieval (ISMIR)},
note = {Tutorial},
title = {{Using Visualizations for Music Discovery}},
year = {2009}
}
@inproceedings{Henaff2011,
author = {Henaff, Mikael and Jarrett, Kevin and Kavukcuoglu, Koray and LeCun, Yann},
booktitle = {Proceedings of the International Conference on Music Information Retrieval (ISMIR)},
pages = {681--686},
title = {{Unsupervised Learning of Sparse Features for Scalable Audio Classification}},
year = {2011}
}
@inproceedings{Smaragdis2003,
abstract = {We present a methodology for analyzing polyphonic musical passages comprised of notes that exhibit a harmonically fixed spectral profile (such as piano notes). Taking advantage of this unique note structure, we can model the audio content of the musical passage by a linear basis transform and use non-negative matrix decomposition methods to estimate the spectral profile and the temporal information of every note. This approach results in a very simple and compact system that is not knowledge-based, but rather learns notes by observation.},
author = {Smaragdis, Paris and Brown, Judith C},
booktitle = {2003 IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (IEEE Cat. No.03TH8684)},
doi = {10.1109/ASPAA.2003.1285860},
isbn = {0-7803-7850-4},
keywords = {Cost function,Educational institutions,Harmonic analysis,Image analysis,Independent component analysis,Matrix decomposition,Multiple signal classification,Physics,Principal component analysis,Redundancy,audio content modeling,audio signal processing,harmonically fixed spectral profile,linear basis transform,music,nonnegative matrix decomposition,nonnegative matrix factorization,parameter estimation,polyphonic music transcription,spectral analysis,spectral profile estimation,temporal information estimation},
pages = {177--180},
publisher = {IEEE},
shorttitle = {Applications of Signal Processing to Audio and Aco},
title = {{Non-negative matrix factorization for polyphonic music transcription}},
url = {http://ieeexplore.ieee.org/lpdocs/epic03/wrapper.htm?arnumber=1285860},
year = {2003}
}
@inproceedings{Sheh2003a,
address = {Baltimore},
author = {Sheh, Alexander and Ellis, Daniel},
booktitle = {Proceedings of the International Conference on Music Information Retrieval (ISMIR)},
pages = {183--189},
title = {{Chord segmentation and recognition using EM-trained hidden Markov models}},
year = {2003}
}
@article{Temperley2013,
author = {Temperley, David and de Clercq, Trevor},
doi = {10.1080/09298215.2013.839525},
issn = {0929-8215},
journal = {Journal of New Music Research},
month = sep,
number = {3},
pages = {187--204},
publisher = {Routledge},
title = {{Statistical Analysis of Harmony and Melody in Rock Music}},
url = {http://www.tandfonline.com/doi/abs/10.1080/09298215.2013.839525},
volume = {42},
year = {2013}
}
@inproceedings{Dressler2011,
author = {Dressler, Karin},
booktitle = {Proceedings of the International Conference on Music Information Retrieval (ISMIR)},
pages = {19--24},
title = {{An Auditory Streaming Approach for Melody Extraction from Polyphonic Music}},
year = {2011}
}
@inproceedings{barthet2013design,
author = {Barthet, Mathieu and Marston, David and Baume, Chris and Fazekas, Gy{\"o}rgy and Sandler, Mark},
booktitle = {Proceedings of the International Society for Music Information Retrieval Conference (ISMIR)},
title = {{Design and Evaluation of Semantic Mood Models for Music Recommendation}},
year = {2013}
}
@inproceedings{Peeters2013,
author = {Pauwels, Johan and Peeters, Geoffroy},
booktitle = {Proceedings of the IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
pages = {749--753},
title = {{Evaluating Automatically Estimated Chord Sequences}},
year = {2013}
}
@article{Sapp2005,
author = {Sapp, Craig Stuart},
journal = {Computers in Entertainment},
number = {4},
pages = {1--19},
title = {{Visual hierarchical key analysis}},
volume = {3},
year = {2005}
}
@phdthesis{Marolt2002,
author = {Marolt, Matija},
school = {University of Ljubljana},
title = {{Automatic Transcription of Piano Music with Neural Networks}},
year = {2002}
}
@article{Moorer1977,
author = {Moorer, James A},
journal = {Computer Music Journal},
number = {4},
pages = {32--38},
title = {{On the Transcription of Musical Sound by Computer}},
volume = {1},
year = {1977}
}
@incollection{Rumelhart1986,
abstract = {What makes people smarter than computers? The work described in these two volumes suggests that the answer lies in the massively parallel architecture of the human mind. It is some of the most exciting work in cognitive science, unifying neural and cognitive processes in a highly computational framework, with links to artificial intelligence. Although thought and problem solving have a sequential character when viewed over a time frame of minutes or hours, the authors argue that each step in the sequence is the result of the simultaneous activity of a large number of simple computational elements, each influencing others and being influenced by them. "Parallel Distributed Processing" describes their work in developing a theoretical framework for describing this parallel distributed processing activity and in applying the framework to the development of models of aspects of perception, memory, language, and thought. Volume 1 lays the theoretical foundations of parallel distributed processing. It introduces the approach and the reasons why the authors feel it is a fruitful one, describes several models of basic mechanisms with wide applicability to different problems, and presents a number of specific technical analyses of different aspects of parallel distributed models.},
author = {Rumelhart, David E and Hinton, Geoffrey E and Williams, Ronald J},
booktitle = {Parallel Distributed Processing: Foundations},
chapter = {8},
editor = {McClelland, James L and Rumelhart, David E},
isbn = {026268053X},
pages = {318--362},
publisher = {MIT Press},
title = {{Learning internal representations by error propagation}},
url = {http://books.google.si/books/about/Parallel\_Distributed\_Processing\_Foundati.html?id=eFPqqMBK-p8C\&pgis=1},
year = {1986}
}
@inproceedings{Zhang2005,
  author    = {Zhang, Yun-Gang and Zhang, Chang-Shui},
  title     = {{Separation of Music Signals by Harmonic Structure Modeling}},
  booktitle = {Advances in Neural Information Processing Systems},
  address   = {Vancouver},
  year      = {2005},
}
@inproceedings{Wankhammer2009,
address = {Como},
author = {Wankhammer, Alexander and Sciri, Peter and Sontacchi, Alois},
booktitle = {Proceedings of the International Conference on Digital Audio Effects (DAFx)},
title = {{Chroma and MFCC based pattern recognition in audio files utilizing hidden Markov models and dynamic programming}},
year = {2009}
}
@article{Liu2006,
author = {Lu, Lie and Liu, Dan and Zhang, Hong-Jiang},
doi = {10.1109/TSA.2005.860344},
issn = {1558-7916},
journal = {IEEE Transactions on Audio, Speech, and Language Processing},
keywords = {Acoustic signal detection,Affective computing,Computer vision,Data mining,Mood,Multiple signal classification,Music information retrieval,Psychology,Rhythm,Testing,Timbre,acoustic signal detection,audio signal processing,automatic music mood detection,feature extraction,hierarchical framework,intensity feature set,mood detection,mood tracking,music,music audio signal tracking,music clip emotional expression,music emotion,music information retrieval,music mood,music retrieval,music understanding,rhythm feature set,rhythm regularity,rhythm strength,spectral contrast features,spectral shape features,tempo,timbre feature set},
language = {English},
month = jan,
number = {1},
pages = {5--18},
publisher = {IEEE},
title = {{Automatic mood detection and tracking of music audio signals}},
url = {http://ieeexplore.ieee.org/articleDetails.jsp?arnumber=1561259},
volume = {14},
year = {2006}
}