Skip to content

Acknowledgements

Gianluca Pagliara edited this page Oct 24, 2021 · 1 revision

VoxCeleb, VGGVox-M, ResNetSE34L, ResNetSE34V2

% Interspeech 2020 paper covering the VoxCeleb trainer models listed in the heading above.
@inproceedings{chung2020in,
  author    = {Chung, Joon Son and
               Huh, Jaesung and
               Mun, Seongkyu and
               Lee, Minjae and
               Heo, Hee Soo and
               Choe, Soyeon and
               Ham, Chiheon and
               Jung, Sunghwan and
               Lee, Bong-Jin and
               Han, Icksang},
  title     = {In defence of metric learning for speaker recognition},
  booktitle = {Interspeech},
  year      = {2020},
}
% arXiv preprint: the identifier lives in eprint/archivePrefix (same layout as
% ding2020autospeech) instead of being packed into a journal field.
% Braced words in the title keep their capitals under sentence-casing styles.
@misc{heo2020clova,
  author        = {Heo, Hee Soo and Lee, Bong-Jin and Huh, Jaesung and Chung, Joon Son},
  title         = {{Clova} baseline system for the {VoxCeleb} {Speaker} {Recognition} {Challenge} 2020},
  year          = {2020},
  eprint        = {2009.14153},
  archivePrefix = {arXiv}
}

AutoSpeech, ResNet-18, ResNet-34

% arXiv preprint. The braces around AutoSpeech stop sentence-casing styles from
% lowering the inner capital; authors are stored in unambiguous "Last, First" form.
@misc{ding2020autospeech,
    author        = {Ding, Shaojin and Chen, Tianlong and Gong, Xinyu and Zha, Weiwei and Wang, Zhangyang},
    title         = {{AutoSpeech}: Neural Architecture Search for Speaker Recognition},
    year          = {2020},
    eprint        = {2005.03215},
    archivePrefix = {arXiv},
    primaryClass  = {eess.AS}
}

SincNet

% DBLP-exported entry. SincNet is braced so sentence-casing styles keep its
% internal capital; authors are stored in unambiguous "Last, First" form.
@inproceedings{DBLP:conf/slt/RavanelliB18,
  author    = {Ravanelli, Mirco and
               Bengio, Yoshua},
  title     = {Speaker Recognition from Raw Waveform with {SincNet}},
  booktitle = {{SLT}},
  pages     = {1021--1028},
  publisher = {{IEEE}},
  year      = {2018}
}

MobileNet

% IEEE Xplore export, cleaned up: empty volume/number fields removed, page range
% uses an en-dash (--), the accented surname uses a LaTeX escape so classic BibTeX
% sorts it correctly, and the model name is braced to survive sentence casing.
@inproceedings{9207519,
  author    = {Chagas Nunes, J. A. and Mac{\^e}do, D. and Zanchettin, C.},
  title     = {{AM-MobileNet1D}: A Portable Model for Speaker Recognition},
  booktitle = {2020 International Joint Conference on Neural Networks ({IJCNN})},
  year      = {2020},
  pages     = {1--8},
  doi       = {10.1109/IJCNN48605.2020.9207519}
}

3D Convolutional Neural Network

% arXiv preprint: the identifier lives in eprint/archivePrefix instead of a journal
% field. {3D} is braced and upper-cased so styles cannot render it as "3d".
@misc{torfi2017text,
  author        = {Torfi, Amirsina and Nasrabadi, Nasser M. and Dawson, Jeremy},
  title         = {Text-Independent Speaker Verification Using {3D} Convolutional Neural Networks},
  year          = {2017},
  eprint        = {1705.09422},
  archivePrefix = {arXiv}
}

Metrics

% Journal article. The page range uses a double hyphen so it is typeset as an en-dash.
@article{bianco2018dnnsbench,
  author  = {Bianco, Simone and Cadene, Remi and Celona, Luigi and Napoletano, Paolo},
  title   = {Benchmark Analysis of Representative Deep Neural Network Architectures},
  journal = {IEEE Access},
  volume  = {6},
  pages   = {64270--64277},
  year    = {2018},
  doi     = {10.1109/ACCESS.2018.2877890},
  issn    = {2169-3536},
}

PyAnnote

% Only the lowercase package name and the acronyms are braced; the rest of the title
% is left for the bibliography style to case. month uses the predefined macro.
@inproceedings{Bredin2020,
  author    = {Bredin, Herv{\'e} and Yin, Ruiqing and Coria, Juan Manuel and Gelly, Gregory and Korshunov, Pavel and Lavechin, Marvin and Fustes, Diego and Titeux, Hadrien and Bouaziz, Wassim and Gill, Marie-Philippe},
  title     = {{pyannote.audio}: Neural Building Blocks for Speaker Diarization},
  booktitle = {{ICASSP} 2020, {IEEE} International Conference on Acoustics, Speech, and Signal Processing},
  address   = {Barcelona, Spain},
  month     = may,
  year      = {2020},
}