Voir la notice de l'article provenant de la source Math-Net.Ru
@article{IZKAB_2022_6_a11,
  author    = {Nalchadzhi, K. V.},
  title     = {Overview of current open solutions in the field of speech recognition},
  journal   = {News of the Kabardin-Balkar scientific center of RAS},
  number    = {6},
  pages     = {127--133},
  publisher = {mathdoc},
  year      = {2022},
  language  = {ru},
  url       = {http://geodesic.mathdoc.fr/item/IZKAB_2022_6_a11/},
}
TY - JOUR AU - K. V. Nalchadzhi TI - Overview of current open solutions in the field of speech recognition JO - News of the Kabardin-Balkar scientific center of RAS PY - 2022 SP - 127 EP - 133 IS - 6 PB - mathdoc UR - http://geodesic.mathdoc.fr/item/IZKAB_2022_6_a11/ LA - ru ID - IZKAB_2022_6_a11 ER -
K. V. Nalchadzhi. Overview of current open solutions in the field of speech recognition. News of the Kabardin-Balkar scientific center of RAS, no. 6 (2022), pp. 127-133. http://geodesic.mathdoc.fr/item/IZKAB_2022_6_a11/
[1] Hemant Yadav, Sunayana Sitaram [et al], “A Survey of Multilingual Models For Automatic Speech Recognition”, 2022, arXiv: abs/2202.12576
[2] Awni Hannun, Carl Case, Jared Casper [et al], Deep Speech: Scaling up end-to-end speech recognition, 2014, arXiv: abs/1412.5567v2 | MR
[3] Roger Grosse, Helen Kwong, Andrew Y. Ng [et al], Shift-Invariant Sparse Coding for Audio Classification, 2012, arXiv: abs/1206.5241
[4] Awni Y. Hannun, Daniel Jurafsky, Andrew Y. Ng [et al], First-Pass Large Vocabulary Continuous Speech Recognition using Bi-Directional Recurrent DNNs, 2014, arXiv: abs/1408.2873
[5] Anmol Gulati, James Qin, Chung-Cheng Chiu [et al], Conformer: Convolution-augmented Transformer for Speech Recognition, 2020, arXiv: abs/2005.08100
[6] Andrew L. Maas, Peng Qi, Ziang Xie [et al], Building DNN Acoustic Models for Large Vocabulary Speech Recognition, 2014, arXiv: abs/1406.7806
[7] Kaitao Song, Xu Tan, Di He, Jianfeng Lu [et al], “Double Path Networks for Sequence to Sequence Learning”, In Proceedings of the 27th International Conference on Computational Linguistics, 2018, 3064–3074, arXiv: abs/1806.04856
[8] Dario Amodei, Rishita Anubhai, Eric Battenberg [et al], Deep Speech 2: End-to-End Speech Recognition in English and Mandarin, 2015, arXiv: abs/1512.02595
[9] Tianxiao Shen, Myle Ott, Michael Auli [et al], Mixture Models for Diverse Machine Translation: Tricks of the Trade, 2019, arXiv: abs/1902.07816
[10] Neil Zeghidour, Qiantong Xu, Vitaliy Liptchinsky [et al], Wav2Letter++: Fully Convolutional Speech Recognition, 2018, arXiv: abs/1812.06864
[11] Noam Shazeer, Azalia Mirhoseini, Krzysztof Maziarz [et al], Outrageously Large Neural Networks: The Sparsely-Gated Mixture-of-Experts Layer, 2017, arXiv: abs/1701.06538
[12] Shashi Narayan, Shay B. Cohen, Mirella Lapata [et al], Don't Give Me the Details, Just the Summary! Topic-Aware Convolutional Neural Networks for Extreme Summarization, 2018, arXiv: abs/1808.08745
[13] Myle Ott, Sergey Edunov, Alexei Baevski [et al], Fairseq: A Fast, Extensible Toolkit for Sequence Modeling, 2019, arXiv: abs/1904.01038
[14] Stephen Merity, Nitish Shirish Keskar, Richard Socher [et al], An Analysis of Neural Language Modeling at Multiple Scales, 2018, arXiv: abs/1803.08240
[15] Sebastian Gehrmann, Yuntian Deng, Alexander M. Rush [et al], Bottom-Up Abstractive Summarization, 2018, arXiv: abs/1808.10792
[16] Shamil Chollampatt, Hwee Tou Ng, A Multilayer Convolutional Encoder-Decoder Neural Network for Grammatical Error Correction, 2018, arXiv: abs/1801.08831
[17] Steffen Schneider, Alexei Baevski, Ronan Collobert [et al], wav2vec: Unsupervised Pre-training for Speech Recognition, 2019, arXiv: abs/1904.05862
[18] Gabriel Synnaeve, Qiantong Xu, Jacob Kahn [et al], End-to-end ASR: from Supervised to Semi-Supervised Learning with Modern Architectures, 2019, arXiv: abs/1911.08460
[19] Jayadev Billa, Improving low-resource ASR performance with untranscribed out-of-domain data, 2021, arXiv: abs/2106.01227
[20] Wei-Ning Hsu, Benjamin Bolte, Yao-Hung Hubert Tsai [et al], HuBERT: Self-Supervised Speech Representation Learning by Masked Prediction of Hidden Units, 2021, arXiv: abs/2106.07447
[21] Tahir Javed, Sumanth Doddapaneni, Abhigyan Raman [et al], Towards Building ASR Systems for the Next Billion Users, 2021, arXiv: abs/2111.03945