% Journal article record. Author names use the unambiguous "Last, First" form;
% the braces in the title keep the proper noun capitalised under sentence-casing styles.
@article{ZNSL_2024_540_a13,
  author   = {Alekseev, A. and Tillabaeva, A. and Kabaeva, G. Dzh. and Nikolenko, S. I.},
  title    = {Syntax transfer to {Kyrgyz} using the treebank translation method},
  journal  = {Zapiski Nauchnykh Seminarov POMI},
  volume   = {540},
  pages    = {252--275},
  year     = {2024},
  language = {ru},
  url      = {http://geodesic.mathdoc.fr/item/ZNSL_2024_540_a13/},
}
TY  - JOUR
AU  - A. Alekseev
AU  - A. Tillabaeva
AU  - G. Dzh. Kabaeva
AU  - S. I. Nikolenko
TI  - Syntax transfer to Kyrgyz using the treebank translation method
JO  - Zapiski Nauchnykh Seminarov POMI
PY  - 2024
SP  - 252
EP  - 275
VL  - 540
UR  - http://geodesic.mathdoc.fr/item/ZNSL_2024_540_a13/
LA  - ru
ID  - ZNSL_2024_540_a13
ER  - 
A. Alekseev; A. Tillabaeva; G. Dzh. Kabaeva; S. I. Nikolenko. Syntax transfer to Kyrgyz using the treebank translation method. Zapiski Nauchnykh Seminarov POMI, Investigations on applied mathematics and informatics. Part IV, Tome 540 (2024), pp. 252-275. http://geodesic.mathdoc.fr/item/ZNSL_2024_540_a13/
[1] UniversalDependencies/tools/eval.py: UD Evaluation Script on GitHub, 2024 https://github.com/UniversalDependencies/tools/blob/19c980e95ed0944dd5ecd262322403f8a77cee69/eval.py
[2] J. Achiam, S. Adler, S. Agarwal, et al., GPT-4 technical report, 2023, arXiv: 2303.08774
[3] Ž. Agić, A. Johannsen, B. Plank, et al., “Multilingual Projection for Parsing Truly Low-Resource Languages”, Transactions of the Association for Computational Linguistics, 2016, no. 4, 301–312
[4] F. Akkurt, B. Chontaeva, Ç. Çöltekin, et al., Unifying the Annotations in Turkic Universal Dependencies Treebanks, 2nd UniDive Workshop Theses (Online), 2024
[5] A. Alekseev, alexeyev/apertium2ud: mapping tagsets, 2023
[6] A. Alekseev, T. Turatali, “KyrgyzNLP: Challenges, Progress, and Future”, Proceedings of the 12th International Conference on Analysis of Images, Social Networks, and Texts (AIST 2024), Lecture Notes in Computer Science, 15419, Springer, 2024 (to appear) | MR
[7] W. Ammar, G. Mulcaire, M. Ballesteros, et al., “Many Languages, One Parser”, Transactions of the Association for Computational Linguistics, 2016, no. 4, 431–444 | DOI
[8] I. Benli, UD_Kyrgyz-KTMU: UD for Kyrgyz, 2023 https://github.com/UniversalDependencies/UD_Kyrgyz-KTMU/
[9] I. Benli, B. Sharshembaev, “Dependency Parsing Based Treebank for Kyrgyz Language”, Ymer, 23:7 (2024), 325–342
[10] P.F. Brown, S.A. Della Pietra, V.J. Della Pietra, R.L. Mercer, “The mathematics of statistical machine translation: Parameter estimation”, Computational Linguistics, 19:2 (1993), 263–311
[11] A. Das, S. Sarkar, “A Survey of the Model Transfer Approaches to Cross-Lingual Dependency Parsing”, ACM Transactions on Asian and Low-Resource Language Information Processing, 19:5 (2020), 1–60
[12] M.-C. de Marneffe, C.D. Manning, J. Nivre, D. Zeman, “Universal Dependencies”, Computational Linguistics, 47:2 (2021), 255–308
[13] J. Devlin, M.-W. Chang, K. Lee, K. Toutanova, “BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding”, Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, 2019, 4171–4186 | MR
[14] Z.-Y. Dou, G. Neubig, “Word Alignment by Fine-tuning Embeddings on Parallel Corpora”, Conference of the European Chapter of the Association for Computational Linguistics (EACL), 2021
[15] G. Durrett, A. Pauls, D. Klein, “Syntactic transfer using a bilingual lexicon”, Conference on Empirical Methods in Natural Language Processing and Computational Natural Language Learning, 2012, 1–11
[16] J. Heinecke, “ConlluEditor: a fully graphical editor for Universal Dependencies treebank files”, Universal Dependencies Workshop (Paris), 2019 | Zbl
[17] J.E. Hopcroft, R.M. Karp, “An $n^{5/2}$ algorithm for maximum matchings in bipartite graphs”, SIAM Journal on Computing, 2:4 (1973), 225–231 | DOI | MR | Zbl
[18] R. Hwa, Ph. Resnik, A. Weinberg, C. Cabezas, O. Kolak, “Bootstrapping parsers via syntactic projection across parallel texts”, Natural Language Engineering, 11:3 (2005), 311–325 | DOI
[19] M.J. Sabet, Ph. Dufter, F. Yvon, H. Schütze, “SimAlign: High Quality Word Alignments without Parallel Training Data using Static and Contextualized Embeddings”, Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing: Findings, 2020, 1627–1643
[20] A. Conneau, Unsupervised cross-lingual representation learning at scale, 2019, arXiv: 1911.02116
[21] Y. Liu, M. Ott, N. Goyal, J. Du, M. Joshi, D. Chen, O. Levy, M. Lewis, L. Zettlemoyer, V. Stoyanov, RoBERTa: A Robustly Optimized BERT Pretraining Approach, 2019, arXiv: 1907.11692
[22] R. McDonald, J. Nivre, Y. Quirmbach-Brundage, et al., “Universal Dependency Annotation for Multilingual Parsing”, Proceedings of the 51st Annual Meeting of the Association for Computational Linguistics, v. 2, Short Papers, 2013, 92–97
[23] J. Mirzakhalov, A. Babu, A. Kunafin, A. Wahab, B. Moydinboyev, S. Ivanova, M. Uzokova, Sh. Pulatova, D. Ataman, J. Kreutzer, F. M. Tyers, O. Firat, J. Licato, S. Chellappan, “Evaluating Multiway Multilingual NMT in the Turkic Languages”, Proceedings of the Sixth Conference on Machine Translation, 2021, 518–530
[24] J. Nivre, “Towards a universal grammar for natural language processing”, International conference on intelligent text processing and computational linguistics, Springer, 2015, 3–16
[25] P. Qi, Y. Zhang, Y. Zhang, J. Bolton, C.D. Manning, “Stanza: A Python Natural Language Processing Toolkit for Many Human Languages”, Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics: System Demonstrations, 2020 | MR
[26] M.S. Rasooli, M. Collins, “Density-Driven Cross-Lingual Transfer of Dependency Parsers”, Proceedings of the 2015 Conference on Empirical Methods in Natural Language Processing (Lisbon, Portugal), 2015, 328–338 | DOI
[27] P. Stenetorp, S. Pyysalo, G. Topić, T. Ohta, S. Ananiadou, J. Tsujii, “brat: a Web-based Tool for NLP-Assisted Text Annotation”, Proceedings of the Demonstrations Session at EACL 2012 (Avignon, France), 2012
[28] M. Straka, “UDPipe 2.0 Prototype at CoNLL 2018 UD Shared Task”, Proceedings of the CoNLL 2018 Shared Task: Multilingual Parsing from Raw Text to Universal Dependencies (Brussels, Belgium), 2018, 197–207
[29] U. Sulubacak, G. Eryiğit, “Implementing universal dependency, morphology, and multiword expression annotation standards for Turkish language processing”, Turkish Journal of Electrical Engineering and Computer Sciences, 26:3 (2018), 1662–1672
[30] U. Sulubacak, G. Eryiğit, T. Pamay, “IMST: A revisited Turkish dependency treebank”, The 1st International Conference on Turkic Computational Linguistics, Ege University Press, 2016, 1–6
[31] U. Sulubacak, M. Gökırmak, F. M. Tyers, Ç. Çöltekin, J. Nivre, G. Eryiğit, “Universal dependencies for Turkish”, Proceedings of COLING 2016, the 26th International Conference on Computational Linguistics, Technical papers, 2016, 3444–3454
[32] A. Søgaard, “Data point selection for cross-language adaptation of dependency parsers”, The 49th Annual Meeting of the Association for Computational Linguistics: Human Language Technologies, Short Papers, v. 2, 2011, 682–686
[33] L. Tesnière, Éléments de syntaxe structurale, Klincksieck, Paris, 1959
[34] J. Tiedemann, Ž. Agić, J. Nivre, “Treebank Translation for Cross-Lingual Parser Induction”, Proceedings of the Eighteenth Conference on Computational Natural Language Learning (Ann Arbor, Michigan), 2014, 130–140 | DOI
[35] A. Tillabaeva, Syntactic Transfer Based on the Polivariant Parallel Kyrgyz-Russian Corpus Manas, Master's thesis, HSE University, Moscow, Russia, 2024 https://www.hse.ru/en/ma/ling/students/diplomas/930858853
[36] U. Türk, F. Atmaca, Ş. B. Özateş, G. Berk, S. T. Bedir, A. Köksal, B. Ö. Başaran, T. Güngör, A. Özgür, “Resources for Turkish Dependency Parsing: Introducing the BOUN Treebank and the BoAT Annotation Tool”, Language Resources and Evaluation, 56:1 (2022), 259–307 | DOI
[37] F. M. Tyers, M. Sheyanova, J. N. Washington, “UD Annotatrix: An Annotation Tool for Universal Dependencies”, Proceedings of the 16th International Workshop on Treebanks and Linguistic Theories, TLT16, 2018, 10–17
[38] Y. Veitsman, Recent Advancements and Challenges of Turkic Central Asian Language Processing, 2024, arXiv: 2407.05006 | MR
[39] P. Virtanen, R. Gommers, T. E. Oliphant, et al., “SciPy 1.0: Fundamental Algorithms for Scientific Computing in Python”, Nature Methods, 17 (2020), 261–272 | DOI
[40] J. Washington, Ç. Çöltekin, F. Akkurt, B. Chontaeva, S. Eslami, G. Jumalieva, A. Kasieva, A. Kuzgun, B. Marşan, Ch. Taguchi, “Strategies for the Annotation of Pronominalised Locatives in Turkic Universal Dependency Treebanks”, Proc. Joint Workshop on Multiword Expressions and Universal Dependencies (MWE-UD), LREC-COLING 2024, 2024, 207–219
[41] J. N. Washington, M. Ipasov, F. M. Tyers, “A finite-state morphological transducer for Kyrgyz”, LREC, 2012, 934–940
[42] H. Zhao, Y. Song, Ch. Kit, G. Zhou, “Cross-language dependency parsing using a bilingual lexicon”, Conference of the 47th Annual Meeting of the ACL and the 4th International Joint Conference on Natural Language Processing of the AFNLP, Suntec (Singapore), 2009, 55–63
[43] G. K. Dzhumalieva, A. A. Kasieva, S. Zh. Musazhanova, “Adaptatsiya terminov veb-proekta «Universalnye zavisimosti» na kyrgyzskii yazyk”, Vestnik KRSU, 23:6 (2023), 71–75
[44] A. A. Kasieva, G. K. Dzhumalieva, A. Tompson, et al., “Problemy kyrgyzskoi sintaksicheskoi annotatsii v freimvorke Universal Dependencies”, Proceedings of TurkLang-2023 (Bukhara), 2023, 189–216
[45] S. Zh. Musazhanova, A. A. Kasieva, G. K. Dzhumalieva, “Sintaksicheskaya annotatsiya kyrgyzskogo yazyka na osnove novosozdannogo korpusa”, Vestnik Issyk-Kulskogo universiteta, 54:2 (2023), 140–148