Voir la notice de l'article provenant de la source Math-Net.Ru
@comment{Journal article record; the citation key matches the item id used in the url field.}
@article{MAIS_2023_30_4_a7,
  author    = {A. V. Glazkova and D. A. Morozov and M. S. Vorobeva and A. A. Stupnikov},
  title     = {Keyphrase generation for the {Russian-language} scientific texts using {mT5}},
  journal   = {Modelirovanie i analiz informacionnyh sistem},
  volume    = {30},
  number    = {4},
  pages     = {418--428},
  year      = {2023},
  publisher = {mathdoc},
  language  = {ru},
  url       = {http://geodesic.mathdoc.fr/item/MAIS_2023_30_4_a7/},
}
TY  - JOUR
AU  - A. V. Glazkova
AU  - D. A. Morozov
AU  - M. S. Vorobeva
AU  - A. A. Stupnikov
TI  - Keyphrase generation for the Russian-language scientific texts using mT5
JO  - Modelirovanie i analiz informacionnyh sistem
PY  - 2023
SP  - 418
EP  - 428
VL  - 30
IS  - 4
PB  - mathdoc
UR  - http://geodesic.mathdoc.fr/item/MAIS_2023_30_4_a7/
LA  - ru
ID  - MAIS_2023_30_4_a7
ER  - 
%0 Journal Article
%A A. V. Glazkova
%A D. A. Morozov
%A M. S. Vorobeva
%A A. A. Stupnikov
%T Keyphrase generation for the Russian-language scientific texts using mT5
%J Modelirovanie i analiz informacionnyh sistem
%D 2023
%P 418-428
%V 30
%N 4
%I mathdoc
%U http://geodesic.mathdoc.fr/item/MAIS_2023_30_4_a7/
%G ru
%F MAIS_2023_30_4_a7
A. V. Glazkova; D. A. Morozov; M. S. Vorobeva; A. A. Stupnikov. Keyphrase generation for the Russian-language scientific texts using mT5. Modelirovanie i analiz informacionnyh sistem, Tome 30 (2023) no. 4, pp. 418-428. http://geodesic.mathdoc.fr/item/MAIS_2023_30_4_a7/
[1] N. S. Lagutina, K. V. Lagutina, A. S. Adrianov, I. V. Paramonov, “Russian language thesauri: Automated construction and application for natural language processing tasks”, Modeling and Analysis of Information Systems, 25:4 (2018), 435–458 (in Russian) | DOI
[2] S. Beliga, Keyword extraction: A review of methods and approaches, 2014 https://api.semanticscholar.org/CorpusID:6834431
[3] E. Çano, O. Bojar, “Keyphrase generation: A multi-aspect survey”, 25th Conference of Open Innovations Association, FRUCT, IEEE, 2019, 85–94
[4] R. Campos, V. Mangaravite, A. Pasquali, A. Jorge, C. Nunes, A. Jatowt, “YAKE! keyword extraction from single documents using multiple local features”, Information Sciences, 509 (2020), 257–289 | DOI
[5] S. R. El-Beltagy, A. Rafea, “KP-Miner: A keyphrase extraction system for English and Arabic documents”, Information Systems, 34:1 (2009), 132–144 | DOI
[6] A. Bougouin, F. Boudin, B. Daille, “TopicRank: Graph-based topic ranking for keyphrase extraction”, International joint conference on natural language processing (IJCNLP), 2013, 543–551
[7] R. Mihalcea, P. Tarau, “TextRank: Bringing order into text”, Proceedings of the 2004 conference on empirical methods in natural language processing, 2004, 404–411
[8] I. H. Witten, G. W. Paynter, E. Frank, C. Gutwin, C. G. Nevill-Manning, “KEA: Practical automatic keyphrase extraction”, Proceedings of the fourth ACM conference on Digital libraries, 1999, 254–255 | DOI
[9] M. Grootendorst, KeyBERT: Minimal keyword extraction with BERT, version v3.0, 2020 https://github.com/MaartenGr/KeyBERT | DOI
[10] F. Boudin, Y. Gallina, “Redefining absent keyphrases and their effect on retrieval effectiveness”, Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Association for Computational Linguistics, 2021, 4185–4193 | DOI
[11] R. Meng, S. Zhao, S. Han, D. He, P. Brusilovsky, Y. Chi, “Deep keyphrase generation”, Proceedings of the 55th Annual Meeting of the Association for Computational Linguistics, v. 1, Long Papers, 2017, 582–592 | DOI
[12] E. Çano, O. Bojar, “Keyphrase generation: A text summarization struggle”, Proceedings of NAACL-HLT, 2019, 666–672
[13] J. Zhao, Y. Zhang, “Incorporating linguistic constraints into keyphrase generation”, Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics, 2019, 5224–5233 | DOI
[14] R. Liu, Z. Lin, W. Wang, Keyphrase prediction with pre-trained language model, 2020, arXiv: 2004.10462 [cs.CL]
[15] M. Kulkarni, D. Mahata, R. Arora, R. Bhowmik, “Learning rich representation of keyphrases from text”, Findings of the Association for Computational Linguistics: NAACL 2022, 2022, 891–906
[16] A. Vaswani et al., “Attention is all you need”, Proceedings of the 31st International Conference on Neural Information Processing Systems, 2017, 6000–6010
[17] M. F. M. Chowdhury, G. Rossiello, M. Glass, N. Mihindukulasooriya, A. Gliozzo, “Applying a generic sequence-to-sequence model for simple and effective keyphrase generation”, 2022, arXiv: 2201.05302 [cs.CL]
[18] A. Glazkova, D. Morozov, “Applying transformer-based text summarization for keyphrase generation”, Lobachevskii Journal of Mathematics, 44:1 (2023), 123–136 | DOI
[19] A. Glazkova, D. Morozov, “Multi-task fine-tuning for generating keyphrases in a scientific domain”, IX International Conference on Information Technology and Nanotechnology (ITNT), 2023, 1–5
[20] D. Wu, W. U. Ahmad, K. W. Chang, Pre-trained language models for keyphrase generation: A thorough empirical study, 2022, arXiv: 2212.10233 [cs.CL]
[21] E. G. Sokolova, O. Mitrofanova, “Automatic keyphrase extraction by applying KEA to Russian texts”, Computational linguistics and computing ontologies, 2017, 157–165 (in Russian)
[22] M. V. Sandul, E. G. Mikhailova, “Keyword extraction from single Russian document”, Proceedings of the Third Conference on Software Engineering and Information Management, 2018, 30–36
[23] E. Sokolova, A. Moskvina, O. Mitrofanova, “Keyphrase extraction from the Russian corpus on linguistics by means of KEA and RAKE algorithms”, Data analytics and management in data-intensive domains, 2018, 369–372
[24] O. A. Mitrofanova, D. A. Gavrilic, “Experiments on automatic keyphrase extraction in stylistically heterogeneous corpus of Russian texts”, Terra Linguistica, 50:4 (2022), 22–40 (in Russian)
[25] D. Morozov, A. Glazkova, M. Tyutyulnikov, B. Iomdin, “Keyphrase generation for abstracts of the Russian-language scientific articles”, NSU Vestnik. Series: Linguistics and Intercultural Communication, 21:1 (2023), 54–66 (in Russian) | DOI
[26] B. Koloski, S. Pollak, B. Škrlj, M. Martinc, “Extending neural keyword extraction with TF-IDF tagset matching”, Proceedings of the EACL Hackashop on News Media Content Analysis and Automated Report Generation, 2021, 22–29
[27] D. Morozov, A. Glazkova, Keyphrases CS Russian, version v1, 2022 https://data.mendeley.com/datasets/dv3j9wc59v/1 | DOI
[28] L. Xue et al., “mT5: A massively multilingual pre-trained text-to-text transformer”, Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, 2021, 483–498 | DOI
[29] K. Grashchenkov, A. Grabovoy, I. Khabutdinov, “A method of multilingual summarization for scientific documents”, Ivannikov Ispras Open Conference (ISPRAS), IEEE, 2022, 24–30 | DOI
[30] A. Gryaznov, R. Rybka, I. Moloshnikov, A. Selivanov, A. Sboev, “Influence of the duration of training a deep neural network model on the quality of text summarization task”, AIP Conference Proceedings, 2849:1 (2023), 400006
[31] A. A. Pechnikov, “Comparative analysis of scientometrics indicators of journals Math-Net.ru and Elibrary.ru”, Vestnik Tomskogo gosudarstvennogo universiteta, 2021, no. 56, 112–121 (in Russian)
[32] Y. Kuratov, M. Arkhipov, “Adaptation of deep bidirectional multilingual transformers for Russian language”, Komp'juternaja Lingvistika i Intellektual'nye Tehnologii, 2019, 333–339 (in Russian)
[33] C. Raffel et al., “Exploring the limits of transfer learning with a unified text-to-text transformer”, The Journal of Machine Learning Research, 21:1 (2020), 5485–5551 | MR
[34] L. Page, S. Brin, R. Motwani, T. Winograd, The PageRank citation ranking: Bringing order to the web: Stanford InfoLab, 1 508 503, 1999
[35] M. Korobov, “Morphological analyzer and generator for Russian and Ukrainian languages”, Analysis of Images, Social Networks and Texts, 4th International Conference, AIST 2015, Revised Selected Papers (Yekaterinburg, Russia, April 9-11, 2015), Springer, 2015, 320–332 | DOI
[36] J. Devlin, M. W. Chang, K. Lee, K. Toutanova, “BERT: Pre-training of deep bidirectional transformers for language understanding”, Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, v. 1, Long and Short Papers, 2019, 4171–4186
[37] F. Boudin, “PKE: An open source python-based keyphrase extraction toolkit”, Proceedings of COLING 2016, the 26th international conference on computational linguistics: system demonstrations, 2016, 69–73
[38] N. A. Gerasimenko, A. S. Chernyavsky, “ruSciBERT: A transformer language model for obtaining semantic embeddings of scientific texts in Russian”, Doklady Mathematics, 106 (2022), S95–S96 | DOI
[39] C. Y. Lin, “ROUGE: A package for automatic evaluation of summaries”, Text summarization branches out, 2004, 74–81
[40] T. Zhang, V. Kishore, F. Wu, K. Q. Weinberger, Y. Artzi, BERTScore: Evaluating text generation with BERT, 2020, arXiv: 1904.09675 [cs.CL]