@article{ZNSL_2023_529_a8,
author = {N. Rusnachenko and The Anh Le and Ngoc Diep Nguyen},
title = {Pre-training {LongT5} for {Vietnamese} mass-media multi-document summarization},
journal = {Zapiski Nauchnykh Seminarov POMI},
pages = {123--139},
year = {2023},
volume = {529},
language = {en},
url = {http://geodesic.mathdoc.fr/item/ZNSL_2023_529_a8/}
}
TY  - JOUR
AU  - N. Rusnachenko
AU  - The Anh Le
AU  - Ngoc Diep Nguyen
TI  - Pre-training LongT5 for Vietnamese mass-media multi-document summarization
JO  - Zapiski Nauchnykh Seminarov POMI
PY  - 2023
SP  - 123
EP  - 139
VL  - 529
UR  - http://geodesic.mathdoc.fr/item/ZNSL_2023_529_a8/
LA  - en
ID  - ZNSL_2023_529_a8
ER  -
N. Rusnachenko; The Anh Le; Ngoc Diep Nguyen. Pre-training LongT5 for Vietnamese mass-media multi-document summarization. Zapiski Nauchnykh Seminarov POMI, Investigations on applied mathematics and informatics. Part II–1, Vol. 529 (2023), pp. 123-139. http://geodesic.mathdoc.fr/item/ZNSL_2023_529_a8/
[1] J. Ainslie, S. Ontanon, C. Alberti, V. Cvicek, Z. Fisher, P. Pham, A. Ravula, S. Sanghai, Q. Wang, L. Yang, “ETC: Encoding long and structured inputs in transformers”, Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing, EMNLP (Online, November 2020), Association for Computational Linguistics, 268–284
[2] D. Bahdanau, K. Cho, Y. Bengio, Neural machine translation by jointly learning to align and translate, 2014, arXiv: 1409.0473
[3] I. Beltagy, M. E. Peters, A. Cohan, Longformer: The long-document transformer, 2020, arXiv: 2004.05150
[4] A. Cohan, F. Dernoncourt, D. S. Kim, T. Bui, S. Kim, W. Chang, N. Goharian, A discourse-aware attention model for abstractive summarization of long documents, 2018, arXiv: 1804.05685
[5] J. Devlin, M.-W. Chang, K. Lee, K. Toutanova, “BERT: Pre-training of deep bidirectional transformers for language understanding”, Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies (Minneapolis, Minnesota, June 2019), v. 1, Long and Short Papers, Association for Computational Linguistics, 4171–4186
[6] P. Erdős, A. Rényi, “On the evolution of random graphs”, Publ. Math. Inst. Hung. Acad. Sci., 5:1 (1960), 17–60
[7] G. Erkan, D. R. Radev, “LexRank: Graph-based lexical centrality as salience in text summarization”, J. Artificial Intelligence Research, 22 (2004), 457–479
[8] J. Goldstein, J. Carbonell, “Summarization: (1) using MMR for diversity-based reranking and (2) evaluating summaries”, TIPSTER TEXT PROGRAM PHASE III, Proceedings of a Workshop held at Baltimore (Maryland, October 13-15, 1998), Association for Computational Linguistics, 1998, 181–195
[9] M. Guo, J. Ainslie, D. Uthus, S. Ontanon, J. Ni, Y.-H. Sung, Y. Yang, “LongT5: Efficient text-to-text transformer for long sequences”, Findings of the Association for Computational Linguistics, NAACL 2022 (Seattle, United States, July 2022), Association for Computational Linguistics, 724–736
[10] T. Kudo, J. Richardson, “SentencePiece: A simple and language independent subword tokenizer and detokenizer for neural text processing”, Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing: System Demonstrations (Brussels, Belgium, November 2018), Association for Computational Linguistics, 66–71
[11] M. Lewis, Y. Liu, N. Goyal, M. Ghazvininejad, A. Mohamed, O. Levy, V. Stoyanov, L. Zettlemoyer, “BART: Denoising sequence-to-sequence pre-training for natural language generation, translation, and comprehension”, Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics (Online, July 2020), Association for Computational Linguistics, 7871–7880
[12] H. P. Luhn, “The automatic creation of literature abstracts”, IBM J. Res. Dev., 2:2 (1958), 159–165
[13] R. Nallapati, B. Zhou, C. dos Santos, Ç. Gulçehre, B. Xiang, “Abstractive text summarization using sequence-to-sequence RNNs and beyond”, Proceedings of the 20th SIGNLL Conference on Computational Natural Language Learning (Berlin, Germany, August 2016), Association for Computational Linguistics, 280–290
[14] S. Narayan, S. B. Cohen, M. Lapata, “Don't give me the details, just the summary! Topic-aware convolutional neural networks for extreme summarization”, Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing (Brussels, Belgium, October-November 2018), Association for Computational Linguistics, 1797–1807
[15] A. Nenkova, R. Passonneau, “Evaluating content selection in summarization: The pyramid method”, Proceedings of the Human Language Technology Conference of the North American Chapter of the Association for Computational Linguistics, HLT-NAACL 2004 (Boston, Massachusetts, USA, May 2004), Association for Computational Linguistics, 145–152
[16] D. Q. Nguyen, A. T. Nguyen, “PhoBERT: Pre-trained language models for Vietnamese”, Findings of the Association for Computational Linguistics, EMNLP 2020 (Online, November 2020), Association for Computational Linguistics, 1037–1042
[17] M.-T. Nguyen, H.-D. Nguyen, T.-H.-N. Nguyen, V.-H. Nguyen, “Towards state-of-the-art baselines for Vietnamese multi-document summarization”, 2018 10th International Conference on Knowledge and Systems Engineering (KSE), 2018, 85–90
[19] L. Phan, H. Tran, H. Nguyen, T. H. Trinh, “ViT5: Pretrained text-to-text transformer for Vietnamese language generation”, Proceedings of the 2022 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Student Research Workshop, Association for Computational Linguistics, 2022, 136–142
[20] J. Phang, Y. Zhao, P. J. Liu, Investigating efficiently extending transformers for long input summarization, 2022, arXiv: 2208.04347
[21] C. Raffel, N. Shazeer, A. Roberts, K. Lee, S. Narang, M. Matena, Y. Zhou, W. Li, P. J. Liu, “Exploring the limits of transfer learning with a unified text-to-text transformer”, J. Machine Learning Research, 21:140 (2020), 1–67
[22] E. Sharma, C. Li, L. Wang, “BIGPATENT: A large-scale dataset for abstractive and coherent summarization”, Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics (Florence, Italy, July 2019), Association for Computational Linguistics, 2204–2213
[23] P. Shaw, J. Uszkoreit, A. Vaswani, “Self-attention with relative position representations”, Proceedings of the 2018 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Short Papers (New Orleans, Louisiana, June 2018), v. 2, Association for Computational Linguistics, 464–468
[24] H. Q. To, K. Van Nguyen, N. L.-T. Nguyen, A. G.-T. Nguyen, “Monolingual vs multilingual BERTology for Vietnamese extractive multi-document summarization”, Proceedings of the 35th Pacific Asia Conference on Language, Information and Computation (Shanghai, China, November 2021), Association for Computational Linguistics, 692–699
[25] N. L. Tran, D. M. Le, D. Q. Nguyen, “BARTpho: Pre-trained sequence-to-sequence models for Vietnamese”, Proceedings of the 23rd Annual Conference of the International Speech Communication Association, 2022
[26] N. T. Tran, M. Q. Nghiem, N. T. H. Nguyen, N. L. T. Nguyen, N. V. Chi, D. Dinh, “ViMs: a high-quality Vietnamese dataset for abstractive multi-document summarization”, Language Resources and Evaluation, 54:4 (2020), 893–920
[27] V.-G. Ung, A.-V. Luong, N.-T. Tran, M.-Q. Nghiem, “Combination of features for Vietnamese news multi-document summarization”, 2015 Seventh International Conference on Knowledge and Systems Engineering (KSE), IEEE, 2015, 186–191
[28] A. Vaswani, N. Shazeer, N. Parmar, J. Uszkoreit, L. Jones, A. N. Gomez, Ł. Kaiser, I. Polosukhin, “Attention is all you need”, Proceedings of the 31st International Conference on Neural Information Processing Systems, NIPS'17 (Red Hook, NY, USA), Curran Associates Inc., 2017, 6000–6010
[29] T. Vu, D. Q. Nguyen, D. Q. Nguyen, M. Dras, M. Johnson, “VnCoreNLP: A Vietnamese natural language processing toolkit”, Proceedings of the 2018 Conference of the North American Chapter of the Association for Computational Linguistics: Demonstrations (New Orleans, Louisiana, June 2018), Association for Computational Linguistics, 56–60
[30] W. Xiao, I. Beltagy, G. Carenini, A. Cohan, “PRIMERA: Pyramid-based masked sentence pre-training for multi-document summarization”, Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (Dublin, Ireland, May 2022), v. 1, Long Papers, Association for Computational Linguistics, 5245–5263
[31] M. Zaheer, G. Guruganesh, K. A. Dubey, J. Ainslie, C. Alberti, S. Ontanon, P. Pham, A. Ravula, Q. Wang, L. Yang, et al., “Big Bird: Transformers for longer sequences”, Advances in Neural Information Processing Systems, 33 (2020)
[32] J. Zhang, Y. Zhao, M. Saleh, P. J. Liu, “PEGASUS: Pre-training with extracted gap-sentences for abstractive summarization”, Proceedings of the 37th International Conference on Machine Learning, ICML'20, JMLR.org, 2020
[33] Z. Zheng, X. Yue, S. Huang, J. Chen, A. Birch, “Towards making the most of context in neural machine translation”, Proceedings of the Twenty-Ninth International Joint Conference on Artificial Intelligence, IJCAI'20, 2021