@article{ZNSL_2024_540_a8,
author = {V. Pavliukevich and A. Zherdeva and O. Makhnytkina and D. Dyrmovskiy},
title = {Improving {RAG} with {LoRA} finetuning for persona text generation},
journal = {Zapiski Nauchnykh Seminarov POMI},
pages = {162--177},
year = {2024},
volume = {540},
language = {en},
url = {http://geodesic.mathdoc.fr/item/ZNSL_2024_540_a8/}
}
TY - JOUR
AU - V. Pavliukevich
AU - A. Zherdeva
AU - O. Makhnytkina
AU - D. Dyrmovskiy
TI - Improving RAG with LoRA finetuning for persona text generation
JO - Zapiski Nauchnykh Seminarov POMI
PY - 2024
SP - 162
EP - 177
VL - 540
UR - http://geodesic.mathdoc.fr/item/ZNSL_2024_540_a8/
LA - en
ID - ZNSL_2024_540_a8
ER -
V. Pavliukevich; A. Zherdeva; O. Makhnytkina; D. Dyrmovskiy. Improving RAG with LoRA finetuning for persona text generation. Zapiski Nauchnykh Seminarov POMI, Investigations on applied mathematics and informatics. Part IV, Tome 540 (2024), pp. 162-177. http://geodesic.mathdoc.fr/item/ZNSL_2024_540_a8/
[1] P. Lewis, E. Perez, A. Piktus, F. Petroni, V. Karpukhin, N. Goyal, H. Küttler, M. Lewis, W.-T. Yih, T. Rocktäschel, S. Riedel, and D. Kiela, Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks, 2020, arXiv: 2005.11401
[2] E. Hu, Y. Shen, Z. Allen-Zhu, Y. Li, S. Wang, L. Wang, and W. Chen, LoRA: Low-Rank Adaptation of Large Language Models, 2021, arXiv: 2106.09685
[3] Y. Matveev, O. Makhnytkina, P. Posokhov, A. Matveev, and S. Skrylnikov, “Personalizing Hybrid-Based Dialogue Agents”, Mathematics, 10:24 (2022), 4657 | DOI
[4] X. Xu, Z. Gou, W. Wu, Z. Niu, H. Wu, H. Wang, and S. Wang, Long Time No See! Open-Domain Conversation with Long-Term Persona Memory, 2022, arXiv: 2203.05797
[5] Y. Zheng, G. Chen, M. Huang, S. Liu, and X. Zhu, Personalized Dialogue Generation with Diversified Traits, 2019, arXiv: 1901.09672
[6] Z. Lin, A. Madotto, C. Wu, and P. Fung, Personalizing Dialogue Agents via Meta-Learning, 2019, arXiv: 1905.10033
[7] S. Bao, H. He, F. Wang, H. Wu, H. Wang, W. Wu, Z. Guo, Z. Liu, and X. Xu, PLATO-2: Towards Building an Open-Domain Chatbot via Curriculum Learning, 2021, arXiv: 2006.16779
[8] K. Apanasovich, O. Makhnytkina, and Y. Matveev, “Development and Research of Dialogue Agents with Long-Term Memory and Web Search”, Lecture Notes in Comput. Sci., 14338, 2023, 391–401 | DOI
[9] S. Zhang, E. Dinan, J. Urbanek, A. Szlam, D. Kiela, and J. Weston, Personalizing Dialogue Agents: I have a dog, do you have pets too?, 2018, arXiv: 1801.07243
[10] L. Xu, H. Xie, S.-Z.J. Qin, X. Tao, and F.L. Wang, Parameter-Efficient Fine-Tuning Methods for Pretrained Language Models: A Critical Review and Assessment, 2023, arXiv: 2312.12148
[11] P. He, X.L. Liu, J. Gao, and W. Chen, Parameter-Efficient Fine-Tuning of Large Language Models, 2024, arXiv: 2303.15647
[12] H. Liu, D. Tam, M. Muqeeth, J. Mohta, T. Huang, M. Bansal, and C. Raffel, Few-Shot Parameter-Efficient Fine-Tuning is Better and Cheaper than In-Context Learning, 2022, arXiv: 2205.05638
[13] J. Devlin, M.-W. Chang, K. Lee, and K. Toutanova, BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding, 2018, arXiv: 1810.04805
[14] M. Douze, A. Guzhva, C. Deng, J. Johnson, G. Szilvasy, P.-E. Mazaré, et al., The FAISS library, 2024, arXiv: 2401.08281
[15] N. Liu, K. Lin, J. Hewitt, A. Paranjape, M. Bevilacqua, F. Petroni, and P. Liang, Lost in the Middle: How Language Models Use Long Contexts, 2023, arXiv: 2307.03172
[16] P. Posokhov, K. Apanasovich, A. Matveeva, O. Makhnytkina, and A. Matveev, “Personalizing dialogue agents for Russian: retrieve and refine”, Proceedings of the 31st Conference of Open Innovations Association FRUCT, 2022, 245–252
[17] A.Q. Jiang, A. Sablayrolles, A. Mensch, C. Bamford, et al., Mistral 7B, 2023, arXiv: 2310.06825
[18] R. Zellers, A. Holtzman, Y. Bisk, A. Farhadi, and Y. Choi, “HellaSwag: Can a Machine Really Finish Your Sentence?”, Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics, 2019, 4791–4800 | DOI
[19] D. Hendrycks, C. Burns, S. Basart, A. Zou, M. Mazeika, D. Song, and J. Steinhardt, “Measuring Massive Multitask Language Understanding”, Proceedings of the International Conference on Learning Representations, 2021
[20] K. Sakaguchi, R. Le Bras, C. Bhagavatula, and Y. Choi, “WinoGrande: An Adversarial Winograd Schema Challenge at Scale”, Proceedings of the AAAI Conference on Artificial Intelligence, 34:5 (2020), 8732–8740 | DOI
[21] OpenAI, GPT-4 Technical Report, 2024, arXiv: 2303.08774