See the article's record at its source, Math-Net.Ru
@article{IZKAB_2024_26_4_a2,
    author = {A. N. Lukyanov and A. M. Tramova},
    title = {A method for assessing the degree of confidence in the self-explanations of {GPT} models},
    journal = {News of the Kabardin-Balkar scientific center of RAS},
    pages = {54--61},
    publisher = {mathdoc},
    volume = {26},
    number = {4},
    year = {2024},
    language = {ru},
    url = {http://geodesic.mathdoc.fr/item/IZKAB_2024_26_4_a2/}
}
TY - JOUR
AU - A. N. Lukyanov
AU - A. M. Tramova
TI - A method for assessing the degree of confidence in the self-explanations of GPT models
JO - News of the Kabardin-Balkar scientific center of RAS
PY - 2024
SP - 54
EP - 61
VL - 26
IS - 4
PB - mathdoc
UR - http://geodesic.mathdoc.fr/item/IZKAB_2024_26_4_a2/
LA - ru
ID - IZKAB_2024_26_4_a2
ER -
%0 Journal Article
%A A. N. Lukyanov
%A A. M. Tramova
%T A method for assessing the degree of confidence in the self-explanations of GPT models
%J News of the Kabardin-Balkar scientific center of RAS
%D 2024
%P 54-61
%V 26
%N 4
%I mathdoc
%U http://geodesic.mathdoc.fr/item/IZKAB_2024_26_4_a2/
%G ru
%F IZKAB_2024_26_4_a2
A. N. Lukyanov; A. M. Tramova. A method for assessing the degree of confidence in the self-explanations of GPT models. News of the Kabardin-Balkar scientific center of RAS, Vol. 26 (2024) no. 4, pp. 54-61. http://geodesic.mathdoc.fr/item/IZKAB_2024_26_4_a2/
[1] A. Vaswani, N. Shazeer, N. Parmar et al., “Attention is all you need”, Advances in Neural Information Processing Systems, 30, 2017, https://arxiv.org/abs/1706.03762
[2] A. Dosovitskiy, L. Beyer, A. Kolesnikov et al., “An image is worth 16x16 words: Transformers for image recognition at scale”, International Conference on Learning Representations, 2021, https://arxiv.org/abs/2010.11929
[3] R. R. Selvaraju, M. Cogswell, A. Das et al., “Grad-CAM: Visual explanations from deep networks via gradient-based localization”, https://arxiv.org/abs/1610.02391
[4] M. T. Ribeiro, S. Singh, C. Guestrin, “‘Why should I trust you?’: Explaining the Predictions of Any Classifier”, https://arxiv.org/abs/1602.04938
[5] S. Lundberg, S.-I. Lee, “A unified approach to interpreting model predictions”, https://arxiv.org/abs/1705.07874
[6] J. Vig, “Visualizing attention in transformer-based language representation models”, https://arxiv.org/abs/1904.02679
[7] L. Bereska, E. Gavves, “Mechanistic interpretability for AI safety: A review”, https://arxiv.org/abs/2404.14082
[8] P. Lewis, E. Perez, A. Piktus et al., “Retrieval-augmented generation for knowledge-intensive NLP tasks”, https://arxiv.org/abs/2005.11401
[9] J. Wei, X. Wang, D. Schuurmans et al., “Chain-of-Thought Prompting Elicits Reasoning in Large Language Models”, https://arxiv.org/abs/2201.11903
[10] J. Pfau, W. Merrill, S. R. Bowman, “Let's think dot by dot: Hidden computation in transformer language models”, https://arxiv.org/abs/2404.15758
[11] S. Abnar, W. Zuidema, “Quantifying attention flow in transformers”, https://arxiv.org/abs/2005.00928
[12] H. Touvron, T. Lavril, G. Izacard et al., “LLaMA: Open and efficient foundation language models”, https://arxiv.org/abs/2302.13971
[13] A. Q. Jiang, A. Sablayrolles, A. Mensch et al., “Mistral 7B”, https://arxiv.org/abs/2310.06825
[14] L. Tunstall, E. Beeching, N. Lambert et al., “Zephyr: Direct distillation of LM alignment”, https://arxiv.org/abs/2310.16944
[15] A. Gu, T. Dao, “Mamba: Linear-time sequence modeling with selective state spaces”, https://arxiv.org/abs/2312.00752
[16] A. Ali, I. Zimerman, L. Wolf, “The hidden attention of Mamba models”, https://arxiv.org/abs/2403.01590