Voir la notice de l'article provenant de la source Math-Net.Ru
% BibTeX record for the article; the citation key matches the item id in the url field.
@article{UZERU_2024_58_3_a1,
  author    = {Kh. S. Khechoyan},
  title     = {Synthetic document generation for the task of visual document understanding},
  journal   = {Proceedings of the Yerevan State University. Physical and mathematical sciences},
  volume    = {58},
  number    = {3},
  pages     = {79--87},
  year      = {2024},
  publisher = {mathdoc},
  language  = {en},
  url       = {http://geodesic.mathdoc.fr/item/UZERU_2024_58_3_a1/},
}
TY  - JOUR
AU  - Kh. S. Khechoyan
TI  - Synthetic document generation for the task of visual document understanding
JO  - Proceedings of the Yerevan State University. Physical and mathematical sciences
PY  - 2024
SP  - 79
EP  - 87
VL  - 58
IS  - 3
PB  - mathdoc
UR  - http://geodesic.mathdoc.fr/item/UZERU_2024_58_3_a1/
LA  - en
ID  - UZERU_2024_58_3_a1
ER  -
%0 Journal Article
%A Kh. S. Khechoyan
%T Synthetic document generation for the task of visual document understanding
%J Proceedings of the Yerevan State University. Physical and mathematical sciences
%D 2024
%P 79-87
%V 58
%N 3
%I mathdoc
%U http://geodesic.mathdoc.fr/item/UZERU_2024_58_3_a1/
%G en
%F UZERU_2024_58_3_a1
Kh. S. Khechoyan. Synthetic document generation for the task of visual document understanding. Proceedings of the Yerevan State University. Physical and mathematical sciences, Tome 58 (2024) no. 3, pp. 79-87. http://geodesic.mathdoc.fr/item/UZERU_2024_58_3_a1/
[1] M. Kardas, P. Czapla, et al., “AxCell: Automatic Extraction of Results from Machine Learning Papers”, Proc. of the 2020 Conf. on Empirical Methods in Natural Language Processing (EMNLP), 2020, 8580–8594 | DOI
[2] S. Park, S. Shin, et al., CORD: A Consolidated Receipt Dataset for Post-OCR Parsing, 2022
[3] G. Jaume, H. K. Ekenel, J.-P. Thiran, FUNSD: A Dataset for Form Understanding in Noisy Scanned Documents, 2019 | DOI
[4] Z. Huang, K. Chen, et al., “Competition on Scanned Receipt OCR and Information Extraction”, 2019 Int. Conf. on Document Analysis and Recognition (ICDAR), 2019, 1516–1520 | DOI
[5] T. Stanisławek, F. Graliński, et al., “Kleister: Key Information Extraction Datasets Involving Long Documents with Complex Layouts”, Lecture Notes in Computer Science, 12856 (2021), 428–444 | DOI
[6] B. Smock, R. Pesala, R. Abraham, PubTables-1M: Towards Comprehensive Table Extraction from Unstructured Documents, 2021 | DOI
[7] Z. Wang, Y. Zhou, et al., “VRDU: A Benchmark for Visually-rich Document Understanding”, Proc. of the ACM SIGKDD Int. Conf. on Knowledge Discovery and Data Mining, 2023, 5184–5193 | DOI
[8] S. Capobianco, S. Marinai, DocEmul: A Toolkit to Generate Structured Historical Documents, 2017 | DOI
[9] N. Raman, S. Shah, M. Veloso, “Synthetic Document Generator for Annotation-free Layout Recognition”, Pattern Recognition, 120 (2021), 108660 | DOI
[10] D. Faraglia, et al., Faker. [Software] https://github.com/joke2k/faker | Zbl
[11] A. Yeghiazaryan, K. Khechoyan, et al., “Tokengrid: Toward More Efficient Data Extraction from Unstructured Documents”, IEEE Access, 10 (2022), 39261–39268 | DOI