% Journal article record. Author names use the "Last, First" form, and the
% circumflex accents in the journal title are brace-wrapped so that BibTeX
% treats each accented letter as a single character when sorting or recasing.
@article{VSPUI_2024_20_3_a6,
  author   = {Sergeev, S. L. and Blekanov, I. S. and Ezhov, F. V. and Tarasov, N. A.},
  title    = {Extending the applicability of the {Zipf's} laws to the sequences of byte data},
  journal  = {Vestnik Sankt-Peterburgskogo universiteta. Prikladna{\^a} matematika, informatika, processy upravleni{\^a}},
  year     = {2024},
  volume   = {20},
  number   = {3},
  pages    = {391--403},
  language = {en},
  url      = {http://geodesic.mathdoc.fr/item/VSPUI_2024_20_3_a6/},
}
TY - JOUR AU - S. L. Sergeev AU - I. S. Blekanov AU - F. V. Ezhov AU - N. A. Tarasov TI - Extending the applicability of the Zipf's laws to the sequences of byte data JO - Vestnik Sankt-Peterburgskogo universiteta. Prikladnaâ matematika, informatika, processy upravleniâ PY - 2024 SP - 391 EP - 403 VL - 20 IS - 3 UR - http://geodesic.mathdoc.fr/item/VSPUI_2024_20_3_a6/ LA - en ID - VSPUI_2024_20_3_a6 ER -
%0 Journal Article %A S. L. Sergeev %A I. S. Blekanov %A F. V. Ezhov %A N. A. Tarasov %T Extending the applicability of the Zipf's laws to the sequences of byte data %J Vestnik Sankt-Peterburgskogo universiteta. Prikladnaâ matematika, informatika, processy upravleniâ %D 2024 %P 391-403 %V 20 %N 3 %U http://geodesic.mathdoc.fr/item/VSPUI_2024_20_3_a6/ %G en %F VSPUI_2024_20_3_a6
S. L. Sergeev; I. S. Blekanov; F. V. Ezhov; N. A. Tarasov. Extending the applicability of the Zipf's laws to the sequences of byte data. Vestnik Sankt-Peterburgskogo universiteta. Prikladnaâ matematika, informatika, processy upravleniâ, Tome 20 (2024) no. 3, pp. 391-403. http://geodesic.mathdoc.fr/item/VSPUI_2024_20_3_a6/
[1] G. K. Zipf, The psycho-biology of language: An introduction to dynamic philology, Routledge Publ., London, 1999, 356 pp.
[2] G. K. Zipf, Human behavior and the principle of least effort, Cambridge, Mass., 1965, 573 pp.
[3] B. Mandelbrot, “An informational theory of the statistical structure of language”, Communication Theory, 84 (1953), 486–502
[4] B. Mandelbrot, The fractal geometry of nature, W. H. Freeman & Co. Publ., New York, 1982, 468 pp. | MR | Zbl
[5] G. Lu, Y. Jin, D. H. C. Du, “Frequency based chunking for data de-duplication”, 2010 IEEE International Symposium on Modeling, Analysis and Simulation of Computer and Telecommunication Systems, IEEE, 2010, 287–296
[6] R. H. Baayen, Word frequency distributions, Springer Science & Business Media, Dordrecht, 2001, 335 pp. | MR
[7] S. T. Piantadosi, “Zipf's word frequency law in natural language: A critical review and future directions”, Psychonomic Bulletin & Review, 21:5 (2014), 1112–1130 | DOI
[8] S. Yu, C. Xu, H. Liu, Zipf's law in 50 languages: its structural pattern, linguistic interpretation, and cognitive motivation, 2018, arXiv: 1807.01855
[9] S. Arshad, S. Hu, B. N. Ashraf, “Zipf's law and city size distribution: A survey of the literature and future research agenda”, Physica A: Statistical Mechanics and its Applications, 492 (2018), 75–92 | DOI
[10] L. Gao, G. Zhou, J. Luo, Y. Huang, “Word embedding with Zipf's context”, IEEE Access, 7 (2019), 168934–168943 | DOI
[11] A. Baumann, K. Każmierski, T. Matzinger, “Scaling laws for phonotactic complexity in spoken English language data”, Language and Speech, 64:3 (2021), 693–704 | DOI
[12] J. I. Perotti, O. V. Billoni, “On the emergence of Zipf's law in music”, Physica A: Statistical Mechanics and its Applications, 549 (2020), 124309 | DOI | MR
[13] A. Kershenbaum, V. Demartsev, D. E. Gammon, E. Geffen, M. L. Gustison, A. Ilany, A. R. Lameira, “Shannon entropy as a robust estimator of Zipf's law in animal vocal communication repertoires”, Methods in Ecology and Evolution, 12:3 (2021), 553–564 | DOI
[14] M. Crosier, L. D. Griffin, “Zipf's law in image coding schemes”, BMVC 2007 – Proceedings of the British Machine Vision Conference, 2007, 1–10
[15] A. Kornai, “Zipf's law outside the middle range”, Sixth Meeting on Mathematics of Language, 1999, 347–356
[16] Á. Corral, G. Boleda, R. Ferrer-i-Cancho, “Zipf's law for word frequencies: Word forms versus lemmas in long texts”, PloS One, 10:7 (2015), 549, 23 pp. | DOI
[17] M. Matsumoto, T. Nishimura, “Mersenne twister: a 623-dimensionally equidistributed uniform pseudo-random number generator”, ACM Transactions on Modeling and Computer Simulation (TOMACS), 8:1 (1998), 3–30 | DOI
[18] M. E. O'Neill, PCG: A family of simple fast space-efficient statistically good algorithms for random number generation (accessed: May 1, 2024) https://www.pcg-random.org/
[19] Upgrading PCG64 with PCG64DXSM – NumPy v1.24 Manual (accessed: May 1, 2024) https://numpy.org/doc/stable/reference/random/upgrading-pcg64.html
[20] J. K. Salmon, M. A. Moraes, R. O. Dror, D. E. Shaw, “Parallel random numbers: as easy as 1, 2, 3”, Proceedings of 2011 International Conference for High Performance Computing, Networking, Storage and Analysis, 2011, 1–12
[21] SFC64. Small Fast Chaotic PRNG (accessed: May 1, 2024) https://numpy.org/doc/stable/reference/random/bit_generators/sfc64.html
[22] M. P. Bakulina, “Application of the Zipf law to text compression”, Journal of Applied and Industrial Mathematics, 2:4 (2008), 477–483 | DOI | MR
[23] M. A. Mahmood, K. A. Hasan, “Efficient compression scheme for large natural text using Zipf distribution”, 2019 1$^{st}$ International Conference on Advances in Science, Engineering and Robotics Technology (ICASERT), 2019, 1–6