Voir la notice de l'article provenant de la source Math-Net.Ru
@comment{Journal article record SEMR_2019_16_a48. Accented letters in the journal name are written in the brace-wrapped form so that BibTeX treats each one as a single letter.}
@article{SEMR_2019_16_a48,
  author    = {Chebunin, M. G. and Kovalevskii, A. P.},
  title     = {A statistical test for the {Zipf's} law by deviations from the {Heaps'} law},
  journal   = {Sibirskie {\`e}lektronnye matemati{\v{c}}eskie izvesti{\^a}},
  volume    = {16},
  pages     = {1822--1832},
  year      = {2019},
  publisher = {mathdoc},
  language  = {en},
  url       = {http://geodesic.mathdoc.fr/item/SEMR_2019_16_a48/},
}
TY  - JOUR
AU  - M. G. Chebunin
AU  - A. P. Kovalevskii
TI  - A statistical test for the Zipf's law by deviations from the Heaps' law
JO  - Sibirskie èlektronnye matematičeskie izvestiâ
PY  - 2019
SP  - 1822
EP  - 1832
VL  - 16
PB  - mathdoc
UR  - http://geodesic.mathdoc.fr/item/SEMR_2019_16_a48/
LA  - en
ID  - SEMR_2019_16_a48
ER  - 
M. G. Chebunin; A. P. Kovalevskii. A statistical test for the Zipf's law by deviations from the Heaps' law. Sibirskie èlektronnye matematičeskie izvestiâ, Tome 16 (2019), pp. 1822-1832. http://geodesic.mathdoc.fr/item/SEMR_2019_16_a48/
[1] E.G. Altmann, M. Gerlach, “Statistical laws in linguistics”, Creativity and Universality in Language, Lecture Notes in Morphogenesis, eds. M. Degli Esposti et al., 2016
[2] R. Baeza-Yates, G. Navarro, “Block Addressing Indices for Approximate Text Retrieval”, J. Am. Soc. Inf. Sci., 51 (2000), 69 | DOI
[3] R.R. Bahadur, “On the number of distinct values in a large sample from an infinite discrete distribution”, Proceedings of the National Institute of Sciences of India, 26A, Supp. II (1960), 67–75 | MR | Zbl
[4] A.D. Barbour, “Univariate approximations in the infinite occupancy scheme”, Alea, 6 (2009), 415–433 | MR
[5] A.D. Barbour, A.V. Gnedin, “Small counts in the infinite occupancy scheme”, Electronic Journal of Probability, 14 (2009), 13, 365–384 | DOI | MR | Zbl
[6] A. Ben-Hamou, S. Boucheron, M. I. Ohannessian, “Concentration inequalities in the infinite urn scheme for occupancy counts and the missing mass, with applications”, Bernoulli, 23:1 (2017), 249–287 | DOI | MR | Zbl
[7] S. Bernhardsson, L.E. Correa da Rocha, P. Minnhagen, “The Meta Book and Size-Dependent Properties of Written Language”, New J. Phys., 11 (2009), 123015 | DOI
[8] M.G. Chebunin, “Estimation of parameters of probabilistic models which is based on the number of different elements in a sample”, Sib. Zh. Ind. Mat., 17:3 (2014), 135–147 (in Russian) | MR | Zbl
[9] M.G. Chebunin, “Functional central limit theorem in an infinite urn scheme for distributions with superheavy tails”, Sib. Elektron. Mat. Izv., 14 (2017), 1289–1298 | MR | Zbl
[10] M. Chebunin, A. Kovalevskii, “Functional central limit theorems for certain statistics in an infinite urn scheme”, Statistics and Probability Letters, 119 (2016), 344–348 | DOI | MR | Zbl
[11] M. Chebunin, A. Kovalevskii, “Asymptotically normal estimators for Zipf's law”, Sankhya A, 2018 | Zbl
[12] G. Decrouez, M. Grabchak, Q. Paris, “Finite sample properties of the mean occupancy counts and probabilities”, Bernoulli, 24:3 (2018), 1910–1941 | DOI | MR | Zbl
[13] P. Deheuvels, G.V. Martynov, “Cramér-von Mises-type tests with applications to tests of independence for multivariate extreme-value distributions”, Communications in Statistics — Theory and Methods, 25:4 (1996), 871–908 | DOI | MR | Zbl
[14] O. Durieu, Y. Wang, “From infinite urn schemes to decompositions of self-similar Gaussian processes”, Electron. J. Probab., 21 (2016), 43, 23 pp. | DOI | MR | Zbl
[15] O. Durieu, G. Samorodnitsky, Y. Wang, “From infinite urn schemes to self-similar stable processes”, Stochastic Processes and their Applications, 2019 (to appear) | Zbl
[16] M. Dutko, “Central limit theorems for infinite urn models”, Ann. Probab., 17 (1989), 1255–1263 | DOI | MR | Zbl
[17] M. Gerlach, E.G. Altmann, “Stochastic Model for the Vocabulary Growth in Natural Languages”, Physical Review X, 3 (2013), 021006 | DOI
[18] A. Gnedin, B. Hansen, J. Pitman, “Notes on the occupancy problem with infinitely many boxes: general asymptotics and power laws”, Probability Surveys, 4 (2007), 146–171 | DOI | MR | Zbl
[19] A. Guillou, P. Hall, “A diagnostic for selecting the threshold in extreme value analysis”, Journal of the Royal Statistical Society: Series B, 63:2 (2001), 293–305 | DOI | MR
[20] H.S. Heaps, Information Retrieval: Computational and Theoretical Aspects, Academic Press, 1978 | Zbl
[21] G. Herdan, Type-token mathematics, The Hague, Mouton, 1960
[22] H.-K. Hwang, S. Janson, “Local Limit Theorems for Finite and Infinite Urn Models”, The Annals of Probability, 36:3 (2008), 992–1022 | DOI | MR | Zbl
[23] I. Eliazar, “The Growth Statistics of Zipfian Ensembles: Beyond Heaps' Law”, Physica (Amsterdam), 390 (2011), 3189 | DOI
[24] S. Karlin, “Central Limit Theorems for Certain Infinite Urn Schemes”, Journal of Mathematics and Mechanics, 17:4 (1967), 373–401 | MR | Zbl
[25] E. S. Key, “Rare Numbers”, Journal of Theoretical Probability, 5:2 (1992), 375–389 | DOI | MR | Zbl
[26] E. S. Key, “Divergence rates for the number of rare numbers”, Journal of Theoretical Probability, 9:2 (1996), 413–428 | DOI | MR | Zbl
[27] A.P. Kovalevskii, E.V. Shatalin, “Asymptotics of sums of residuals of one-parameter linear regression on order statistics”, Theory of probability and its applications, 59:3 (2015), 375–387 | DOI | MR | Zbl
[28] A. Kovalevskii, E. Shatalin, “A limit process for a sequence of partial sums of residuals of a simple regression on order statistics”, Probability and Mathematical Statistics, 36:1 (2016), 113–120 | MR | Zbl
[29] D.C. van Leijenhorst, T.P. van der Weide, “A Formal Derivation of Heaps' Law”, Information Sciences (NY), 170 (2005), 263 | DOI | MR | Zbl
[30] G.V. Martynov, Omega-square tests, Nauka, M., 1978 (in Russian) | MR | Zbl
[31] A. Muratov, S. Zuyev, “Bit flipping and time to recover”, J. Appl. Probab., 53:3 (2016), 650–666 | DOI | MR | Zbl
[32] P.T. Nicholls, “Estimation of Zipf parameters”, J. Am. Soc. Inf. Sci., 38 (1987), 443–445 | DOI
[33] M.I. Ohannessian, M.A. Dahleh, “Rare probability estimation under regularly varying heavy tails”, PMLR, 23, Proceedings of the 25th Annual Conference on Learning Theory (2012), 21.1–21.24
[34] A.M. Petersen, J.N. Tenenbaum, S. Havlin, H.E. Stanley, M. Perc, “Languages cool as they expand: Allometric scaling and the decreasing need for new words”, Scientific Reports, 2 (2012), 943 | DOI
[35] M.A. Serrano, A. Flammini, F. Menczer, “Modeling Statistical Properties of Written Text”, PLoS ONE, 4 (2009), e5372 | DOI
[36] N.V. Smirnov, “On the omega-squared distribution”, Mat. Sb., 2 (1937), 973–993 (in Russian) | Zbl
[37] N.S. Zakrevskaya, A.P. Kovalevskii, “One-parameter probabilistic models of text statistics”, Sib. Zh. Ind. Mat., 4:2 (2001), 142–153 (in Russian) | MR | Zbl
[38] N. Zakrevskaya, A. Kovalevskii, “An omega-square statistics for analysis of correspondence of small texts to the Zipf—Mandelbrot law”, Applied methods of statistical analysis. Statistical computation and simulation, AMSA'2019, Proceedings of the International Workshop (18–20 September 2019, Novosibirsk), NSTU, Novosibirsk, 2019, 488–494
[39] G.K. Zipf, The Psycho-Biology of Language, Routledge, London, 1936