Mots-clés : KNN classification
% Journal article record. The journal name is a romanised title; its circumflex
% accents are written as brace-wrapped LaTeX escapes so BibTeX treats each as one letter.
@article{VSPUI_2011_3_a8,
  author   = {Ageev, M. S. and Dobrov, B. V.},
  title    = {An efficient nearest neighbours search algorithm for full-text documents},
  journal  = {Vestnik Sankt-Peterburgskogo universiteta. Prikladna{\^a} matematika, informatika, processy upravleni{\^a}},
  pages    = {72--84},
  year     = {2011},
  number   = {3},
  language = {ru},
  url      = {http://geodesic.mathdoc.fr/item/VSPUI_2011_3_a8/}
}
TY - JOUR AU - M. S. Ageev AU - B. V. Dobrov TI - An efficient nearest neighbours search algorithm for full-text documents JO - Vestnik Sankt-Peterburgskogo universiteta. Prikladnaâ matematika, informatika, processy upravleniâ PY - 2011 SP - 72 EP - 84 IS - 3 UR - http://geodesic.mathdoc.fr/item/VSPUI_2011_3_a8/ LA - ru ID - VSPUI_2011_3_a8 ER -
%0 Journal Article %A M. S. Ageev %A B. V. Dobrov %T An efficient nearest neighbours search algorithm for full-text documents %J Vestnik Sankt-Peterburgskogo universiteta. Prikladnaâ matematika, informatika, processy upravleniâ %D 2011 %P 72-84 %N 3 %U http://geodesic.mathdoc.fr/item/VSPUI_2011_3_a8/ %G ru %F VSPUI_2011_3_a8
M. S. Ageev; B. V. Dobrov. An efficient nearest neighbours search algorithm for full-text documents. Vestnik Sankt-Peterburgskogo universiteta. Prikladnaâ matematika, informatika, processy upravleniâ, no. 3 (2011), pp. 72-84. http://geodesic.mathdoc.fr/item/VSPUI_2011_3_a8/
[1] Joachims T., Text Categorization with Support Vector Machines: Learning with Many Relevant Features, Proc. of ECML-98. 10th European Conference on Machine Learning, 1998 URL: http://www.cs.cornell.edu/people/tj/publications/joachims_98a.ps.gz
[2] Yang Y., Liu X., “A re-examination of text categorization methods”, Proc. of SIGIR-99. 22nd ACM Intern. Conference on Research and Development in Information Retrieval, eds. M. A. Hearst, F. Gey, R. Tong, ACM Press, New York, Berkeley, 1999, 42–49 | DOI
[3] Ageev M. S., Dobrov B. V., Lukashevich N. V., “Avtomaticheskaya rubrikatsiya tekstov: metody i problemy”, Uchen. zap. Kazansk. gos. un-ta. Ser. Fiziko-matematicheskie nauki, 150:4 (2008), 25–40
[4] Ageev M. S., Dobrov B. V., Lukashevich N. V. i dr., “Klassifikatsiya zaprosov i optimizatsiya faktorov dlya poiska normativnykh dokumentov”, Ros. seminar po otsenke metodov informatsionnogo poiska, Trudy ROMIP'2009, seminar v ramkakh Vseros. nauch. konferentsii RCDL'2009 (16 sentyabrya 2009 g., Petrozavodsk), NU TsSI, Sankt-Peterburg, 2009, 151–162
[5] Elsayed T., Lin J., Oard D. W., “Pairwise document similarity in large collections with MapReduce”, Proc. of the 46th Annual Meeting of the Association for Computational Linguistics on Human Language Technologies, Short papers (Columbus, Ohio, June 16–17, 2008), Morristown, NJ, 2008, 265–268
[6] Manning C. D., Raghavan P., Schutze H., Introduction to Information Retrieval, Cambridge, 2008 URL: http://nlp.stanford.edu/IR-book/information-retrieval-book.html
[7] Ageev M. S., Dobrov B. V., Lukashevich N. V., Sidorov A. V., “Eksperimentalnye algoritmy poiska/klassifikatsii i sravnenie s «basic line»”, Ros. seminar po otsenke metodov informatsionnogo poiska, Trudy vtorogo ros. seminara ROMIP'2004 (Puschino, 01.10.2004), SPb., Nauch.-issled. in-t khimii S.-Peterb. un-ta, 2004, 62–89
[8] Zelenkov Yu., Segalovich I., Sravnitelnyi analiz metodov opredeleniya nechetkikh dublikatov dlya Web-dokumentov, Devyataya Vseros. nauch. konferentsiya RCDL'2007 «Elektronnye biblioteki: Perspektivnye Metody i Tekhnologii, Elektronnye kollektsii» (Pereslavl-Zalesskii), 2007 URL: http://rcdl.ru/doc/2007/paper_65_v1.pdf
[9] Ilyinsky S., Kuzmin M., Melkov A., Segalovich I., WWW-2002 – Eleventh Intern. World Wide Web Conference URL: http://www2002.org/CDROM/poster/187/
[10] Butakov S., Scherbinin V., “The toolbox for local and global plagiarism detection”, Comput. Educ., 52:4 (2009), 781–788 URL: http://dx.doi.org/10.1016/j.compedu.2008.12.001
[11] Lifshits Y., Algorithms for Nearest Neighbor Search, Tutorial at RuSSIR'07 (Ekaterinburg, September 2007) URL: http://simsearch.yury.name/tutorial.html
[12] Zobel J., Moffat A., “Inverted files for text search engines”, ACM Comput. Surv., 38:2 (2006) URL: http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.105.8844&rep=rep1&type=pdf | DOI
[13] Segalovich I., Maslov M., “Yandeks na ROMIP-2004. Nekotorye aspekty polnotekstovogo poiska i ranzhirovaniya v Yandeks”, Ros. seminar po otsenke metodov informatsionnogo poiska, Trudy vtorogo ros. seminara ROMIP'2004 (Puschino, 01.10.2004), Nauch.-issled. in-t khimii S.-Peterb. un-ta, SPb., 2004, 100–109
[14] Kak rabotayut novye Yandeks.Novosti, Yandeks, 2003 URL: http://company.yandex.ru/public/articles/smi-mirror.xml
[15] Dean J., Ghemawat S., “MapReduce: simplified data processing on large clusters”, Proc. of the 6th conference on Symposium on Operating Systems Design and Implementation (San Francisco, CA, December 06–08, 2004), 10–12
[16] Apache Hadoop URL: http://hadoop.apache.org/
[17] AQUAINT-2 Information-Retrieval Text Research Collection URL: http://www.ldc.upenn.edu/Catalog/CatalogEntry.jsp?catalogId=LDC2008T25
[18] Veb kollektsiya BY.web, 2007 URL: http://romip.ru/ru/collections/by.web-2007.html