Voir la notice de l'article provenant de la source Math-Net.Ru
% BibTeX record for the Math-Net.Ru article MBB_2020_15_2_a5 (text outside an entry is ignored by BibTeX).
% Acronyms in the title are brace-protected so sentence-casing styles keep their capitalisation;
% accented letters in the journal name are written as BibTeX special characters ({\cmd ...}).
@article{MBB_2020_15_2_a5,
  author    = {Mukhin, A. M. and Genaev, M. A. and Rasskazov, D. A. and Lashin, S. A. and Afonnikov, D. A.},
  title     = {{RDBMS} and {NoSQL} based hybrid technology for transcriptome data structuring and processing},
  journal   = {Matemati{\v{c}}eska{\^a} biologi{\^a} i bioinformatika},
  pages     = {455--470},
  publisher = {mathdoc},
  volume    = {15},
  number    = {2},
  year      = {2020},
  language  = {ru},
  url       = {http://geodesic.mathdoc.fr/item/MBB_2020_15_2_a5/},
}
TY - JOUR AU - A. M. Mukhin AU - M. A. Genaev AU - D. A. Rasskazov AU - S. A. Lashin AU - D. A. Afonnikov TI - RDBMS and NoSQL based hybrid technology for transcriptome data structuring and processing JO - Matematičeskaâ biologiâ i bioinformatika PY - 2020 SP - 455 EP - 470 VL - 15 IS - 2 PB - mathdoc UR - http://geodesic.mathdoc.fr/item/MBB_2020_15_2_a5/ LA - ru ID - MBB_2020_15_2_a5 ER -
%0 Journal Article %A A. M. Mukhin %A M. A. Genaev %A D. A. Rasskazov %A S. A. Lashin %A D. A. Afonnikov %T RDBMS and NoSQL based hybrid technology for transcriptome data structuring and processing %J Matematičeskaâ biologiâ i bioinformatika %D 2020 %P 455-470 %V 15 %N 2 %I mathdoc %U http://geodesic.mathdoc.fr/item/MBB_2020_15_2_a5/ %G ru %F MBB_2020_15_2_a5
A. M. Mukhin; M. A. Genaev; D. A. Rasskazov; S. A. Lashin; D. A. Afonnikov. RDBMS and NoSQL based hybrid technology for transcriptome data structuring and processing. Matematičeskaâ biologiâ i bioinformatika, Tome 15 (2020) no. 2, pp. 455-470. http://geodesic.mathdoc.fr/item/MBB_2020_15_2_a5/
[1] L. B. B. Martin, Z. Fei, J. J. Giovannoni, J. K. C. Rose, “Catalyzing plant science research with RNA-seq”, Frontiers in Plant Science, 4 (2013), 66 | DOI | Zbl
[2] B. Usadel, A. R. Fernie, “The plant transcriptome—from integrating observations to models”, Frontiers in Plant Science, 4 (2013), 48 | DOI
[3] A. V. Klepikova, A. S. Kasianov, E. S. Gerasimov, M. D. Logacheva, A. A. Penin, “A high resolution map of the Arabidopsis thaliana developmental transcriptome based on RNA-seq profiling”, Plant Journal, 88:6 (2016), 1058–1070 | DOI
[4] S. R. Strickler, A. Bombarely, L. A. Mueller, “Designing a transcriptome next-generation sequencing project for a nonmodel plant species”, American Journal of Botany, 99:2 (2012), 257–266 | DOI
[5] B. J. Haas, A. Papanicolaou, M. Yassour, M. Grabherr, P. D. Blood, J. Bowden, M. B. Couger, D. Eccles, B. Li, M. Lieber et al, “De novo transcript sequence reconstruction from RNA-seq using the Trinity platform for reference generation and analysis”, Nature Protocols, 8:8 (2013), 1494–1512 | DOI
[6] D. Kim, B. Langmead, S. L. Salzberg, “HISAT: A fast spliced aligner with low memory requirements”, Nature Methods, 12:4 (2015), 357–360 | DOI
[7] D. M. Bryant, K. Johnson, T. DiTommaso, T. Tickle, M. B. Couger, D. Payzin-Dogru, T. J. Lee, N. D. Leigh, T. H. Kuo, F. G. Davis et al, “A Tissue-Mapped Axolotl De Novo Transcriptome Enables Identification of Limb Regeneration Factors”, Cell Reports, 18:3 (2017), 762–776 | DOI
[8] M. E. Bolger, B. Arsova, B. Usadel, “Plant genome and transcriptome annotations: From misconceptions to simple solutions”, Briefings in Bioinformatics, 19:3 (2018), 437–449
[9] A. Y. Glagoleva, N. A. Shmakov, O. Y. Shoeva, G. V. Vasiliev, N. V. Shatskaya, A. Börner, D. A. Afonnikov, E. K. Khlestkina, “Metabolic pathways and genes identified by RNA-seq analysis of barley near-isogenic lines differing by allelic state of the Black lemma and pericarp (Blp) gene”, BMC Plant Biology, 17 (2017), 182, 1 | DOI
[10] N. A. Shmakov, G. V. Vasiliev, N. V. Shatskaya, A. V. Doroshkov, E. I. Gordeeva, D. A. Afonnikov, E. K. Khlestkina, “Identification of nuclear genes controlling chlorophyll synthesis in barley by RNA-seq”, BMC Plant Biology, 16:3 (2016), 119–138
[11] I. Papatheodorou, P. Moreno, J. Manning, A. M. P. Fuentes, N. George, S. Fexova, N. A. Fonseca, A. Füllgrabe, M. Green, N. Huang et al, “Expression Atlas update: From tissues to single cells”, Nucleic Acids Research, 48:D1 (2020), D77–D83
[12] A. Masoudi-Nejad, S. Goto, R. Jauregui, M. Ito, S. Kawashima, Y. Moriya, T. R. Endo, M. Kanehisa, “EGENES: Transcriptome-based plant database of genes with metabolic pathway information and expressed sequence tag indices in KEGG”, Plant Physiology, 144:2 (2007), 857–866 | DOI
[13] S. Ueno, Y. Nakamura, M. Kobayashi, S. Terashima, W. Ishizuka, K. Uchiyama, Y. Tsumura, K. Yano, S. Goto, “TodoFirGene: Developing transcriptome resources for genetic analysis of Abies sachalinensis”, Plant and Cell Physiology, 59:6 (2018), 1276–1284 | DOI
[14] A. Dubois, S. Carrere, O. Raymond, B. Pouvreau, L. Cottret, A. Roccia, J. P. Onesto, S. Sakr, R. Atanassova, S. Baudino et al, “Transcriptome database resource and gene expression atlas for the rose”, BMC Genomics, 13:1 (2012), 638 | DOI
[15] N. Fernández-Pozo, J. Canales, D. Guerrero-Fernández, D. P. Villalobos, S. M. Díaz-Moreno, R. Bautista, A. Flores-Monterroso, M. Á. Guevara, P. Perdiguero, C. Collada et al, “EuroPineDB: A high-coverage web database for maritime pine transcriptome”, BMC Genomics, 12:1 (2011), 366 | DOI
[16] D. W. Barnett, E. K. Garrison, A. R. Quinlan, M. P. Strömberg, G. T. Marth, “Bamtools: A C++ API and toolkit for analyzing and managing BAM files”, Bioinformatics, 27:12 (2011), 1691–1692 | DOI
[17] A. R. Quinlan, I. M. Hall, “BEDTools: A flexible suite of utilities for comparing genomic features”, Bioinformatics, 26:6 (2010), 841–842 | DOI
[18] H. Li, B. Handsaker, A. Wysoker, T. Fennell, J. Ruan, N. Homer, G. Marth, G. Abecasis, R. Durbin, “The Sequence Alignment/Map format and SAMtools”, Bioinformatics, 25:16 (2009), 2078–2079 | DOI
[19] G. Pertea, M. Pertea, “GFF Utilities: GffRead and GffCompare”, F1000Research, 9, 2020, 304 | DOI
[20] S. Anders, W. Huber, “Differential expression of RNA-Seq data at the gene level – the DESeq package”, F1000Research, 10, European Molecular Biology Laboratory (EMBL), Heidelberg, Germany, 2012 | Zbl
[21] N. L. Bray, H. Pimentel, P. Melsted, L. Pachter, “Near-optimal probabilistic RNA-seq quantification”, Nature Biotechnology, 34:5 (2016), 525–527 | DOI
[22] K. V. Gunbin, V. V. Suslov, M. A. Genaev, D. A. Afonnikov, “Computer System for Analysis of Molecular Evolution Modes (SAMEM): Analysis of molecular evolution modes at deep inner branches of the phylogenetic tree”, In Silico Biology, 11:3 (2011), 109–123
[23] J. Han, E. Haihong, G. Le, J. Du, “Survey on NoSQL database”, ICPCA 2011: 6th International Conference on Pervasive Computing and Applications, 2011, 363–366
[24] M. Gabetta, I. Limongelli, E. Rizzo, A. Riva, D. Segagni, R. Bellazzi, “BigQ: A NoSQL based framework to handle genomic variants in i2b2”, BMC Bioinformatics, 16:1 (2015), 415 | DOI
[25] ENA Portal, (accessed: 23.10.2020) https://www.ebi.ac.uk/ena/portal/api/
[26] P. W. Harrison, B. Alako, C. Amid, A. Cerdeño-Tárraga, I. Cleland, S. Holt, A. Hussein, S. Jayathilaka, S. Kay, T. Keane et al, “The European Nucleotide Archive in 2018”, Nucleic Acids Research, 47:D1 (2019), D84–D88 | DOI
[27] Submit your project, biological samples, (accessed: 23.10.2020) https://www.ncbi.nlm.nih.gov/sra/docs/submitbio/
[28] S. R. A. S. Staff, Using the SRA Toolkit to convert .sra files into other formats, National Center for Biotechnology Information, 2011
[29] S. Chen, Y. Zhou, Y. Chen, J. Gu, “Fastp: An ultra-fast all-in-one FASTQ preprocessor”, Bioinformatics, 34:17 (2018), i884–i890 | DOI
[30] E. Bushmanova, D. Antipov, A. Lapidus, V. Suvorov, A. D. Prjibelski, “RnaQUAST: A quality assessment tool for de novo transcriptome assemblies”, Bioinformatics, 32:14 (2016), 2210–2212 | DOI
[31] T. D. Wu, C. K. Watanabe, “GMAP: A genomic mapping and alignment program for mRNA and EST sequences”, Bioinformatics, 21:9 (2005), 1859–1875 | DOI
[32] Ensembl Plants, (accessed: 23.10.2020) https://plants.ensembl.org/index.html
[33] P. J. Kersey, J. E. Allen, A. Allot, M. Barba, S. Boddu, B. J. Bolt, D. Carvalho-Silva, M. Christensen, P. Davis, C. Grabmueller et al, “Ensembl Genomes 2018: An integrated omics infrastructure for non-vertebrate species”, Nucleic Acids Research, 46:D1 (2018), D802–D808 | DOI
[34] P. Jones, D. Binns, H. Y. Chang, M. Fraser, W. Li, C. McAnulla, H. McWilliam, J. Maslen, A. Mitchell, G. Nuka et al, “InterProScan 5: Genome-scale protein function classification”, Bioinformatics, 30:9 (2014), 1236–1240 | DOI
[35] PostgreSQL: The world's most advanced open source database, (accessed: 23.10.2020) https://www.postgresql.org/
[36] H. J. Schönig, Mastering PostgreSQL 11: Expert techniques to build scalable, reliable, and fault-tolerant database applications, Packt Publishing Ltd, Birmingham, 2018, 448 pp.
[37] SQLAlchemy The Database Toolkit for Python, (accessed: 23.10.2020) https://www.sqlalchemy.org/
[38] PostgreSQL: Documentation: 12: 11.2. Index Types, (accessed: 23.10.2020) https://www.postgresql.org/docs/12/indexes-types.html
[39] S. Carbon, E. Douglass, N. Dunn, B. Good, N. L. Harris, S. E. Lewis, C. J. Mungall, S. Basu, R. L. Chisholm, R. J. Dodson et al, “The Gene Ontology Resource: 20 years and still GOing strong”, Nucleic Acids Research, 47:D1 (2019), D330–D338
[40] D. Petković, “JSON integration in relational database systems”, Int. J. Comput. Appl., 168:5 (2017), 14–19
[41] M. Kaur, B. Shaik, PostgreSQL Development Essentials, Packt Publishing Ltd, Birmingham, 2016, 210 pp.
[42] DataGrip: kross-platformennaya sreda razrabotki dlya baz dannykh i SQL, (accessed: 23.10.2020) https://www.jetbrains.com/ru-ru/datagrip/
[43] pgAdmin PostgreSQL Tools, (accessed: 23.10.2020) https://www.pgadmin.org/