Voir la notice de l'article provenant de la source Math-Net.Ru
@comment{Citation record for the article with key BGUMI_2019_3_a8; the item page is given in the url field.}
@article{BGUMI_2019_3_a8,
  author    = {T. Y. Thai and H. N. Huy and D. V. Tuyet and S. V. Ablameyko and D. V. Hoa and N. V. Hung},
  title     = {Tonal languages speech synthesis using an indirect pitch markers and the quantitative target approximation methods},
  journal   = {Journal of the Belarusian State University. Mathematics and Informatics},
  volume    = {3},
  pages     = {105--121},
  year      = {2019},
  publisher = {mathdoc},
  language  = {en},
  url       = {http://geodesic.mathdoc.fr/item/BGUMI_2019_3_a8/},
}
TY  - JOUR
AU  - T. Y. Thai
AU  - H. N. Huy
AU  - D. V. Tuyet
AU  - S. V. Ablameyko
AU  - D. V. Hoa
AU  - N. V. Hung
TI  - Tonal languages speech synthesis using an indirect pitch markers and the quantitative target approximation methods
JO  - Journal of the Belarusian State University. Mathematics and Informatics
PY  - 2019
SP  - 105
EP  - 121
VL  - 3
PB  - mathdoc
UR  - http://geodesic.mathdoc.fr/item/BGUMI_2019_3_a8/
LA  - en
ID  - BGUMI_2019_3_a8
ER  -
%0 Journal Article
%A T. Y. Thai
%A H. N. Huy
%A D. V. Tuyet
%A S. V. Ablameyko
%A D. V. Hoa
%A N. V. Hung
%T Tonal languages speech synthesis using an indirect pitch markers and the quantitative target approximation methods
%J Journal of the Belarusian State University. Mathematics and Informatics
%D 2019
%P 105-121
%V 3
%I mathdoc
%U http://geodesic.mathdoc.fr/item/BGUMI_2019_3_a8/
%G en
%F BGUMI_2019_3_a8
T. Y. Thai; H. N. Huy; D. V. Tuyet; S. V. Ablameyko; D. V. Hoa; N. V. Hung. Tonal languages speech synthesis using an indirect pitch markers and the quantitative target approximation methods. Journal of the Belarusian State University. Mathematics and Informatics, Tome 3 (2019), pp. 105-121. http://geodesic.mathdoc.fr/item/BGUMI_2019_3_a8/
[1] M. D. Kovacs, M. Y. Cho, P. F. Burchett, M. Trambert, “Benefits of integrated RIS/PACS/Reporting due to automatic population of templated reports”, Current Problems in Diagnostic Radiology, 48(1) (2019), 37–39 | DOI
[2] M. Plonkowski, P. Urbanovich, “The use of pitch in large-vocabulary continuous speech recognition system”, Przeglad Elektrotechniczny, 92(8) (2016), 78–81 | MR
[3] D. Wang, J. H. L. Hansen, “F0 estimation for noisy speech by exploring temporal harmonic structures in local time frequency spectrum segment”, IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP) (Shanghai, China), 2016, 6510–6514 | DOI | MR
[4] D. Talkin, “A Robust Algorithm for Pitch Tracking (RAPT)”, Speech Coding and Synthesis, Elsevier Science BV, 1995, 495–518
[5] Y. Xu, S. Prom-on, “Articulatory-functional modeling of speech prosody: a review”, Proceedings of the 11th Annual Conference of the International Speech Communication Association (INTERSPEECH-2010) (Makuhari, Chiba, Japan). International Speech Communication Association, 2010, 46–49
[6] A. Kounoudes, P. A. Naylor, M. Brookes, “The DYPSA algorithm for estimation of glottal closure instants in voiced speech”, Proceedings of International Conference on Acoustics, Speech and Signal Processing (ICASSP'02) (Orlando, FL, USA), 2002, I349–I352 | DOI
[7] R. Smits, B. Yegnanarayana, “Determination of instants of significant excitation in speech using group delay function”, IEEE Transactions on Speech and Audio Processing, 3(5) (1995), 325–333 | DOI
[8] S. Prom-on, F. Liu, Y. Xu, “Functional modeling of tone, focus and sentence type in Mandarin Chinese”, Proceedings of the 17th International Congress of Phonetic Sciences (China), 2011, 1638–1641, Hong Kong: City University of Hong Kong
[9] G. Bailly, B. Holm, “SFC: a trainable prosodic model”, Speech Communication, 46(3–4) (2005), 348–364 | DOI
[10] H. Fujisaki, “Dynamic characteristics of voice fundamental frequency in speech and singing”, The Production of Speech, New York: Springer, 1983, 39–55 | DOI
[11] G. Kochanski, C. Shih, “Prosody modeling with soft templates”, Speech Communication, 39(3–4) (2003), 311–352 | DOI | Zbl
[12] H. Fujisaki, K. Hirose, “Analysis of voice fundamental frequency contours for declarative sentences of Japanese”, Journal of the Acoustical Society of Japan, 5(4) (1984), 233–242
[13] Y. Xu, Q. E. Wang, “Pitch targets and their realization: evidence from Mandarin”, Speech Communication, 33(4) (2001), 319–337 | DOI | Zbl
[14] T. Y. Thai, N. V. Hung, D. V. Tuyet, NHo. Huy, S. Ablameyko, “An effective algorithm for determining pitch markers of Vietnamese speech sentences”, Advances in Neural Networks – ISNN’2018. Proceedings of the 15th International Symposium on Neural Networks, ISNN’2018 (Minsk, Belarus), 10878 (2018), 628–636, Cham: Springer | MR
[15] M. Brookes, “Voicebox: speech processing toolbox for MATLAB”, [Internet] | DOI
[16] Y. Xu, S. Prom-on, “Toward invariant functional representations of variable surface fundamental frequency trajectories: synthesizing speech melody via model-based stochastic learning”, Speech Communication, 57 (2014), 181–208 | DOI
[17] K. Weierstrass, “Über die analytische Darstellbarkeit sogenannter willkürlicher Funktionen einer reellen Veränderlichen”, Sitzungsberichte der Königlich Preussischen Akademie der Wissenschaften zu Berlin, 1885, 633–639
[18] J. P. Cabral, J. Kane, C. Gobl, J. Carson-Berndsen, “Evaluation of glottal epoch detection algorithms on different voice types”, Proceedings of the 12th Annual Conference of the International Speech Communication Association (INTERSPEECH-2011) (Florence, Italy). International Speech Communication Association, 2011, 1989–1992
[19] “Optimizing Nonlinear Functions – MATLAB and Simulink”, [Internet], 2019 | DOI
[20] Y. Xu, S. Prom-on, “What is PENTAtrainer2”, [Internet], 2019 | DOI
[21] S. Prom-on, Y. Xu, “The qTA toolkit for prosody: learning underlying parameters of communicative functions through modeling”, Proceedings of Speech Prosody 2010, 100034 (2010), 1–4
[22] J. H. Chen, Y. A. Kao, “Pitch marking based on an adaptable filter and a peak-valley estimation method”, Computational Linguistics and Chinese Language Processing, 6(2) (2001), 31–42
[23] P. Boersma, D. Weenink, “Praat: Doing phonetics by computer”, [Internet], 2019 | DOI
[24] O. Babacan, T. Drugman, N. d’Alessandro, N. Henrich, T. Dutoit, “A comparative study of pitch extraction algorithms on a large variety of singing sounds”, Proceedings of International Conference on Acoustics, Speech and Signal Processing (ICASSP'13) (Vancouver, BC, Canada), 2013, 7815–7819 | DOI
[25] “Yin pitch estimator”, [Internet], 2012 | DOI
[26] S. Prom-on, Y. Xu, “Discovering underlying tonal representations by computational modeling: a case study of Thai”, Phonology Journal, 32(3) (2015), 505–535 | DOI
[27] Y. Li, J. Tao, W. Lai, X. Xu, “Quantitative intonation modeling of interrogative sentences for Mandarin speech synthesis”, Speech Communication, 89 (2017), 92–102 | DOI
[28] B. Wang, Y. Xu, Q. Ding, “Interactive prosodic marking of focus, boundary and newness in Mandarin”, Phonetica, 75(1) (2018), 24–56 | DOI
[29] F. Charpentier, M. Stella, “Diphone synthesis using an overlap-add technique for speech waveforms concatenation”, Proceedings of International Conference on Acoustics, Speech and Signal Processing (ICASSP'86) (Tokyo, Japan), 1986, 2015–2018 | DOI
[30] C. X. Xu, Y. Xu, L.-S. Luo, “A pitch target approximation model for F0 trajectories in Mandarin”, Proceedings of the 14th International Congress of Phonetic Sciences (ICPHS’99), 1999, 2359–2362, San Francisco: University of California