{"id":"https://openalex.org/W3163527543","doi":"https://doi.org/10.1109/icassp39728.2021.9413888","title":"Encoder-Decoder Based Pitch Tracking and Joint Model Training for Mandarin Tone Classification","display_name":"Encoder-Decoder Based Pitch Tracking and Joint Model Training for Mandarin Tone Classification","publication_year":2021,"publication_date":"2021-05-13","ids":{"openalex":"https://openalex.org/W3163527543","doi":"https://doi.org/10.1109/icassp39728.2021.9413888","mag":"3163527543"},"language":"en","primary_location":{"id":"doi:10.1109/icassp39728.2021.9413888","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp39728.2021.9413888","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2021 - 2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5024772368","display_name":"Hao Huang","orcid":"https://orcid.org/0000-0001-6604-0951"},"institutions":[{"id":"https://openalex.org/I96908189","display_name":"Xinjiang University","ror":"https://ror.org/059gw8r13","country_code":"CN","type":"education","lineage":["https://openalex.org/I96908189"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Hao Huang","raw_affiliation_strings":["School of Information Science and Engineering, Xinjiang University, Urumqi, China","Xinjiang Provincial Key Laboratory of Multi-lingual Information Technology, Urumqi, China"],"affiliations":[{"raw_affiliation_string":"School of Information Science and Engineering, Xinjiang University, Urumqi, China","institution_ids":["https://openalex.org/I96908189"]},{"raw_affiliation_string":"Xinjiang Provincial Key Laboratory of Multi-lingual Information Technology, Urumqi, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101575588","display_name":"Kai Wang","orcid":"https://orcid.org/0000-0001-9767-2951"},"institutions":[{"id":"https://openalex.org/I96908189","display_name":"Xinjiang University","ror":"https://ror.org/059gw8r13","country_code":"CN","type":"education","lineage":["https://openalex.org/I96908189"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Kai Wang","raw_affiliation_strings":["School of Information Science and Engineering, Xinjiang University, Urumqi, China"],"affiliations":[{"raw_affiliation_string":"School of Information Science and Engineering, Xinjiang University, Urumqi, China","institution_ids":["https://openalex.org/I96908189"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5042363663","display_name":"Ying Hu","orcid":"https://orcid.org/0000-0001-7505-1767"},"institutions":[{"id":"https://openalex.org/I96908189","display_name":"Xinjiang University","ror":"https://ror.org/059gw8r13","country_code":"CN","type":"education","lineage":["https://openalex.org/I96908189"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ying Hu","raw_affiliation_strings":["School of Information Science and Engineering, Xinjiang University, Urumqi, China"],"affiliations":[{"raw_affiliation_string":"School of Information Science and Engineering, Xinjiang University, Urumqi, China","institution_ids":["https://openalex.org/I96908189"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5053726259","display_name":"Sheng Li","orcid":"https://orcid.org/0000-0001-7636-3797"},"institutions":[{"id":"https://openalex.org/I90023481","display_name":"National Institute of Information and Communications Technology","ror":"https://ror.org/016bgq349","country_code":"JP","type":"facility","lineage":["https://openalex.org/I90023481"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Sheng Li","raw_affiliation_strings":["National Institute of Information and Communications Technology (NICT), Kyoto, Japan"],"affiliations":[{"raw_affiliation_string":"National Institute of Information and Communications Technology (NICT), Kyoto, Japan","institution_ids":["https://openalex.org/I90023481"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5024772368"],"corresponding_institution_ids":["https://openalex.org/I96908189"],"apc_list":null,"apc_paid":null,"fwci":0.7714,"has_fulltext":false,"cited_by_count":5,"citation_normalized_percentile":{"value":0.70365169,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":97},"biblio":{"volume":"5","issue":null,"first_page":"6943","last_page":"6947"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9980999827384949,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7942886352539062},{"id":"https://openalex.org/keywords/mandarin-chinese","display_name":"Mandarin Chinese","score":0.7109211683273315},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6130967736244202},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5355373024940491},{"id":"https://openalex.org/keywords/feature-extraction","display_name":"Feature extraction","score":0.5252016186714172},{"id":"https://openalex.org/keywords/cascade","display_name":"Cascade","score":0.4929930865764618},{"id":"https://openalex.org/keywords/classifier","display_name":"Classifier (UML)","score":0.46289435029029846},{"id":"https://openalex.org/keywords/hidden-markov-model","display_name":"Hidden Markov model","score":0.4532058537006378},{"id":"https://openalex.org/keywords/tone","display_name":"Tone (literature)","score":0.45251184701919556},{"id":"https://openalex.org/keywords/viterbi-algorithm","display_name":"Viterbi algorithm","score":0.4311886131763458},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.4296555817127228},{"id":"https://openalex.org/keywords/keyword-spotting","display_name":"Keyword spotting","score":0.41645777225494385},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.384798139333725},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.08092212677001953}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7942886352539062},{"id":"https://openalex.org/C138954614","wikidata":"https://www.wikidata.org/wiki/Q9192","display_name":"Mandarin Chinese","level":2,"score":0.7109211683273315},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6130967736244202},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5355373024940491},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.5252016186714172},{"id":"https://openalex.org/C34146451","wikidata":"https://www.wikidata.org/wiki/Q5048094","display_name":"Cascade","level":2,"score":0.4929930865764618},{"id":"https://openalex.org/C95623464","wikidata":"https://www.wikidata.org/wiki/Q1096149","display_name":"Classifier (UML)","level":2,"score":0.46289435029029846},{"id":"https://openalex.org/C23224414","wikidata":"https://www.wikidata.org/wiki/Q176769","display_name":"Hidden Markov model","level":2,"score":0.4532058537006378},{"id":"https://openalex.org/C2780583480","wikidata":"https://www.wikidata.org/wiki/Q1366327","display_name":"Tone (literature)","level":2,"score":0.45251184701919556},{"id":"https://openalex.org/C60582962","wikidata":"https://www.wikidata.org/wiki/Q83886","display_name":"Viterbi algorithm","level":3,"score":0.4311886131763458},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.4296555817127228},{"id":"https://openalex.org/C2781213101","wikidata":"https://www.wikidata.org/wiki/Q6398558","display_name":"Keyword spotting","level":2,"score":0.41645777225494385},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.384798139333725},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.08092212677001953},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C42360764","wikidata":"https://www.wikidata.org/wiki/Q83588","display_name":"Chemical engineering","level":1,"score":0.0},{"id":"https://openalex.org/C124952713","wikidata":"https://www.wikidata.org/wiki/Q8242","display_name":"Literature","level":1,"score":0.0},{"id":"https://openalex.org/C142362112","wikidata":"https://www.wikidata.org/wiki/Q735","display_name":"Art","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/icassp39728.2021.9413888","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp39728.2021.9413888","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2021 - 2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},{"id":"pmh:oai:t2r2.star.titech.ac.jp:50728729","is_oa":false,"landing_page_url":"http://t2r2.star.titech.ac.jp/cgi-bin/publicationinfo.cgi?q_publication_content_number=CTT100930927","pdf_url":null,"source":{"id":"https://openalex.org/S4377196385","display_name":"Tokyo Tech Research Repository (Tokyo Institute of Technology)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I114531698","host_organization_name":"Tokyo Institute of Technology","host_organization_lineage":["https://openalex.org/I114531698"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Conference Paper"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":44,"referenced_works":["https://openalex.org/W55188809","https://openalex.org/W1524333225","https://openalex.org/W1648698152","https://openalex.org/W1975079546","https://openalex.org/W1980552887","https://openalex.org/W2017382318","https://openalex.org/W2034300186","https://openalex.org/W2084044763","https://openalex.org/W2085628288","https://openalex.org/W2091425152","https://openalex.org/W2107831318","https://openalex.org/W2108771579","https://openalex.org/W2118680762","https://openalex.org/W2118774185","https://openalex.org/W2130942839","https://openalex.org/W2133564696","https://openalex.org/W2134100300","https://openalex.org/W2146502635","https://openalex.org/W2153728019","https://openalex.org/W2155820021","https://openalex.org/W2156615793","https://openalex.org/W2171321863","https://openalex.org/W2405229007","https://openalex.org/W2480617478","https://openalex.org/W2509088535","https://openalex.org/W2962866891","https://openalex.org/W2963403868","https://openalex.org/W2964308564","https://openalex.org/W2972705345","https://openalex.org/W2973011366","https://openalex.org/W2994325518","https://openalex.org/W3011176162","https://openalex.org/W3124061379","https://openalex.org/W4237468081","https://openalex.org/W4385245566","https://openalex.org/W6631362777","https://openalex.org/W6637086781","https://openalex.org/W6676245417","https://openalex.org/W6679436768","https://openalex.org/W6681435938","https://openalex.org/W6683277039","https://openalex.org/W6683422774","https://openalex.org/W6721786912","https://openalex.org/W6739901393"],"related_works":["https://openalex.org/W2170821097","https://openalex.org/W2388033618","https://openalex.org/W2393887907","https://openalex.org/W2136652457","https://openalex.org/W2347443630","https://openalex.org/W2388826840","https://openalex.org/W2481138693","https://openalex.org/W2379831327","https://openalex.org/W2386691339","https://openalex.org/W156219719"],"abstract_inverted_index":{"We":[0,41,118],"pursue":[1],"an":[2,110],"interpretable":[3],"pitch":[4,18,24,38,71,96,121,143,147,175],"tracking":[5,39,148],"model":[6,12,25,149,163],"and":[7,58,98,101,123,153,178],"a":[8,74,179],"jointly":[9],"trained":[10,107],"tone":[11,15,77,92,99,124,168],"for":[13],"Mandarin":[14,76,128],"classification.":[16],"For":[17],"tracking,":[19],"present":[20],"deep":[21],"learning":[22],"based":[23,44],"structure":[26],"seldom":[27],"considers":[28],"the":[29,54,64,70,87,95,103,134,137,151,161,165],"Viterbi":[30,59],"decoding":[31],"commonly":[32],"implemented":[33,62],"in":[34,63,91,109],"prevalent":[35],"manually":[36],"designed":[37],"algorithms.":[40],"propose":[42],"RNN":[43],"Encoder-Decoder":[45],"framework":[46,170],"with":[47],"gating":[48],"mechanism":[49],"which":[50,171],"underlying":[51],"models":[52],"both":[53],"state":[55],"cost":[56],"estimation":[57],"back-tracing":[60],"pass":[61],"RAPT":[65],"algorithm.":[66],"Then":[67],"we":[68],"apply":[69],"extractor":[72,97],"to":[73,84,132],"down-stream":[75],"classification":[78,93,125,157,169],"task.":[79],"The":[80],"basic":[81],"motivation":[82],"is":[83],"combine":[85],"together":[86],"two":[88],"conventional":[89],"components":[90],"(i.e.,":[94],"classifier)":[100],"then":[102],"whole":[104],"network":[105],"are":[106,116],"simultaneously":[108],"end-to-end":[111],"fashion.":[112],"Various":[113],"cascade":[114,167],"methods":[115],"evaluated.":[117],"carry":[119],"out":[120],"extraction":[122,144],"experiments":[126],"on":[127,142],"continuous":[129],"speech":[130],"database":[131],"show":[133,145,160],"superiority":[135],"of":[136,174],"proposed":[138,146],"models.":[139],"Experimental":[140],"results":[141,159],"outperforms":[150,164],"DNN-RNN":[152],"bi-directional":[154],"variants.":[155],"Tone":[156],"experimental":[158],"composite":[162],"traditional":[166],"makes":[172],"use":[173],"related":[176],"feature":[177],"back-end":[180],"classifier.":[181]},"counts_by_year":[{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":1}],"updated_date":"2026-04-05T17:49:38.594831","created_date":"2025-10-10T00:00:00"}
