{"id":"https://openalex.org/W3163341344","doi":"https://doi.org/10.1109/icassp39728.2021.9413742","title":"A Two-Stage Deep Modeling Approach to Articulatory Inversion","display_name":"A Two-Stage Deep Modeling Approach to Articulatory Inversion","publication_year":2021,"publication_date":"2021-05-13","ids":{"openalex":"https://openalex.org/W3163341344","doi":"https://doi.org/10.1109/icassp39728.2021.9413742","mag":"3163341344"},"language":"en","primary_location":{"id":"doi:10.1109/icassp39728.2021.9413742","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp39728.2021.9413742","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2021 - 2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://hdl.handle.net/10447/636672","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5032571112","display_name":"Abdolreza Sabzi Shahrebabaki","orcid":"https://orcid.org/0000-0002-0877-9456"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Abdolreza Sabzi Shahrebabaki","raw_affiliation_strings":["NTNU","NTNU,Department of Electronic Systems"],"affiliations":[{"raw_affiliation_string":"NTNU","institution_ids":[]},{"raw_affiliation_string":"NTNU,Department of Electronic Systems","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5054931657","display_name":"Negar Olfati","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Negar Olfati","raw_affiliation_strings":["NTNU","NTNU,Department of Electronic Systems"],"affiliations":[{"raw_affiliation_string":"NTNU","institution_ids":[]},{"raw_affiliation_string":"NTNU,Department of Electronic Systems","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5044015235","display_name":"Ali Shariq Imran","orcid":"https://orcid.org/0000-0002-2416-2878"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ali Shariq Imran","raw_affiliation_strings":["NTNU","NTNU,Department of Electronic Systems"],"affiliations":[{"raw_affiliation_string":"NTNU","institution_ids":[]},{"raw_affiliation_string":"NTNU,Department of Electronic Systems","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5028516776","display_name":"Magne H. Johnsen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Magne Hallstein Johnsen","raw_affiliation_strings":["NTNU","NTNU,Department of Electronic Systems"],"affiliations":[{"raw_affiliation_string":"NTNU","institution_ids":[]},{"raw_affiliation_string":"NTNU,Department of Electronic Systems","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5079659476","display_name":"Sabato Marco Siniscalchi","orcid":"https://orcid.org/0000-0002-0770-0507"},"institutions":[{"id":"https://openalex.org/I246010334","display_name":"Universit\u00e0 degli Studi di Enna Kore","ror":"https://ror.org/04vd28p53","country_code":"IT","type":"education","lineage":["https://openalex.org/I246010334"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Sabato Marco Siniscalchi","raw_affiliation_strings":["Kore University of Enna","NTNU","Kore University of Enna; NTNU"],"affiliations":[{"raw_affiliation_string":"Kore University of Enna","institution_ids":["https://openalex.org/I246010334"]},{"raw_affiliation_string":"NTNU","institution_ids":[]},{"raw_affiliation_string":"Kore University of Enna; NTNU","institution_ids":["https://openalex.org/I246010334"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5055323428","display_name":"Torbj\u00f8rn Svendsen","orcid":"https://orcid.org/0000-0003-0578-7941"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Torbjorn Svendsen","raw_affiliation_strings":["NTNU","NTNU,Department of Electronic Systems"],"affiliations":[{"raw_affiliation_string":"NTNU","institution_ids":[]},{"raw_affiliation_string":"NTNU,Department of Electronic Systems","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5032571112"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.1539,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.41896517,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"6453","last_page":"6457"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/inversion","display_name":"Inversion (geology)","score":0.7895485758781433},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7009643912315369},{"id":"https://openalex.org/keywords/deep-neural-networks","display_name":"Deep neural networks","score":0.5493955612182617},{"id":"https://openalex.org/keywords/mean-squared-error","display_name":"Mean squared error","score":0.4695383310317993},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.4473094046115875},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.43307697772979736},{"id":"https://openalex.org/keywords/recurrent-neural-network","display_name":"Recurrent neural network","score":0.4325385093688965},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.4242447316646576},{"id":"https://openalex.org/keywords/baseline","display_name":"Baseline (sea)","score":0.42396217584609985},{"id":"https://openalex.org/keywords/reduction","display_name":"Reduction (mathematics)","score":0.41119006276130676},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3624386191368103},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.22473615407943726},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.151431143283844}],"concepts":[{"id":"https://openalex.org/C1893757","wikidata":"https://www.wikidata.org/wiki/Q3653001","display_name":"Inversion (geology)","level":3,"score":0.7895485758781433},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7009643912315369},{"id":"https://openalex.org/C2984842247","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep neural networks","level":3,"score":0.5493955612182617},{"id":"https://openalex.org/C139945424","wikidata":"https://www.wikidata.org/wiki/Q1940696","display_name":"Mean squared error","level":2,"score":0.4695383310317993},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.4473094046115875},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.43307697772979736},{"id":"https://openalex.org/C147168706","wikidata":"https://www.wikidata.org/wiki/Q1457734","display_name":"Recurrent neural network","level":3,"score":0.4325385093688965},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.4242447316646576},{"id":"https://openalex.org/C12725497","wikidata":"https://www.wikidata.org/wiki/Q810247","display_name":"Baseline (sea)","level":2,"score":0.42396217584609985},{"id":"https://openalex.org/C111335779","wikidata":"https://www.wikidata.org/wiki/Q3454686","display_name":"Reduction (mathematics)","level":2,"score":0.41119006276130676},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3624386191368103},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.22473615407943726},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.151431143283844},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C111368507","wikidata":"https://www.wikidata.org/wiki/Q43518","display_name":"Oceanography","level":1,"score":0.0},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C109007969","wikidata":"https://www.wikidata.org/wiki/Q749565","display_name":"Structural basin","level":2,"score":0.0},{"id":"https://openalex.org/C127313418","wikidata":"https://www.wikidata.org/wiki/Q1069","display_name":"Geology","level":0,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1109/icassp39728.2021.9413742","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp39728.2021.9413742","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2021 - 2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},{"id":"pmh:oai:iris.unipa.it:10447/636672","is_oa":true,"landing_page_url":"https://hdl.handle.net/10447/636672","pdf_url":null,"source":{"id":"https://openalex.org/S4306401065","display_name":"Nova Science Publishers (Nova Science Publishers, Inc.)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"info:eu-repo/semantics/bookPart"},{"id":"pmh:oai:munin.uit.no:10037/31359","is_oa":true,"landing_page_url":"https://hdl.handle.net/10037/31359","pdf_url":null,"source":{"id":"https://openalex.org/S4306401716","display_name":"Duo Research Archive (University of Oslo)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I184942183","host_organization_name":"University of Oslo","host_organization_lineage":["https://openalex.org/I184942183"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"info:eu-repo/semantics/bookPart"},{"id":"pmh:oai:ntnuopen.ntnu.no:11250/3025349","is_oa":true,"landing_page_url":"https://hdl.handle.net/11250/3025349","pdf_url":null,"source":{"id":"https://openalex.org/S4306401716","display_name":"Duo Research Archive (University of Oslo)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I184942183","host_organization_name":"University of Oslo","host_organization_lineage":["https://openalex.org/I184942183"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"info:eu-repo/semantics/bookPart"}],"best_oa_location":{"id":"pmh:oai:iris.unipa.it:10447/636672","is_oa":true,"landing_page_url":"https://hdl.handle.net/10447/636672","pdf_url":null,"source":{"id":"https://openalex.org/S4306401065","display_name":"Nova Science Publishers (Nova Science Publishers, Inc.)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"info:eu-repo/semantics/bookPart"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":26,"referenced_works":["https://openalex.org/W900447646","https://openalex.org/W1525807100","https://openalex.org/W1531956331","https://openalex.org/W1572063013","https://openalex.org/W1982854652","https://openalex.org/W2006775235","https://openalex.org/W2052382192","https://openalex.org/W2066381494","https://openalex.org/W2102152293","https://openalex.org/W2105478683","https://openalex.org/W2116082100","https://openalex.org/W2166700615","https://openalex.org/W2231075402","https://openalex.org/W2395955767","https://openalex.org/W2402144811","https://openalex.org/W2402610474","https://openalex.org/W2507804770","https://openalex.org/W2622158094","https://openalex.org/W2766736793","https://openalex.org/W2787248829","https://openalex.org/W2889371465","https://openalex.org/W2953384591","https://openalex.org/W4231807801","https://openalex.org/W6631452893","https://openalex.org/W6711834147","https://openalex.org/W6713398208"],"related_works":["https://openalex.org/W2383111961","https://openalex.org/W2365952365","https://openalex.org/W2352448290","https://openalex.org/W2380820513","https://openalex.org/W2913146933","https://openalex.org/W2372385138","https://openalex.org/W4296359239","https://openalex.org/W2101155126","https://openalex.org/W2043093291","https://openalex.org/W2964006806"],"abstract_inverted_index":{"This":[0],"paper":[1],"proposes":[2],"a":[3,19,41,60,65,78,85,95,100,162,182],"two-stage":[4,66],"deep":[5],"feed-forward":[6],"neural":[7],"network":[8],"(DNN)":[9],"to":[10,50,137,147,168],"tackle":[11],"the":[12,23,27,31,52,89,120,138,169,172],"acoustic-to-articulatory":[13,173],"inversion":[14,121],"(AAI)":[15],"problem.":[16],"DNNs":[17],"are":[18,75],"viable":[20],"solution":[21,62,67],"for":[22],"AAI":[24,159],"task,":[25],"but":[26],"temporal":[28,56,86],"continuity":[29],"of":[30,54,88,109,130,134,143,171],"estimated":[32,76,90],"articulatory":[33],"values":[34],"has":[35],"not":[36],"been":[37],"exploited":[38],"properly":[39],"when":[40],"DNN":[42,97],"is":[43,92,150,160,178],"employed.":[44],"In":[45],"this":[46],"work,":[47],"we":[48,154],"propose":[49],"address":[51],"lack":[53],"any":[55],"constraints":[57,118],"while":[58],"enforcing":[59],"parameter-parsimonious":[61],"by":[63],"deploying":[64],"based":[68],"only":[69],"on":[70,119],"DNNs:":[71],"(i)":[72],"Articulatory":[73],"trajectories":[74,91],"in":[77,94,132],"first":[79,103],"stage":[80,98,104],"using":[81],"DNN,":[82],"and":[83,140,174],"(ii)":[84],"window":[87],"used":[93],"follow-up":[96],"as":[99,110],"refinement.":[101],"The":[102],"estimation":[105],"could":[106],"be":[107],"thought":[108],"an":[111,126,141],"auxiliary":[112],"additional":[113],"information":[114],"that":[115,158,181],"poses":[116],"some":[117],"process.":[122],"Experimental":[123],"evidence":[124],"demonstrates":[125],"average":[127],"error":[128],"reduction":[129],"7.51%":[131],"terms":[133],"RMSE":[135],"compared":[136],"baseline,":[139],"improvement":[142,184],"2.39%":[144],"with":[145,187],"respect":[146],"Pearson":[148],"correlation":[149],"also":[151],"attained.":[152],"Finally,":[153],"should":[155],"point":[156],"out":[157],"still":[161],"highly":[163],"challenging":[164],"problem,":[165],"mainly":[166],"due":[167],"non-linearity":[170],"one-to-many":[175],"mapping.":[176],"It":[177],"thus":[179],"promising":[180],"significant":[183],"was":[185],"attained":[186],"our":[188],"simple":[189],"yet":[190],"elegant":[191],"solution.":[192]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2023,"cited_by_count":1}],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2021-05-24T00:00:00"}
