{"id":"https://openalex.org/W4405709132","doi":"https://doi.org/10.1109/iscslp63861.2024.10800441","title":"Arti-Invar: A Pre-trained Model for Enhancing Acoustic-to-Articulatory Inversion Performance","display_name":"Arti-Invar: A Pre-trained Model for Enhancing Acoustic-to-Articulatory Inversion Performance","publication_year":2024,"publication_date":"2024-11-07","ids":{"openalex":"https://openalex.org/W4405709132","doi":"https://doi.org/10.1109/iscslp63861.2024.10800441"},"language":"en","primary_location":{"id":"doi:10.1109/iscslp63861.2024.10800441","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iscslp63861.2024.10800441","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE 14th International Symposium on Chinese Spoken Language Processing (ISCSLP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100958561","display_name":"Yifeng Sun","orcid":null},"institutions":[{"id":"https://openalex.org/I115212828","display_name":"Beijing Language and Culture University","ror":"https://ror.org/03te2zs36","country_code":"CN","type":"education","lineage":["https://openalex.org/I115212828"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yifeng Sun","raw_affiliation_strings":["School of Information Science, Beijing Language and Culture University"],"affiliations":[{"raw_affiliation_string":"School of Information Science, Beijing Language and Culture University","institution_ids":["https://openalex.org/I115212828"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5006684220","display_name":"Yanlu Xie","orcid":"https://orcid.org/0000-0001-6765-4808"},"institutions":[{"id":"https://openalex.org/I115212828","display_name":"Beijing Language and Culture University","ror":"https://ror.org/03te2zs36","country_code":"CN","type":"education","lineage":["https://openalex.org/I115212828"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yanlu Xie","raw_affiliation_strings":["School of Information Science, Beijing Language and Culture University"],"affiliations":[{"raw_affiliation_string":"School of Information Science, Beijing Language and Culture University","institution_ids":["https://openalex.org/I115212828"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101732656","display_name":"Jingsong Zhang","orcid":"https://orcid.org/0000-0002-9023-9933"},"institutions":[{"id":"https://openalex.org/I115212828","display_name":"Beijing Language and Culture University","ror":"https://ror.org/03te2zs36","country_code":"CN","type":"education","lineage":["https://openalex.org/I115212828"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jinsong Zhang","raw_affiliation_strings":["School of Information Science, Beijing Language and Culture University"],"affiliations":[{"raw_affiliation_string":"School of Information Science, Beijing Language and Culture University","institution_ids":["https://openalex.org/I115212828"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5102845007","display_name":"Dengfeng Ke","orcid":"https://orcid.org/0000-0001-8459-0412"},"institutions":[{"id":"https://openalex.org/I115212828","display_name":"Beijing Language and Culture University","ror":"https://ror.org/03te2zs36","country_code":"CN","type":"education","lineage":["https://openalex.org/I115212828"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Dengfeng Ke","raw_affiliation_strings":["School of Information Science, Beijing Language and Culture University"],"affiliations":[{"raw_affiliation_string":"School of Information Science, Beijing Language and Culture University","institution_ids":["https://openalex.org/I115212828"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5100958561"],"corresponding_institution_ids":["https://openalex.org/I115212828"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.28571046,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"154","last_page":"158"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10403","display_name":"Phonetics and Phonology Research","score":0.9890999794006348,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T10403","display_name":"Phonetics and Phonology Research","score":0.9890999794006348,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9732000231742859,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9674999713897705,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/inversion","display_name":"Inversion (geology)","score":0.5518322587013245},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5380672216415405},{"id":"https://openalex.org/keywords/acoustics","display_name":"Acoustics","score":0.4807112514972687},{"id":"https://openalex.org/keywords/invar","display_name":"Invar","score":0.4573405086994171},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.34599319100379944},{"id":"https://openalex.org/keywords/materials-science","display_name":"Materials science","score":0.26319751143455505},{"id":"https://openalex.org/keywords/geology","display_name":"Geology","score":0.1347077190876007},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.08672091364860535},{"id":"https://openalex.org/keywords/seismology","display_name":"Seismology","score":0.04437434673309326},{"id":"https://openalex.org/keywords/thermal-expansion","display_name":"Thermal expansion","score":0.04403036832809448}],"concepts":[{"id":"https://openalex.org/C1893757","wikidata":"https://www.wikidata.org/wiki/Q3653001","display_name":"Inversion (geology)","level":3,"score":0.5518322587013245},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5380672216415405},{"id":"https://openalex.org/C24890656","wikidata":"https://www.wikidata.org/wiki/Q82811","display_name":"Acoustics","level":1,"score":0.4807112514972687},{"id":"https://openalex.org/C2776439352","wikidata":"https://www.wikidata.org/wiki/Q898215","display_name":"Invar","level":3,"score":0.4573405086994171},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.34599319100379944},{"id":"https://openalex.org/C192562407","wikidata":"https://www.wikidata.org/wiki/Q228736","display_name":"Materials science","level":0,"score":0.26319751143455505},{"id":"https://openalex.org/C127313418","wikidata":"https://www.wikidata.org/wiki/Q1069","display_name":"Geology","level":0,"score":0.1347077190876007},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.08672091364860535},{"id":"https://openalex.org/C165205528","wikidata":"https://www.wikidata.org/wiki/Q83371","display_name":"Seismology","level":1,"score":0.04437434673309326},{"id":"https://openalex.org/C47463417","wikidata":"https://www.wikidata.org/wiki/Q6583695","display_name":"Thermal expansion","level":2,"score":0.04403036832809448},{"id":"https://openalex.org/C191897082","wikidata":"https://www.wikidata.org/wiki/Q11467","display_name":"Metallurgy","level":1,"score":0.0},{"id":"https://openalex.org/C77928131","wikidata":"https://www.wikidata.org/wiki/Q193343","display_name":"Tectonics","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/iscslp63861.2024.10800441","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iscslp63861.2024.10800441","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE 14th International Symposium on Chinese Spoken Language Processing (ISCSLP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/13","display_name":"Climate action","score":0.4699999988079071}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":23,"referenced_works":["https://openalex.org/W46785905","https://openalex.org/W201148316","https://openalex.org/W221422375","https://openalex.org/W1481348651","https://openalex.org/W1525807100","https://openalex.org/W1531956331","https://openalex.org/W2006775235","https://openalex.org/W2013317861","https://openalex.org/W2069618035","https://openalex.org/W2105478683","https://openalex.org/W2131770202","https://openalex.org/W2295705173","https://openalex.org/W2395899413","https://openalex.org/W2507804770","https://openalex.org/W2622158094","https://openalex.org/W2752782242","https://openalex.org/W2780494472","https://openalex.org/W2972659941","https://openalex.org/W3034552520","https://openalex.org/W3095437272","https://openalex.org/W4224918482","https://openalex.org/W6711834147","https://openalex.org/W6784141141"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2104151821","https://openalex.org/W1989952468","https://openalex.org/W4386225843","https://openalex.org/W2014305836","https://openalex.org/W3126795526","https://openalex.org/W2007652691","https://openalex.org/W2026002561"],"abstract_inverted_index":{"Acoustic-to-Articulatory":[0],"Inversion":[1,121],"(AAI)":[2],"aims":[3],"to":[4,44,77,93,97,109,138],"reconstruct":[5],"the":[6,29,38,56,71,95],"trajectories":[7],"of":[8,15,36,52,62,73,114,135],"articulators":[9],"from":[10],"speech":[11],"signals.":[12],"The":[13,105],"training":[14],"an":[16,120,152],"inversion":[17,91],"model":[18,79,92],"involves":[19],"learning":[20,74],"information":[21],"about":[22],"articulatory":[23,63],"invariants":[24,76],"and":[25,46,85,119,123,145],"speaker":[26,103],"idiosyncrasies.":[27,104],"While":[28],"former":[30],"needs":[31],"only":[32],"a":[33,49,60,78,82,89,115,127,168],"limited":[34],"amount":[35],"data,":[37],"latter":[39,96],"requires":[40],"far":[41],"more":[42],"data":[43,101],"learn":[45],"is":[47],"thus":[48],"main":[50],"contributor":[51],"low":[53],"performances":[54],"as":[55,110,151],"AAI":[57,139],"task":[58,72],"suffers":[59],"scarcity":[61],"data.":[64],"Regarding":[65],"this":[66],"problem,":[67],"we":[68],"proposed":[69],"transferring":[70],"linguistic":[75],"pre-trained":[80,106],"on":[81,142,160,167],"single":[83],"corpus":[84,171],"combining":[86],"it":[87,137],"with":[88],"normal":[90],"allow":[94],"better":[98],"exploit":[99],"given":[100],"for":[102],"model,":[107,112],"referred":[108],"Arti-Invar":[111],"consists":[113],"Content":[116],"Encoder":[117],"Module":[118,122],"was":[124],"trained":[125],"using":[126],"PCC-based":[128],"loss":[129],"function.":[130],"We":[131],"tested":[132],"several":[133],"manners":[134],"applying":[136],"tasks":[140],"performed":[141],"other":[143],"corpora":[144],"found":[146],"that":[147],"taking":[148],"its":[149,173],"output":[150],"additional":[153],"input":[154],"features":[155],"could":[156],"bring":[157],"slight":[158],"improvements":[159],"reconstruction":[161],"scores":[162],"(0.07":[163],"mm).":[164],"Further":[165],"experiments":[166],"Chinese":[169],"EMA":[170],"proved":[172],"generalizability.":[174]},"counts_by_year":[],"updated_date":"2025-12-22T23:10:17.713674","created_date":"2025-10-10T00:00:00"}
