{"id":"https://openalex.org/W4404563387","doi":"https://doi.org/10.1109/jstsp.2024.3497655","title":"Coding Speech Through Vocal Tract Kinematics","display_name":"Coding Speech Through Vocal Tract Kinematics","publication_year":2024,"publication_date":"2024-11-20","ids":{"openalex":"https://openalex.org/W4404563387","doi":"https://doi.org/10.1109/jstsp.2024.3497655"},"language":"en","primary_location":{"id":"doi:10.1109/jstsp.2024.3497655","is_oa":false,"landing_page_url":"https://doi.org/10.1109/jstsp.2024.3497655","pdf_url":null,"source":{"id":"https://openalex.org/S42167783","display_name":"IEEE Journal of Selected Topics in Signal Processing","issn_l":"1932-4553","issn":["1932-4553","1941-0484"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Journal of Selected Topics in Signal Processing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5015390421","display_name":"Cheol Jun Cho","orcid":null},"institutions":[{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Cheol Jun Cho","raw_affiliation_strings":["Department of Electrical Engineering and Computer Sciences, University of California, Berkeley, CA, USA","Department of Electrical Engineering and Computer Sciences, University of California, Berkeley, USA"],"raw_orcid":"https://orcid.org/0000-0003-2596-3164","affiliations":[{"raw_affiliation_string":"Department of Electrical Engineering and Computer Sciences, University of California, Berkeley, CA, USA","institution_ids":["https://openalex.org/I95457486"]},{"raw_affiliation_string":"Department of Electrical Engineering and Computer Sciences, University of California, Berkeley, USA","institution_ids":["https://openalex.org/I95457486"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5081837203","display_name":"Peter Wu","orcid":"https://orcid.org/0000-0001-6503-3936"},"institutions":[{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Peter Wu","raw_affiliation_strings":["Department of Electrical Engineering and Computer Sciences, University of California, Berkeley, CA, USA","Department of Electrical Engineering and Computer Sciences, University of California, Berkeley, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Electrical Engineering and Computer Sciences, University of California, Berkeley, CA, USA","institution_ids":["https://openalex.org/I95457486"]},{"raw_affiliation_string":"Department of Electrical Engineering and Computer Sciences, University of California, Berkeley, USA","institution_ids":["https://openalex.org/I95457486"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5071481167","display_name":"Tejas Prabhune","orcid":null},"institutions":[{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Tejas S. Prabhune","raw_affiliation_strings":["Department of Electrical Engineering and Computer Sciences, University of California, Berkeley, CA, USA","Department of Electrical Engineering and Computer Sciences, University of California, Berkeley, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Electrical Engineering and Computer Sciences, University of California, Berkeley, CA, USA","institution_ids":["https://openalex.org/I95457486"]},{"raw_affiliation_string":"Department of Electrical Engineering and Computer Sciences, University of California, Berkeley, USA","institution_ids":["https://openalex.org/I95457486"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Dhruv Agarwal","orcid":"https://orcid.org/0009-0007-1276-4891"},"institutions":[{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Dhruv Agarwal","raw_affiliation_strings":["Department of Electrical Engineering and Computer Sciences, University of California, Berkeley, CA, USA","Department of Electrical Engineering and Computer Sciences, University of California, Berkeley, USA"],"raw_orcid":"https://orcid.org/0009-0007-1276-4891","affiliations":[{"raw_affiliation_string":"Department of Electrical Engineering and Computer Sciences, University of California, Berkeley, CA, USA","institution_ids":["https://openalex.org/I95457486"]},{"raw_affiliation_string":"Department of Electrical Engineering and Computer Sciences, University of California, Berkeley, USA","institution_ids":["https://openalex.org/I95457486"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5068922218","display_name":"Gopala K. Anumanchipalli","orcid":"https://orcid.org/0000-0002-9714-7740"},"institutions":[{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Gopala K. Anumanchipalli","raw_affiliation_strings":["Department of Electrical Engineering and Computer Sciences, University of California, Berkeley, CA, USA","Department of Electrical Engineering and Computer Sciences, University of California, Berkeley, USA"],"raw_orcid":"https://orcid.org/0000-0002-9714-7740","affiliations":[{"raw_affiliation_string":"Department of Electrical Engineering and Computer Sciences, University of California, Berkeley, CA, USA","institution_ids":["https://openalex.org/I95457486"]},{"raw_affiliation_string":"Department of Electrical Engineering and Computer Sciences, University of California, Berkeley, USA","institution_ids":["https://openalex.org/I95457486"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5015390421"],"corresponding_institution_ids":["https://openalex.org/I95457486"],"apc_list":null,"apc_paid":null,"fwci":2.6491,"has_fulltext":false,"cited_by_count":8,"citation_normalized_percentile":{"value":0.91431232,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":99},"biblio":{"volume":"18","issue":"8","first_page":"1427","last_page":"1440"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.9126999974250793,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.9126999974250793,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10403","display_name":"Phonetics and Phonology Research","score":0.9106000065803528,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/vocal-tract","display_name":"Vocal tract","score":0.79756098985672},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6580373048782349},{"id":"https://openalex.org/keywords/kinematics","display_name":"Kinematics","score":0.6462775468826294},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5995551943778992},{"id":"https://openalex.org/keywords/coding","display_name":"Coding (social sciences)","score":0.5441516637802124},{"id":"https://openalex.org/keywords/speech-coding","display_name":"Speech coding","score":0.511242151260376},{"id":"https://openalex.org/keywords/linear-predictive-coding","display_name":"Linear predictive coding","score":0.45775142312049866},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3396926522254944},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.13336604833602905}],"concepts":[{"id":"https://openalex.org/C47401133","wikidata":"https://www.wikidata.org/wiki/Q748953","display_name":"Vocal tract","level":2,"score":0.79756098985672},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6580373048782349},{"id":"https://openalex.org/C39920418","wikidata":"https://www.wikidata.org/wiki/Q11476","display_name":"Kinematics","level":2,"score":0.6462775468826294},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5995551943778992},{"id":"https://openalex.org/C179518139","wikidata":"https://www.wikidata.org/wiki/Q5140297","display_name":"Coding (social sciences)","level":2,"score":0.5441516637802124},{"id":"https://openalex.org/C13895895","wikidata":"https://www.wikidata.org/wiki/Q3270773","display_name":"Speech coding","level":2,"score":0.511242151260376},{"id":"https://openalex.org/C59883199","wikidata":"https://www.wikidata.org/wiki/Q1826438","display_name":"Linear predictive coding","level":3,"score":0.45775142312049866},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3396926522254944},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.13336604833602905},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C74650414","wikidata":"https://www.wikidata.org/wiki/Q11397","display_name":"Classical mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/jstsp.2024.3497655","is_oa":false,"landing_page_url":"https://doi.org/10.1109/jstsp.2024.3497655","pdf_url":null,"source":{"id":"https://openalex.org/S42167783","display_name":"IEEE Journal of Selected Topics in Signal Processing","issn_l":"1932-4553","issn":["1932-4553","1941-0484"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Journal of Selected Topics in Signal Processing","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Gender equality","id":"https://metadata.un.org/sdg/5","score":0.4000000059604645}],"awards":[{"id":"https://openalex.org/G2333081612","display_name":null,"funder_award_id":"2106928","funder_id":"https://openalex.org/F4320309085","funder_display_name":"Center for Selective C-H Functionalization, National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320309085","display_name":"Center for Selective C-H Functionalization, National Science Foundation","ror":"https://ror.org/02h8v7m77"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":87,"referenced_works":["https://openalex.org/W123135133","https://openalex.org/W1531956331","https://openalex.org/W1857238892","https://openalex.org/W1962568395","https://openalex.org/W1965378753","https://openalex.org/W1969229349","https://openalex.org/W1972620250","https://openalex.org/W1977591085","https://openalex.org/W1987414756","https://openalex.org/W1987563002","https://openalex.org/W2023407680","https://openalex.org/W2027933203","https://openalex.org/W2035395798","https://openalex.org/W2040818125","https://openalex.org/W2055115647","https://openalex.org/W2057649128","https://openalex.org/W2059069822","https://openalex.org/W2068447135","https://openalex.org/W2075570558","https://openalex.org/W2125047278","https://openalex.org/W2134539847","https://openalex.org/W2141697795","https://openalex.org/W2159693968","https://openalex.org/W2194775991","https://openalex.org/W2231075402","https://openalex.org/W2466021321","https://openalex.org/W2622158094","https://openalex.org/W2726515241","https://openalex.org/W2752796333","https://openalex.org/W2760103357","https://openalex.org/W2804300206","https://openalex.org/W2808631503","https://openalex.org/W2890964092","https://openalex.org/W2940585064","https://openalex.org/W2962866891","https://openalex.org/W2972359262","https://openalex.org/W2972916618","https://openalex.org/W2981087920","https://openalex.org/W2997591727","https://openalex.org/W3095410713","https://openalex.org/W3133449202","https://openalex.org/W3140429000","https://openalex.org/W3193311858","https://openalex.org/W3196458264","https://openalex.org/W3198275944","https://openalex.org/W3198533616","https://openalex.org/W3209059054","https://openalex.org/W3209984917","https://openalex.org/W3215615641","https://openalex.org/W4206319965","https://openalex.org/W4223430326","https://openalex.org/W4225096077","https://openalex.org/W4225956675","https://openalex.org/W4235733905","https://openalex.org/W4245284779","https://openalex.org/W4297841609","https://openalex.org/W4297841848","https://openalex.org/W4307323391","https://openalex.org/W4372260053","https://openalex.org/W4372260274","https://openalex.org/W4372340876","https://openalex.org/W4372340947","https://openalex.org/W4372341338","https://openalex.org/W4372348980","https://openalex.org/W4375869199","https://openalex.org/W4375869259","https://openalex.org/W4385823191","https://openalex.org/W4392405784","https://openalex.org/W4392902939","https://openalex.org/W4392903423","https://openalex.org/W4392904409","https://openalex.org/W4392909950","https://openalex.org/W4402111787","https://openalex.org/W4404609375","https://openalex.org/W6750483760","https://openalex.org/W6767245602","https://openalex.org/W6783867762","https://openalex.org/W6797957363","https://openalex.org/W6803547063","https://openalex.org/W6846936884","https://openalex.org/W6847363464","https://openalex.org/W6849755210","https://openalex.org/W6855121457","https://openalex.org/W6855885476","https://openalex.org/W6858915148","https://openalex.org/W6862144568","https://openalex.org/W6936113694"],"related_works":["https://openalex.org/W2363301696","https://openalex.org/W2352223112","https://openalex.org/W1570840316","https://openalex.org/W2139283974","https://openalex.org/W4312036005","https://openalex.org/W2383072803","https://openalex.org/W2156505556","https://openalex.org/W2363056088","https://openalex.org/W1000239413","https://openalex.org/W2111212790"],"abstract_inverted_index":{"Vocal":[0],"tract":[1,86],"articulation":[2],"is":[3,110,148,166],"a":[4,40,133,182],"natural,":[5],"grounded":[6],"control":[7],"space":[8],"of":[9,15,36,43,46,84,102,122,162,170,186],"speech":[10,24,47,64,73,103,129],"production.":[11,104],"The":[12,78],"spatiotemporal":[13],"coordination":[14],"articulators":[16,87],"combined":[17],"with":[18,113],"the":[19,98,114,119,145,160,167,178],"vocal":[20,85],"source":[21,89],"shapes":[22],"intelligible":[23],"sounds":[25],"to":[26,117,141],"enable":[27],"effective":[28],"spoken":[29],"communication.":[30],"Based":[31],"on":[32,127],"this":[33,165],"physiological":[34],"grounding":[35],"speech,":[37],"we":[38,131],"propose":[39],"new":[41],"framework":[42,180],"neural":[44],"encoding-decoding":[45],"\u2013":[48],"Speech":[49],"Articulatory":[50],"Coding":[51],"(SPARC).":[52],"SPARC":[53],"comprises":[54],"an":[55,67],"articulatory":[56,61,68,76,79,115,137,173],"analysis":[57],"model":[58,70],"that":[59,71,139],"infers":[60],"features":[62,80],"from":[63,75,151],"audio,":[65],"and":[66,88,95,175],"synthesis":[69],"synthesizes":[72],"audio":[74],"features.":[77],"are":[81,92],"kinematic":[82],"traces":[83],"features,":[90],"which":[91,153],"intuitively":[93],"interpretable":[94],"controllable,":[96],"being":[97],"actual":[99],"physical":[100],"interface":[101],"An":[105],"additional":[106],"speaker":[107,146],"identity":[108],"encoder":[109],"jointly":[111],"trained":[112],"synthesizer":[116,138],"inform":[118],"voice":[120,157],"texture":[121],"individual":[123],"speakers.":[124,143],"By":[125],"training":[126],"large-scale":[128],"data,":[130],"achieve":[132],"fully":[134],"intelligible,":[135],"high-quality":[136],"generalizes":[140],"unseen":[142],"Furthermore,":[144],"embedding":[147],"effectively":[149],"disentangled":[150],"articulations,":[152],"enables":[154],"accent-perserving":[155],"zero-shot":[156],"conversion.":[158],"To":[159],"best":[161],"our":[163],"knowledge,":[164],"first":[168],"demonstration":[169],"universal,":[171],"high-performance":[172],"inference":[174],"synthesis,":[176],"suggesting":[177],"proposed":[179],"as":[181],"powerful":[183],"coding":[184],"system":[185],"speech.":[187]},"counts_by_year":[{"year":2026,"cited_by_count":3},{"year":2025,"cited_by_count":5}],"updated_date":"2025-12-21T23:12:01.093139","created_date":"2025-10-10T00:00:00"}
