{"id":"https://openalex.org/W4394773771","doi":"https://doi.org/10.1162/tacl_a_00656","title":"What Do Self-Supervised Speech Models Know About Words?","display_name":"What Do Self-Supervised Speech Models Know About Words?","publication_year":2024,"publication_date":"2024-01-01","ids":{"openalex":"https://openalex.org/W4394773771","doi":"https://doi.org/10.1162/tacl_a_00656"},"language":"en","primary_location":{"id":"doi:10.1162/tacl_a_00656","is_oa":true,"landing_page_url":"https://doi.org/10.1162/tacl_a_00656","pdf_url":"https://direct.mit.edu/tacl/article-pdf/doi/10.1162/tacl_a_00656/2362252/tacl_a_00656.pdf","source":{"id":"https://openalex.org/S2729999759","display_name":"Transactions of the Association for Computational Linguistics","issn_l":"2307-387X","issn":["2307-387X"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320244","host_organization_name":"Association for Computational Linguistics","host_organization_lineage":["https://openalex.org/P4310320244"],"host_organization_lineage_names":["Association for Computational Linguistics"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Transactions of the Association for Computational Linguistics","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://direct.mit.edu/tacl/article-pdf/doi/10.1162/tacl_a_00656/2362252/tacl_a_00656.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5059672669","display_name":"Ankita Pasad","orcid":null},"institutions":[{"id":"https://openalex.org/I160992636","display_name":"Toyota Technological Institute at Chicago","ror":"https://ror.org/02sn5gb64","country_code":"US","type":"education","lineage":["https://openalex.org/I160992636"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Ankita Pasad","raw_affiliation_strings":["Toyota Technological Institute at Chicago, USA. ankitap@ttic.edu"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Toyota Technological Institute at Chicago, USA. ankitap@ttic.edu","institution_ids":["https://openalex.org/I160992636"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5072923563","display_name":"Chung-Ming Chien","orcid":null},"institutions":[{"id":"https://openalex.org/I160992636","display_name":"Toyota Technological Institute at Chicago","ror":"https://ror.org/02sn5gb64","country_code":"US","type":"education","lineage":["https://openalex.org/I160992636"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Chung-Ming Chien","raw_affiliation_strings":["Toyota Technological Institute at Chicago, USA. cmchien@ttic.edu"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Toyota Technological Institute at Chicago, USA. cmchien@ttic.edu","institution_ids":["https://openalex.org/I160992636"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5007445208","display_name":"Shane Settle","orcid":null},"institutions":[{"id":"https://openalex.org/I160992636","display_name":"Toyota Technological Institute at Chicago","ror":"https://ror.org/02sn5gb64","country_code":"US","type":"education","lineage":["https://openalex.org/I160992636"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Shane Settle","raw_affiliation_strings":["Toyota Technological Institute at Chicago, USA. settle.shane@ttic.edu"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Toyota Technological Institute at Chicago, USA. settle.shane@ttic.edu","institution_ids":["https://openalex.org/I160992636"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5015602781","display_name":"Karen Livescu","orcid":"https://orcid.org/0000-0003-4962-946X"},"institutions":[{"id":"https://openalex.org/I160992636","display_name":"Toyota Technological Institute at Chicago","ror":"https://ror.org/02sn5gb64","country_code":"US","type":"education","lineage":["https://openalex.org/I160992636"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Karen Livescu","raw_affiliation_strings":["Toyota Technological Institute at Chicago, USA. klivescu@ttic.edu"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Toyota Technological Institute at Chicago, USA. klivescu@ttic.edu","institution_ids":["https://openalex.org/I160992636"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5007445208","https://openalex.org/A5015602781","https://openalex.org/A5059672669","https://openalex.org/A5072923563"],"corresponding_institution_ids":["https://openalex.org/I160992636"],"apc_list":null,"apc_paid":null,"fwci":9.0074,"has_fulltext":true,"cited_by_count":30,"citation_normalized_percentile":{"value":0.98190464,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":100},"biblio":{"volume":"12","issue":null,"first_page":"372","last_page":"391"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8587144613265991},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.6882875561714172},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.6417536735534668},{"id":"https://openalex.org/keywords/pronunciation","display_name":"Pronunciation","score":0.5664202570915222},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5434106588363647},{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.533831775188446},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5198912620544434},{"id":"https://openalex.org/keywords/sentence","display_name":"Sentence","score":0.5099679231643677},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5042635202407837},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.4729040861129761},{"id":"https://openalex.org/keywords/text-segmentation","display_name":"Text segmentation","score":0.42998117208480835},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.21657994389533997},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.09121659398078918}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8587144613265991},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.6882875561714172},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.6417536735534668},{"id":"https://openalex.org/C2780844864","wikidata":"https://www.wikidata.org/wiki/Q184377","display_name":"Pronunciation","level":2,"score":0.5664202570915222},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5434106588363647},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.533831775188446},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5198912620544434},{"id":"https://openalex.org/C2777530160","wikidata":"https://www.wikidata.org/wiki/Q41796","display_name":"Sentence","level":2,"score":0.5099679231643677},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5042635202407837},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.4729040861129761},{"id":"https://openalex.org/C98501671","wikidata":"https://www.wikidata.org/wiki/Q1948408","display_name":"Text segmentation","level":3,"score":0.42998117208480835},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.21657994389533997},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.09121659398078918},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1162/tacl_a_00656","is_oa":true,"landing_page_url":"https://doi.org/10.1162/tacl_a_00656","pdf_url":"https://direct.mit.edu/tacl/article-pdf/doi/10.1162/tacl_a_00656/2362252/tacl_a_00656.pdf","source":{"id":"https://openalex.org/S2729999759","display_name":"Transactions of the Association for Computational Linguistics","issn_l":"2307-387X","issn":["2307-387X"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320244","host_organization_name":"Association for Computational Linguistics","host_organization_lineage":["https://openalex.org/P4310320244"],"host_organization_lineage_names":["Association for Computational Linguistics"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Transactions of the Association for Computational Linguistics","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:6e21aeba83f04f3eb3de9d9c960f20d9","is_oa":true,"landing_page_url":"https://doaj.org/article/6e21aeba83f04f3eb3de9d9c960f20d9","pdf_url":null,"source":{"id":"https://openalex.org/S4306401280","display_name":"DOAJ (DOAJ: Directory of Open Access Journals)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Transactions of the Association for Computational Linguistics, Vol 12 (2024)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.1162/tacl_a_00656","is_oa":true,"landing_page_url":"https://doi.org/10.1162/tacl_a_00656","pdf_url":"https://direct.mit.edu/tacl/article-pdf/doi/10.1162/tacl_a_00656/2362252/tacl_a_00656.pdf","source":{"id":"https://openalex.org/S2729999759","display_name":"Transactions of the Association for Computational Linguistics","issn_l":"2307-387X","issn":["2307-387X"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320244","host_organization_name":"Association for Computational Linguistics","host_organization_lineage":["https://openalex.org/P4310320244"],"host_organization_lineage_names":["Association for Computational Linguistics"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Transactions of the Association for Computational Linguistics","raw_type":"journal-article"},"sustainable_development_goals":[{"score":0.5099999904632568,"display_name":"Reduced inequalities","id":"https://metadata.un.org/sdg/10"},{"score":0.4399999976158142,"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16"}],"awards":[{"id":"https://openalex.org/G1523888516","display_name":null,"funder_award_id":"FA9550-","funder_id":"https://openalex.org/F4320338279","funder_display_name":"Air Force Office of Scientific Research"},{"id":"https://openalex.org/G1766360814","display_name":null,"funder_award_id":"FA9550-18-1-0166","funder_id":"https://openalex.org/F4320338279","funder_display_name":"Air Force Office of Scientific Research"},{"id":"https://openalex.org/G5809100787","display_name":null,"funder_award_id":"FA9550","funder_id":"https://openalex.org/F4320338279","funder_display_name":"Air Force Office of Scientific Research"}],"funders":[{"id":"https://openalex.org/F4320338279","display_name":"Air Force Office of Scientific Research","ror":"https://ror.org/011e9bt93"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4394773771.pdf","grobid_xml":"https://content.openalex.org/works/W4394773771.grobid-xml"},"referenced_works_count":117,"referenced_works":["https://openalex.org/W398859631","https://openalex.org/W569478347","https://openalex.org/W1494198834","https://openalex.org/W1558402681","https://openalex.org/W1606268232","https://openalex.org/W1632114991","https://openalex.org/W1967924372","https://openalex.org/W2025341678","https://openalex.org/W2059652594","https://openalex.org/W2065157922","https://openalex.org/W2145410271","https://openalex.org/W2166637769","https://openalex.org/W2190506272","https://openalex.org/W2250539671","https://openalex.org/W2251066368","https://openalex.org/W2251253014","https://openalex.org/W2407151108","https://openalex.org/W2593390416","https://openalex.org/W2602024037","https://openalex.org/W2747874407","https://openalex.org/W2891205112","https://openalex.org/W2906152891","https://openalex.org/W2932675979","https://openalex.org/W2946417913","https://openalex.org/W2962736743","https://openalex.org/W2962753610","https://openalex.org/W2963259843","https://openalex.org/W2963419157","https://openalex.org/W2963482440","https://openalex.org/W2970820321","https://openalex.org/W2970862333","https://openalex.org/W2972584841","https://openalex.org/W2995181338","https://openalex.org/W3008003211","https://openalex.org/W3034273309","https://openalex.org/W3044967013","https://openalex.org/W3093096176","https://openalex.org/W3095706145","https://openalex.org/W3096196861","https://openalex.org/W3096656254","https://openalex.org/W3097777922","https://openalex.org/W3110458199","https://openalex.org/W3119308075","https://openalex.org/W3150635893","https://openalex.org/W3150750326","https://openalex.org/W3155744586","https://openalex.org/W3160799772","https://openalex.org/W3163596720","https://openalex.org/W3174311593","https://openalex.org/W3197580070","https://openalex.org/W3198266945","https://openalex.org/W3198694222","https://openalex.org/W3198782837","https://openalex.org/W3198815374","https://openalex.org/W3202070718","https://openalex.org/W3203140070","https://openalex.org/W3209984917","https://openalex.org/W3209993061","https://openalex.org/W4206075291","https://openalex.org/W4223651314","https://openalex.org/W4224875474","https://openalex.org/W4225529283","https://openalex.org/W4226103796","https://openalex.org/W4226380987","https://openalex.org/W4280638376","https://openalex.org/W4281492411","https://openalex.org/W4283694096","https://openalex.org/W4285250921","https://openalex.org/W4287887773","https://openalex.org/W4296710617","https://openalex.org/W4303649106","https://openalex.org/W4306317873","https://openalex.org/W4307680525","https://openalex.org/W4310895557","https://openalex.org/W4313182775","https://openalex.org/W4319779871","https://openalex.org/W4319862401","https://openalex.org/W4319862479","https://openalex.org/W4372270126","https://openalex.org/W4372346125","https://openalex.org/W4375869060","https://openalex.org/W4375869259","https://openalex.org/W4385245566","https://openalex.org/W4385484924","https://openalex.org/W4385571440","https://openalex.org/W4385573456","https://openalex.org/W4385574560","https://openalex.org/W4385822254","https://openalex.org/W4385823003","https://openalex.org/W4385823328","https://openalex.org/W4385823338","https://openalex.org/W4385823426","https://openalex.org/W4386273179","https://openalex.org/W4391021793","https://openalex.org/W4394671563","https://openalex.org/W6731763572","https://openalex.org/W6739901393","https://openalex.org/W6745682157","https://openalex.org/W6748452836","https://openalex.org/W6752726010","https://openalex.org/W6754420807","https://openalex.org/W6755207826","https://openalex.org/W6761472960","https://openalex.org/W6780218876","https://openalex.org/W6786885278","https://openalex.org/W6787335539","https://openalex.org/W6788328058","https://openalex.org/W6790356757","https://openalex.org/W6792927658","https://openalex.org/W6795952400","https://openalex.org/W6803547063","https://openalex.org/W6810007534","https://openalex.org/W6810168380","https://openalex.org/W6839364956","https://openalex.org/W6839512648","https://openalex.org/W6853627120","https://openalex.org/W6948152991"],"related_works":["https://openalex.org/W2393940967","https://openalex.org/W2385598138","https://openalex.org/W2346578824","https://openalex.org/W2366925922","https://openalex.org/W2159591557","https://openalex.org/W2115592387","https://openalex.org/W2905950556","https://openalex.org/W2112534334","https://openalex.org/W120168696","https://openalex.org/W2153245103"],"abstract_inverted_index":{"Abstract":[0],"Many":[1],"self-supervised":[2],"speech":[3,21],"models":[4],"(S3Ms)":[5],"have":[6],"been":[7],"introduced":[8],"over":[9],"the":[10,67,110,124,132],"last":[11],"few":[12],"years,":[13],"improving":[14],"performance":[15,169],"and":[16,53,70,90,106,122,127,134,151,173],"data":[17],"efficiency":[18],"on":[19,145,170],"various":[20],"tasks.":[22],"However,":[23],"these":[24],"empirical":[25],"successes":[26],"alone":[27],"do":[28],"not":[29,118],"give":[30],"a":[31,60,97],"complete":[32],"picture":[33],"of":[34,63,100,136],"what":[35],"is":[36],"learned":[37],"during":[38],"pre-training.":[39],"Recent":[40],"work":[41],"has":[42],"begun":[43],"analyzing":[44],"how":[45],"S3Ms":[46,105],"encode":[47],"certain":[48],"properties,":[49],"such":[50],"as":[51],"phonetic":[52],"speaker":[54],"information,":[55],"but":[56],"we":[57,75],"still":[58],"lack":[59],"proper":[61],"understanding":[62],"knowledge":[64],"encoded":[65],"at":[66],"word":[68,115,149,171,175],"level":[69],"beyond.":[71],"In":[72],"this":[73],"work,":[74],"use":[76],"lightweight":[77],"analysis":[78],"methods":[79,180],"to":[80],"study":[81,99],"segment-level":[82],"linguistic":[83,137],"properties\u2014word":[84],"identity,":[85],"boundaries,":[86],"pronunciation,":[87],"syntactic":[88],"features,":[89],"semantic":[91,152],"features\u2014encoded":[92],"in":[93],"S3Ms.":[94],"We":[95,141],"present":[96],"comparative":[98],"layer-wise":[101],"representations":[102,112],"from":[103],"ten":[104],"find":[107,143],"that":[108,144],"(i)":[109],"frame-level":[111],"within":[113],"each":[114],"segment":[116],"are":[117],"all":[119],"equally":[120],"informative,":[121],"(ii)":[123],"pre-training":[125],"objective":[126],"model":[128],"size":[129],"heavily":[130],"influence":[131],"accessibility":[133],"distribution":[135],"information":[138],"across":[139],"layers.":[140],"also":[142],"several":[146],"tasks\u2014word":[147],"discrimination,":[148],"segmentation,":[150],"sentence":[153],"similarity\u2014S3Ms":[154],"trained":[155],"with":[156],"visual":[157],"grounding":[158],"outperform":[159],"their":[160],"speech-only":[161],"counterparts.":[162],"Finally,":[163],"our":[164],"task-based":[165],"analyses":[166],"demonstrate":[167],"improved":[168],"segmentation":[172],"acoustic":[174],"discrimination":[176],"while":[177],"using":[178],"simpler":[179],"than":[181],"prior":[182],"work.1":[183]},"counts_by_year":[{"year":2026,"cited_by_count":8},{"year":2025,"cited_by_count":10},{"year":2024,"cited_by_count":11},{"year":2023,"cited_by_count":1}],"updated_date":"2026-06-02T09:04:35.204637","created_date":"2025-10-10T00:00:00"}
