{"id":"https://openalex.org/W4221140961","doi":"https://doi.org/10.1109/jstsp.2022.3200909","title":"Are Discrete Units Necessary for Spoken Language Modeling?","display_name":"Are Discrete Units Necessary for Spoken Language Modeling?","publication_year":2022,"publication_date":"2022-08-23","ids":{"openalex":"https://openalex.org/W4221140961","doi":"https://doi.org/10.1109/jstsp.2022.3200909"},"language":"en","primary_location":{"id":"doi:10.1109/jstsp.2022.3200909","is_oa":false,"landing_page_url":"https://doi.org/10.1109/jstsp.2022.3200909","pdf_url":null,"source":{"id":"https://openalex.org/S42167783","display_name":"IEEE Journal of Selected Topics in Signal Processing","issn_l":"1932-4553","issn":["1932-4553","1941-0484"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Journal of Selected Topics in Signal Processing","raw_type":"journal-article"},"type":"article","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2203.05936","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Tu Anh Nguyen","orcid":"https://orcid.org/0000-0002-9623-042X"},"institutions":[{"id":"https://openalex.org/I1326498283","display_name":"Institut national de recherche en sciences et technologies du num\u00e9rique","ror":"https://ror.org/02kvxyf05","country_code":"FR","type":"government","lineage":["https://openalex.org/I1326498283"]}],"countries":["FR"],"is_corresponding":true,"raw_author_name":"Tu Anh Nguyen","raw_affiliation_strings":["Meta and Inria, Paris, France"],"raw_orcid":"https://orcid.org/0000-0002-9623-042X","affiliations":[{"raw_affiliation_string":"Meta and Inria, Paris, France","institution_ids":["https://openalex.org/I1326498283"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5077663332","display_name":"Beno\u00eet Sagot","orcid":"https://orcid.org/0000-0002-0107-8526"},"institutions":[{"id":"https://openalex.org/I1326498283","display_name":"Institut national de recherche en sciences et technologies du num\u00e9rique","ror":"https://ror.org/02kvxyf05","country_code":"FR","type":"government","lineage":["https://openalex.org/I1326498283"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Benoit Sagot","raw_affiliation_strings":["Inria, Paris, France"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Inria, Paris, France","institution_ids":["https://openalex.org/I1326498283"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5007620149","display_name":"Emmanuel Dupoux","orcid":"https://orcid.org/0000-0002-7814-2952"},"institutions":[{"id":"https://openalex.org/I1294671590","display_name":"Centre National de la Recherche Scientifique","ror":"https://ror.org/02feahw73","country_code":"FR","type":"government","lineage":["https://openalex.org/I1294671590"]},{"id":"https://openalex.org/I29607241","display_name":"\u00c9cole Normale Sup\u00e9rieure - PSL","ror":"https://ror.org/05a0dhs15","country_code":"FR","type":"other","lineage":["https://openalex.org/I2746051580","https://openalex.org/I29607241"]},{"id":"https://openalex.org/I90669466","display_name":"\u00c9cole des hautes \u00e9tudes en sciences sociales","ror":"https://ror.org/02d9dg697","country_code":"FR","type":"facility","lineage":["https://openalex.org/I90669466"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Emmanuel Dupoux","raw_affiliation_strings":["Meta and EHESS, ENS-PSL, CNRS, Inria, Paris, France"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Meta and EHESS, ENS-PSL, CNRS, Inria, Paris, France","institution_ids":["https://openalex.org/I29607241","https://openalex.org/I90669466","https://openalex.org/I1294671590"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I1326498283"],"apc_list":null,"apc_paid":null,"fwci":0.2777,"has_fulltext":true,"cited_by_count":2,"citation_normalized_percentile":{"value":0.60935307,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":95},"biblio":{"volume":"16","issue":"6","first_page":"1415","last_page":"1423"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.8391000032424927,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.8391000032424927,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.0340999998152256,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11448","display_name":"Face recognition and analysis","score":0.012000000104308128,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6517521739006042},{"id":"https://openalex.org/keywords/spoken-language","display_name":"Spoken language","score":0.6086199283599854},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.43120521306991577},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.39820143580436707}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6517521739006042},{"id":"https://openalex.org/C2776230583","wikidata":"https://www.wikidata.org/wiki/Q1322198","display_name":"Spoken language","level":2,"score":0.6086199283599854},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.43120521306991577},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.39820143580436707},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1109/jstsp.2022.3200909","is_oa":false,"landing_page_url":"https://doi.org/10.1109/jstsp.2022.3200909","pdf_url":null,"source":{"id":"https://openalex.org/S42167783","display_name":"IEEE Journal of Selected Topics in Signal Processing","issn_l":"1932-4553","issn":["1932-4553","1941-0484"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Journal of Selected Topics in Signal Processing","raw_type":"journal-article"},{"id":"pmh:oai:arXiv.org:2203.05936","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2203.05936","pdf_url":"https://arxiv.org/pdf/2203.05936","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"pmh:oai:HAL:hal-03831707v1","is_oa":true,"landing_page_url":"https://inria.hal.science/hal-03831707","pdf_url":"https://inria.hal.science/hal-03831707v1/document","source":{"id":"https://openalex.org/S4306402512","display_name":"HAL (Le Centre pour la Communication Scientifique Directe)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1294671590","host_organization_name":"Centre National de la Recherche Scientifique","host_organization_lineage":["https://openalex.org/I1294671590"],"host_organization_lineage_names":[],"type":"repository"},"license":"public-domain","license_id":"https://openalex.org/licenses/public-domain","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"IEEE Journal of Selected Topics in Signal Processing, 2022","raw_type":"Journal articles"},{"id":"doi:10.48550/arxiv.2203.05936","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2203.05936","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article-journal"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2203.05936","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2203.05936","pdf_url":"https://arxiv.org/pdf/2203.05936","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.8600000143051147}],"awards":[{"id":"https://openalex.org/G2077798120","display_name":null,"funder_award_id":"ANR-10-IDEX-","funder_id":"https://openalex.org/F4320320883","funder_display_name":"Agence Nationale de la Recherche"},{"id":"https://openalex.org/G2169153225","display_name":null,"funder_award_id":"ANR-10-IDEX-0001-02 PSL","funder_id":"https://openalex.org/F4320320883","funder_display_name":"Agence Nationale de la Recherche"},{"id":"https://openalex.org/G2614703714","display_name":null,"funder_award_id":"ANR-10-IDEX-0001-02","funder_id":"https://openalex.org/F4320320883","funder_display_name":"Agence Nationale de la Recherche"},{"id":"https://openalex.org/G2658974192","display_name":null,"funder_award_id":"ANR-10-IDEX-0001","funder_id":"https://openalex.org/F4320320883","funder_display_name":"Agence Nationale de la Recherche"},{"id":"https://openalex.org/G2659019386","display_name":null,"funder_award_id":"ANR-17-EURE-","funder_id":"https://openalex.org/F4320320883","funder_display_name":"Agence Nationale de la Recherche"},{"id":"https://openalex.org/G2871517121","display_name":null,"funder_award_id":"10-IDEX-0001-02 PSL","funder_id":"https://openalex.org/F4320320883","funder_display_name":"Agence Nationale de la Recherche"},{"id":"https://openalex.org/G3860367528","display_name":null,"funder_award_id":"10-IDEX-0001-02","funder_id":"https://openalex.org/F4320320883","funder_display_name":"Agence Nationale de la Recherche"},{"id":"https://openalex.org/G4209519768","display_name":null,"funder_award_id":"ANR-17-EURE-0017","funder_id":"https://openalex.org/F4320320883","funder_display_name":"Agence Nationale de la Recherche"},{"id":"https://openalex.org/G4303677324","display_name":null,"funder_award_id":"ANR-17-EURE-0017","funder_id":"https://openalex.org/F4320309949","funder_display_name":"Canadian Institute for Advanced Research"},{"id":"https://openalex.org/G4483770707","display_name":null,"funder_award_id":"17-EURE-0017","funder_id":"https://openalex.org/F4320320883","funder_display_name":"Agence Nationale de la Recherche"},{"id":"https://openalex.org/G471331628","display_name":null,"funder_award_id":"ANR-10-IDEX","funder_id":"https://openalex.org/F4320320883","funder_display_name":"Agence Nationale de la Recherche"},{"id":"https://openalex.org/G5838787665","display_name":null,"funder_award_id":"ANR-19-P3IA-0001","funder_id":"https://openalex.org/F4320320883","funder_display_name":"Agence Nationale de la Recherche"},{"id":"https://openalex.org/G626190807","display_name":null,"funder_award_id":"19-P3IA-0001","funder_id":"https://openalex.org/F4320320883","funder_display_name":"Agence Nationale de la Recherche"},{"id":"https://openalex.org/G6803667113","display_name":null,"funder_award_id":"ANR-17","funder_id":"https://openalex.org/F4320320883","funder_display_name":"Agence Nationale de la Recherche"},{"id":"https://openalex.org/G7240651115","display_name":null,"funder_award_id":"ANR-10","funder_id":"https://openalex.org/F4320320883","funder_display_name":"Agence Nationale de la Recherche"},{"id":"https://openalex.org/G782245399","display_name":null,"funder_award_id":"IDEX-0001-02","funder_id":"https://openalex.org/F4320320883","funder_display_name":"Agence Nationale de la Recherche"},{"id":"https://openalex.org/G8318455765","display_name":null,"funder_award_id":"ANR-19","funder_id":"https://openalex.org/F4320320883","funder_display_name":"Agence Nationale de la Recherche"}],"funders":[{"id":"https://openalex.org/F4320309949","display_name":"Canadian Institute for Advanced Research","ror":"https://ror.org/01sdtdd95"},{"id":"https://openalex.org/F4320316918","display_name":"\u00c9cole des Hautes Etudes en Sciences Sociales","ror":"https://ror.org/02d9dg697"},{"id":"https://openalex.org/F4320320883","display_name":"Agence Nationale de la Recherche","ror":"https://ror.org/00rbzpz17"},{"id":"https://openalex.org/F4320326256","display_name":"Grand \u00c9quipement National De Calcul Intensif","ror":"https://ror.org/0010d1q40"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":21,"referenced_works":["https://openalex.org/W1494198834","https://openalex.org/W2896457183","https://openalex.org/W2933138175","https://openalex.org/W2964243274","https://openalex.org/W2973049979","https://openalex.org/W3015356564","https://openalex.org/W3016181583","https://openalex.org/W3024308166","https://openalex.org/W3088059392","https://openalex.org/W3197259906","https://openalex.org/W3198815374","https://openalex.org/W3209059054","https://openalex.org/W4226033575","https://openalex.org/W4287887366","https://openalex.org/W6766673545","https://openalex.org/W6769196770","https://openalex.org/W6778883912","https://openalex.org/W6780218876","https://openalex.org/W6786696081","https://openalex.org/W6790356757","https://openalex.org/W6844194202"],"related_works":["https://openalex.org/W2611614995","https://openalex.org/W2368651715","https://openalex.org/W2789919619","https://openalex.org/W3107474891","https://openalex.org/W1552159754","https://openalex.org/W2148757832","https://openalex.org/W4321496520","https://openalex.org/W2293457016","https://openalex.org/W2131420137","https://openalex.org/W1986021162"],"abstract_inverted_index":{"Recent":[0],"work":[1],"in":[2,57,88,102,147],"spoken":[3,89,103],"language":[4,12,41,69,90,104,121,133],"modeling":[5,122],"shows":[6],"the":[7,27,58,61,81,115,125,136,140,148,155],"possibility":[8],"of":[9,32,60,83,127,139,154],"learning":[10],"a":[11,30,40,49,68,132],"unsupervisedly":[13],"from":[14,114],"raw":[15],"audio":[16,28],"without":[17,71],"any":[18],"text":[19],"labels.":[20],"The":[21],"approach":[22],"relies":[23],"first":[24],"on":[25,44,135],"transforming":[26],"into":[29],"sequence":[31],"discrete":[33,50,72,84,137],"units":[34,73,138],"(or":[35],"pseudo-text)":[36],"and":[37,151],"then":[38],"training":[39],"model":[42,70,134],"directly":[43],"such":[45,48],"pseudo-text.":[46],"Is":[47],"bottleneck":[51],"necessary,":[52],"potentially":[53],"introducing":[54],"irreversible":[55],"errors":[56],"encoding":[59],"speech":[62],"signal,":[63],"or":[64],"could":[65],"we":[66,79,130],"learn":[67],"at":[74],"all?":[75],"In":[76],"this":[77,128],"work,":[78],"study":[80],"role":[82],"versus":[85],"continuous":[86,116],"representations":[87],"modeling.":[91,105],"We":[92,106],"show":[93,107],"that":[94,108],"discretization":[95,109],"is":[96],"indeed":[97],"essential":[98],"for":[99],"good":[100],"results":[101,146],"removes":[110],"linguistically":[111],"irrelevant":[112],"information":[113],"features,":[117,142],"helping":[118],"to":[119],"improve":[120],"performances.":[123],"On":[124],"basis":[126],"study,":[129],"train":[131],"HuBERT":[141],"reaching":[143],"new":[144],"state-of-the-art":[145],"lexical,":[149],"syntactic":[150],"semantic":[152],"metrics":[153],"Zero":[156],"Resource":[157],"Speech":[158,164],"Challenge":[159],"2021":[160],"(Track":[161],"1":[162],"-":[163],"Only).":[165]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2022,"cited_by_count":1}],"updated_date":"2026-05-07T13:39:58.223016","created_date":"2025-10-10T00:00:00"}
