{"id":"https://openalex.org/W7139957071","doi":"https://doi.org/10.48550/arxiv.2603.18612","title":"DiscoPhon: Benchmarking the Unsupervised Discovery of Phoneme Inventories With Discrete Speech Units","display_name":"DiscoPhon: Benchmarking the Unsupervised Discovery of Phoneme Inventories With Discrete Speech Units","publication_year":2026,"publication_date":"2026-03-19","ids":{"openalex":"https://openalex.org/W7139957071","doi":"https://doi.org/10.48550/arxiv.2603.18612"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.18612","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.18612","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.18612","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5028752145","display_name":"Maxime Poli","orcid":"https://orcid.org/0000-0002-9377-9150"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Poli, Maxime","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5121186908","display_name":"Manel Khentout","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Khentout, Manel","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5106912550","display_name":"Angelo Ortiz Tandazo","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tandazo, Angelo Ortiz","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5069239428","display_name":"Ewan Dunbar","orcid":"https://orcid.org/0000-0001-9603-953X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dunbar, Ewan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130221397","display_name":"Emmanuel Chemla","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chemla, Emmanuel","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5130248301","display_name":"Emmanuel Dupoux","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dupoux, Emmanuel","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":6,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.5317000150680542,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.5317000150680542,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12380","display_name":"Authorship Attribution and Profiling","score":0.11630000174045563,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.07819999754428864,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/benchmarking","display_name":"Benchmarking","score":0.5971999764442444},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.593999981880188},{"id":"https://openalex.org/keywords/range","display_name":"Range (aeronautics)","score":0.4810999929904938},{"id":"https://openalex.org/keywords/speech-processing","display_name":"Speech processing","score":0.36820000410079956},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.295199990272522},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.28610000014305115}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7296000123023987},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6672000288963318},{"id":"https://openalex.org/C86251818","wikidata":"https://www.wikidata.org/wiki/Q816754","display_name":"Benchmarking","level":2,"score":0.5971999764442444},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.593999981880188},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5045999884605408},{"id":"https://openalex.org/C204323151","wikidata":"https://www.wikidata.org/wiki/Q905424","display_name":"Range (aeronautics)","level":2,"score":0.4810999929904938},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.44339999556541443},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.36820000410079956},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.295199990272522},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.28610000014305115},{"id":"https://openalex.org/C204201278","wikidata":"https://www.wikidata.org/wiki/Q1332614","display_name":"Voice activity detection","level":3,"score":0.26179999113082886},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.26010000705718994},{"id":"https://openalex.org/C59656382","wikidata":"https://www.wikidata.org/wiki/Q191536","display_name":"Conjunction (astronomy)","level":2,"score":0.251800000667572}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.18612","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.18612","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.18612","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.18612","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education","score":0.5555415749549866}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"We":[0,75],"introduce":[1],"DiscoPhon,":[2],"a":[3,26,39,52,58,61],"multilingual":[4,79],"benchmark":[5],"for":[6,69,95],"evaluating":[7],"unsupervised":[8],"phoneme":[9,54],"discovery":[10],"from":[11],"discrete":[12,46],"speech":[13,37],"units.":[14],"DiscoPhon":[15],"covers":[16],"6":[17,20],"dev":[18],"and":[19,73,81,84],"test":[21],"languages,":[22],"chosen":[23],"to":[24,51,98],"span":[25],"wide":[27],"range":[28],"of":[29,36],"phonemic":[30,87],"contrasts.":[31],"Given":[32],"only":[33],"10":[34],"hours":[35],"in":[38,92],"previously":[40],"unseen":[41],"language,":[42],"systems":[43],"must":[44],"produce":[45],"units":[47,97],"that":[48,86],"are":[49,67],"mapped":[50],"predefined":[53],"inventory,":[55],"through":[56],"either":[57],"many-to-one":[59],"or":[60],"one-to-one":[62],"assignment.":[63],"The":[64],"resulting":[65],"sequences":[66],"evaluated":[68],"unit":[70],"quality,":[71],"recognition":[72],"segmentation.":[74],"provide":[76],"four":[77],"pretrained":[78],"HuBERT":[80],"SpidR":[82],"baselines,":[83],"show":[85],"information":[88],"is":[89],"available":[90],"enough":[91],"current":[93],"models":[94],"derived":[96],"correlate":[99],"well":[100],"with":[101,104],"phonemes,":[102],"though":[103],"variations":[105],"across":[106],"languages.":[107]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-03-21T00:00:00"}
