{"id":"https://openalex.org/W4372266975","doi":"https://doi.org/10.1109/icassp49357.2023.10096406","title":"On Unsupervised Uncertainty-Driven Speech Pseudo-Label Filtering and Model Calibration","display_name":"On Unsupervised Uncertainty-Driven Speech Pseudo-Label Filtering and Model Calibration","publication_year":2023,"publication_date":"2023-05-05","ids":{"openalex":"https://openalex.org/W4372266975","doi":"https://doi.org/10.1109/icassp49357.2023.10096406"},"language":"en","primary_location":{"id":"doi:10.1109/icassp49357.2023.10096406","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/icassp49357.2023.10096406","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5067313233","display_name":"Nauman Dawalatabad","orcid":"https://orcid.org/0000-0003-1592-6300"},"institutions":[{"id":"https://openalex.org/I4210164862","display_name":"Artificial Intelligence in Medicine (Canada)","ror":"https://ror.org/05p590m36","country_code":"CA","type":"company","lineage":["https://openalex.org/I4210164862"]}],"countries":["CA"],"is_corresponding":true,"raw_author_name":"Nauman Dawalatabad","raw_affiliation_strings":["MIT Computer Science and Artificial Intelligence Laboratory,Cambridge,MA,USA","MIT Computer Science and Artificial Intelligence Laboratory, Cambridge, MA, USA"],"affiliations":[{"raw_affiliation_string":"MIT Computer Science and Artificial Intelligence Laboratory,Cambridge,MA,USA","institution_ids":["https://openalex.org/I4210164862"]},{"raw_affiliation_string":"MIT Computer Science and Artificial Intelligence Laboratory, Cambridge, MA, USA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5008673783","display_name":"Sameer Khurana","orcid":null},"institutions":[{"id":"https://openalex.org/I4210164862","display_name":"Artificial Intelligence in Medicine (Canada)","ror":"https://ror.org/05p590m36","country_code":"CA","type":"company","lineage":["https://openalex.org/I4210164862"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Sameer Khurana","raw_affiliation_strings":["MIT Computer Science and Artificial Intelligence Laboratory,Cambridge,MA,USA","MIT Computer Science and Artificial Intelligence Laboratory, Cambridge, MA, USA"],"affiliations":[{"raw_affiliation_string":"MIT Computer Science and Artificial Intelligence Laboratory,Cambridge,MA,USA","institution_ids":["https://openalex.org/I4210164862"]},{"raw_affiliation_string":"MIT Computer Science and Artificial Intelligence Laboratory, Cambridge, MA, USA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5025212823","display_name":"Antoine Laurent","orcid":"https://orcid.org/0000-0002-2653-1008"},"institutions":[{"id":"https://openalex.org/I234216984","display_name":"Universit\u00e9 Nantes Angers Le Mans","ror":"https://ror.org/0406t3m57","country_code":"FR","type":"education","lineage":["https://openalex.org/I234216984"]},{"id":"https://openalex.org/I4210108471","display_name":"Le Mans Universit\u00e9","ror":"https://ror.org/01mtcc283","country_code":"FR","type":"education","lineage":["https://openalex.org/I4210108471"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Antoine Laurent","raw_affiliation_strings":["LIUM - Le Mans University,France","LIUM - Le Mans University, France"],"affiliations":[{"raw_affiliation_string":"LIUM - Le Mans University,France","institution_ids":["https://openalex.org/I234216984","https://openalex.org/I4210108471"]},{"raw_affiliation_string":"LIUM - Le Mans University, France","institution_ids":["https://openalex.org/I234216984","https://openalex.org/I4210108471"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5112758056","display_name":"James Glass","orcid":"https://orcid.org/0000-0002-3097-360X"},"institutions":[{"id":"https://openalex.org/I4210164862","display_name":"Artificial Intelligence in Medicine (Canada)","ror":"https://ror.org/05p590m36","country_code":"CA","type":"company","lineage":["https://openalex.org/I4210164862"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"James Glass","raw_affiliation_strings":["MIT Computer Science and Artificial Intelligence Laboratory,Cambridge,MA,USA","MIT Computer Science and Artificial Intelligence Laboratory, Cambridge, MA, USA"],"affiliations":[{"raw_affiliation_string":"MIT Computer Science and Artificial Intelligence Laboratory,Cambridge,MA,USA","institution_ids":["https://openalex.org/I4210164862"]},{"raw_affiliation_string":"MIT Computer Science and Artificial Intelligence Laboratory, Cambridge, MA, USA","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5067313233"],"corresponding_institution_ids":["https://openalex.org/I4210164862"],"apc_list":null,"apc_paid":null,"fwci":0.3497,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.63056099,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":96},"biblio":{"volume":"abs 1909 13788","issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9958999752998352,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9891999959945679,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7422865629196167},{"id":"https://openalex.org/keywords/calibration","display_name":"Calibration","score":0.7264252305030823},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5568448901176453},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4970739185810089},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3335990309715271},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.13094189763069153},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.09520244598388672}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7422865629196167},{"id":"https://openalex.org/C165838908","wikidata":"https://www.wikidata.org/wiki/Q736777","display_name":"Calibration","level":2,"score":0.7264252305030823},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5568448901176453},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4970739185810089},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3335990309715271},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.13094189763069153},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.09520244598388672}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp49357.2023.10096406","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/icassp49357.2023.10096406","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","score":0.7699999809265137,"id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":28,"referenced_works":["https://openalex.org/W582134693","https://openalex.org/W854541894","https://openalex.org/W1494198834","https://openalex.org/W1731081199","https://openalex.org/W2127141656","https://openalex.org/W2143612262","https://openalex.org/W2514741789","https://openalex.org/W2626967530","https://openalex.org/W2739883972","https://openalex.org/W2803859746","https://openalex.org/W2936774411","https://openalex.org/W2962687275","https://openalex.org/W2962780374","https://openalex.org/W2976223659","https://openalex.org/W3042021992","https://openalex.org/W3092609815","https://openalex.org/W3112702554","https://openalex.org/W3163464943","https://openalex.org/W3198694222","https://openalex.org/W3203098807","https://openalex.org/W4385573655","https://openalex.org/W6617145748","https://openalex.org/W6623517193","https://openalex.org/W6629717138","https://openalex.org/W6637618735","https://openalex.org/W6739651123","https://openalex.org/W6768222176","https://openalex.org/W6780686684"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052","https://openalex.org/W2033914206","https://openalex.org/W2042327336"],"abstract_inverted_index":{"Pseudo-label":[0],"(PL)":[1],"filtering":[2,86,108,206],"forms":[3],"a":[4,23,49,101,193,199],"crucial":[5,73],"part":[6],"of":[7,202,211],"Self-Training":[8,17],"(ST)":[9],"methods":[10,213],"for":[11,40,67],"unsupervised":[12],"domain":[13,28,44,164,216],"adaptation.":[14],"Dropout-based":[15],"Uncertainty-driven":[16],"(DUST)":[18],"proceeds":[19],"by":[20,128,141],"first":[21],"training":[22,80],"teacher":[24,33,66,113,137],"model":[25,34,79,95,138,186,195],"on":[26,51,111,119,214],"source":[27,161],"labeled":[29,53],"data.":[30,45,56],"Then,":[31],"the":[32,41,62,65,68,77,93,112,125,136,181,203],"is":[35,59,85],"used":[36],"to":[37,170,188],"provide":[38],"PLs":[39,89],"unlabeled":[42,120],"target":[43,163,215],"Finally,":[46],"we":[47,99,149,177],"train":[48],"student":[50,63,78,94],"augmented":[52],"and":[54,104,162,190],"pseudo-labeled":[55],"The":[57],"process":[58],"iterative,":[60],"where":[61],"becomes":[64],"next":[69],"DUST":[70,83,189,204],"iteration.":[71],"A":[72],"step":[74],"that":[75,90,151,192],"precedes":[76],"in":[81,183],"each":[82],"iteration":[84],"out":[87],"noisy":[88],"could":[91],"lead":[92],"astray.":[96],"In":[97,146],"DUST,":[98],"proposed":[100],"simple,":[102],"effective,":[103],"theoretically":[105],"sound":[106],"PL":[107,153,205],"strategy":[109],"based":[110],"model\u2019s":[114,126],"uncertainty":[115,127],"about":[116],"its":[117],"predictions":[118],"speech":[121],"utterances.":[122],"We":[123,166,208],"estimate":[124],"computing":[129],"disagreement":[130],"amongst":[131],"multiple":[132],"samples":[133],"drawn":[134],"from":[135,180],"during":[139],"inference":[140],"injecting":[142],"noise":[143],"via":[144],"dropout.":[145],"this":[147,174],"work,":[148],"show":[150,191],"DUST\u2019s":[152],"filtering,":[154],"as":[155],"initially":[156],"used,":[157],"fail":[158],"under":[159],"severe":[160],"mismatch.":[165],"suggest":[167],"several":[168],"approaches":[169],"eliminate":[171],"or":[172],"alleviate":[173],"issue.":[175],"Further,":[176],"bring":[178],"insights":[179],"research":[182],"neural":[184],"network":[185],"calibration":[187],"well-calibrated":[194],"correlates":[196],"strongly":[197],"with":[198],"positive":[200],"outcome":[201],"step.":[207],"demonstrate":[209],"effectiveness":[210],"our":[212],"GigaSpeech":[217],"YouTube":[218],"dataset.":[219]},"counts_by_year":[{"year":2023,"cited_by_count":2}],"updated_date":"2025-12-24T23:09:58.560324","created_date":"2025-10-10T00:00:00"}
