{"id":"https://openalex.org/W4392909595","doi":"https://doi.org/10.1109/icassp48485.2024.10447834","title":"Joint Unsupervised and Supervised Training for Automatic Speech Recognition via Bilevel Optimization","display_name":"Joint Unsupervised and Supervised Training for Automatic Speech Recognition via Bilevel Optimization","publication_year":2024,"publication_date":"2024-03-18","ids":{"openalex":"https://openalex.org/W4392909595","doi":"https://doi.org/10.1109/icassp48485.2024.10447834"},"language":"en","primary_location":{"id":"doi:10.1109/icassp48485.2024.10447834","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp48485.2024.10447834","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5013474255","display_name":"A F M Saifuddin Saif","orcid":"https://orcid.org/0000-0002-6163-4678"},"institutions":[{"id":"https://openalex.org/I165799507","display_name":"Rensselaer Polytechnic Institute","ror":"https://ror.org/01rtyzb94","country_code":"US","type":"education","lineage":["https://openalex.org/I165799507"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"A F M Saif","raw_affiliation_strings":["Rensselaer Polytechnic Institute,Troy,NY,USA","Rensselaer Polytechnic Institute, Troy, NY, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Rensselaer Polytechnic Institute,Troy,NY,USA","institution_ids":["https://openalex.org/I165799507"]},{"raw_affiliation_string":"Rensselaer Polytechnic Institute, Troy, NY, USA","institution_ids":["https://openalex.org/I165799507"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102014291","display_name":"Xiaodong Cui","orcid":"https://orcid.org/0000-0003-4865-1307"},"institutions":[{"id":"https://openalex.org/I1341412227","display_name":"IBM (United States)","ror":"https://ror.org/05hh8d621","country_code":"US","type":"company","lineage":["https://openalex.org/I1341412227"]},{"id":"https://openalex.org/I4210114115","display_name":"IBM Research - Thomas J. Watson Research Center","ror":"https://ror.org/0265w5591","country_code":"US","type":"facility","lineage":["https://openalex.org/I1341412227","https://openalex.org/I4210114115"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Xiaodong Cui","raw_affiliation_strings":["IBM Research AI,T. J. Watson Research Center,NY,USA","T. J. Watson Research Center, IBM Research AI, NY, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"IBM Research AI,T. J. Watson Research Center,NY,USA","institution_ids":["https://openalex.org/I1341412227","https://openalex.org/I4210114115"]},{"raw_affiliation_string":"T. J. Watson Research Center, IBM Research AI, NY, USA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100359752","display_name":"Han Shen","orcid":"https://orcid.org/0000-0001-6714-5237"},"institutions":[{"id":"https://openalex.org/I165799507","display_name":"Rensselaer Polytechnic Institute","ror":"https://ror.org/01rtyzb94","country_code":"US","type":"education","lineage":["https://openalex.org/I165799507"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Han Shen","raw_affiliation_strings":["Rensselaer Polytechnic Institute,Troy,NY,USA","Rensselaer Polytechnic Institute, Troy, NY, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Rensselaer Polytechnic Institute,Troy,NY,USA","institution_ids":["https://openalex.org/I165799507"]},{"raw_affiliation_string":"Rensselaer Polytechnic Institute, Troy, NY, USA","institution_ids":["https://openalex.org/I165799507"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5088593720","display_name":"Songtao Lu","orcid":"https://orcid.org/0000-0001-9256-9648"},"institutions":[{"id":"https://openalex.org/I1341412227","display_name":"IBM (United States)","ror":"https://ror.org/05hh8d621","country_code":"US","type":"company","lineage":["https://openalex.org/I1341412227"]},{"id":"https://openalex.org/I4210114115","display_name":"IBM Research - Thomas J. Watson Research Center","ror":"https://ror.org/0265w5591","country_code":"US","type":"facility","lineage":["https://openalex.org/I1341412227","https://openalex.org/I4210114115"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Songtao Lu","raw_affiliation_strings":["IBM Research AI,T. J. Watson Research Center,NY,USA","T. J. Watson Research Center, IBM Research AI, NY, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"IBM Research AI,T. J. Watson Research Center,NY,USA","institution_ids":["https://openalex.org/I1341412227","https://openalex.org/I4210114115"]},{"raw_affiliation_string":"T. J. Watson Research Center, IBM Research AI, NY, USA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5003725957","display_name":"Brian Kingsbury","orcid":"https://orcid.org/0000-0002-1343-6837"},"institutions":[{"id":"https://openalex.org/I1341412227","display_name":"IBM (United States)","ror":"https://ror.org/05hh8d621","country_code":"US","type":"company","lineage":["https://openalex.org/I1341412227"]},{"id":"https://openalex.org/I4210114115","display_name":"IBM Research - Thomas J. Watson Research Center","ror":"https://ror.org/0265w5591","country_code":"US","type":"facility","lineage":["https://openalex.org/I1341412227","https://openalex.org/I4210114115"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Brian Kingsbury","raw_affiliation_strings":["IBM Research AI,T. J. Watson Research Center,NY,USA","T. J. Watson Research Center, IBM Research AI, NY, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"IBM Research AI,T. J. Watson Research Center,NY,USA","institution_ids":["https://openalex.org/I1341412227","https://openalex.org/I4210114115"]},{"raw_affiliation_string":"T. J. Watson Research Center, IBM Research AI, NY, USA","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100783476","display_name":"Tianyi Chen","orcid":"https://orcid.org/0000-0003-3477-1439"},"institutions":[{"id":"https://openalex.org/I165799507","display_name":"Rensselaer Polytechnic Institute","ror":"https://ror.org/01rtyzb94","country_code":"US","type":"education","lineage":["https://openalex.org/I165799507"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Tianyi Chen","raw_affiliation_strings":["Rensselaer Polytechnic Institute,Troy,NY,USA","Rensselaer Polytechnic Institute, Troy, NY, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Rensselaer Polytechnic Institute,Troy,NY,USA","institution_ids":["https://openalex.org/I165799507"]},{"raw_affiliation_string":"Rensselaer Polytechnic Institute, Troy, NY, USA","institution_ids":["https://openalex.org/I165799507"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.2219,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.81477779,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"10931","last_page":"10935"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9108999967575073,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9108999967575073,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8020548820495605},{"id":"https://openalex.org/keywords/joint","display_name":"Joint (building)","score":0.7499145865440369},{"id":"https://openalex.org/keywords/bilevel-optimization","display_name":"Bilevel optimization","score":0.6755712628364563},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6111331582069397},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.603675365447998},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.5059160590171814},{"id":"https://openalex.org/keywords/training","display_name":"Training (meteorology)","score":0.45719587802886963},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.36868470907211304},{"id":"https://openalex.org/keywords/optimization-problem","display_name":"Optimization problem","score":0.1687580645084381},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.08052313327789307},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.07416170835494995}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8020548820495605},{"id":"https://openalex.org/C18555067","wikidata":"https://www.wikidata.org/wiki/Q8375051","display_name":"Joint (building)","level":2,"score":0.7499145865440369},{"id":"https://openalex.org/C3309286","wikidata":"https://www.wikidata.org/wiki/Q4907693","display_name":"Bilevel optimization","level":3,"score":0.6755712628364563},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6111331582069397},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.603675365447998},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.5059160590171814},{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.45719587802886963},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.36868470907211304},{"id":"https://openalex.org/C137836250","wikidata":"https://www.wikidata.org/wiki/Q984063","display_name":"Optimization problem","level":2,"score":0.1687580645084381},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.08052313327789307},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.07416170835494995},{"id":"https://openalex.org/C153294291","wikidata":"https://www.wikidata.org/wiki/Q25261","display_name":"Meteorology","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C170154142","wikidata":"https://www.wikidata.org/wiki/Q150737","display_name":"Architectural engineering","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp48485.2024.10447834","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp48485.2024.10447834","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":32,"referenced_works":["https://openalex.org/W1494198834","https://openalex.org/W1990676004","https://openalex.org/W2091432990","https://openalex.org/W2127141656","https://openalex.org/W2160815625","https://openalex.org/W2165698076","https://openalex.org/W2606904667","https://openalex.org/W2908510526","https://openalex.org/W2936774411","https://openalex.org/W2963433607","https://openalex.org/W2979476256","https://openalex.org/W2980113592","https://openalex.org/W3012518552","https://openalex.org/W3036601975","https://openalex.org/W3097777922","https://openalex.org/W3123350987","https://openalex.org/W3156469768","https://openalex.org/W3199051821","https://openalex.org/W3209059054","https://openalex.org/W3212799896","https://openalex.org/W4226038297","https://openalex.org/W4297808394","https://openalex.org/W4318040964","https://openalex.org/W4320559890","https://openalex.org/W6691770337","https://openalex.org/W6693919493","https://openalex.org/W6736057607","https://openalex.org/W6751797489","https://openalex.org/W6780052409","https://openalex.org/W6780218876","https://openalex.org/W6810673746","https://openalex.org/W6840935462"],"related_works":["https://openalex.org/W4237041411","https://openalex.org/W1588628884","https://openalex.org/W1994745260","https://openalex.org/W2382404424","https://openalex.org/W4300511218","https://openalex.org/W2012267561","https://openalex.org/W3018909868","https://openalex.org/W3040509871","https://openalex.org/W1597439928","https://openalex.org/W2602511199"],"abstract_inverted_index":{"In":[0],"this":[1,57],"paper,":[2],"we":[3,22],"present":[4],"a":[5,33,44],"novel":[6],"bilevel":[7,53],"optimization-based":[8],"training":[9,12,29],"approach":[10],"to":[11,55],"acoustic":[13],"models":[14],"for":[15],"automatic":[16],"speech":[17],"recognition":[18],"(ASR)":[19],"tasks":[20],"that":[21],"term":[23],"bi-level":[24],"joint":[25],"unsupervised":[26,41],"and":[27,35,43,64,76],"supervised":[28,45],"(BL-JUST).":[30],"BL-JUST":[31,83],"employs":[32],"lower":[34],"upper":[36],"level":[37],"optimization":[38,54],"with":[39,61],"an":[40],"loss":[42,46],"respectively,":[47],"leveraging":[48],"recent":[49],"advances":[50],"in":[51],"penalty-based":[52],"solve":[56],"challenging":[58],"ASR":[59],"problem":[60],"affordable":[62],"complexity":[63],"rigorous":[65],"convergence":[66],"guarantees.":[67],"To":[68],"evaluate":[69],"BL-JUST,":[70],"extensive":[71],"experiments":[72],"on":[73],"the":[74,88],"LibriSpeech":[75],"TED-LIUM":[77],"v2":[78],"datasets":[79],"have":[80],"been":[81],"conducted.":[82],"achieves":[84],"superior":[85],"performance":[86],"over":[87],"commonly":[89],"used":[90],"pre-training":[91],"followed":[92],"by":[93],"fine-tuning":[94],"strategy.":[95]},"counts_by_year":[{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
