{"id":"https://openalex.org/W4372338315","doi":"https://doi.org/10.1109/icassp49357.2023.10097071","title":"Intermpl: Momentum Pseudo-Labeling With Intermediate CTC Loss","display_name":"Intermpl: Momentum Pseudo-Labeling With Intermediate CTC Loss","publication_year":2023,"publication_date":"2023-05-05","ids":{"openalex":"https://openalex.org/W4372338315","doi":"https://doi.org/10.1109/icassp49357.2023.10097071"},"language":"en","primary_location":{"id":"doi:10.1109/icassp49357.2023.10097071","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/icassp49357.2023.10097071","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102017467","display_name":"Yosuke Higuchi","orcid":"https://orcid.org/0000-0003-4500-8957"},"institutions":[{"id":"https://openalex.org/I150744194","display_name":"Waseda University","ror":"https://ror.org/00ntfnx83","country_code":"JP","type":"education","lineage":["https://openalex.org/I150744194"]},{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["JP","US"],"is_corresponding":true,"raw_author_name":"Yosuke Higuchi","raw_affiliation_strings":["Carnegie Mellon University,USA","Waseda University, Japan","Carnegie Mellon University, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Carnegie Mellon University,USA","institution_ids":["https://openalex.org/I74973139"]},{"raw_affiliation_string":"Waseda University, Japan","institution_ids":["https://openalex.org/I150744194"]},{"raw_affiliation_string":"Carnegie Mellon University, USA","institution_ids":["https://openalex.org/I74973139"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5087632404","display_name":"Tetsuji Ogawa","orcid":"https://orcid.org/0000-0002-7316-2073"},"institutions":[{"id":"https://openalex.org/I150744194","display_name":"Waseda University","ror":"https://ror.org/00ntfnx83","country_code":"JP","type":"education","lineage":["https://openalex.org/I150744194"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Tetsuji Ogawa","raw_affiliation_strings":["Waseda University,Japan","Waseda University, Japan"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Waseda University,Japan","institution_ids":["https://openalex.org/I150744194"]},{"raw_affiliation_string":"Waseda University, Japan","institution_ids":["https://openalex.org/I150744194"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101188700","display_name":"Tetsunori Kobayashi","orcid":null},"institutions":[{"id":"https://openalex.org/I150744194","display_name":"Waseda University","ror":"https://ror.org/00ntfnx83","country_code":"JP","type":"education","lineage":["https://openalex.org/I150744194"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Tetsunori Kobayashi","raw_affiliation_strings":["Waseda University,Japan","Waseda University, Japan"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Waseda University,Japan","institution_ids":["https://openalex.org/I150744194"]},{"raw_affiliation_string":"Waseda University, Japan","institution_ids":["https://openalex.org/I150744194"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5001291873","display_name":"Shinji Watanabe","orcid":"https://orcid.org/0000-0002-5970-8631"},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Shinji Watanabe","raw_affiliation_strings":["Carnegie Mellon University,USA","Carnegie Mellon University, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Carnegie Mellon University,USA","institution_ids":["https://openalex.org/I74973139"]},{"raw_affiliation_string":"Carnegie Mellon University, USA","institution_ids":["https://openalex.org/I74973139"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5102017467"],"corresponding_institution_ids":["https://openalex.org/I150744194","https://openalex.org/I74973139"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.04092505,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7397528290748596},{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.6859993934631348},{"id":"https://openalex.org/keywords/autoregressive-model","display_name":"Autoregressive model","score":0.6399914026260376},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.5529685616493225},{"id":"https://openalex.org/keywords/conditional-independence","display_name":"Conditional independence","score":0.5070257186889648},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.501962423324585},{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.4656943082809448},{"id":"https://openalex.org/keywords/limiting","display_name":"Limiting","score":0.45646512508392334},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.4408568739891052},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3978872299194336},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.33808356523513794},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3269980251789093},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.13265931606292725},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.10741275548934937}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7397528290748596},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.6859993934631348},{"id":"https://openalex.org/C159877910","wikidata":"https://www.wikidata.org/wiki/Q2202883","display_name":"Autoregressive model","level":2,"score":0.6399914026260376},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.5529685616493225},{"id":"https://openalex.org/C79772020","wikidata":"https://www.wikidata.org/wiki/Q5159264","display_name":"Conditional independence","level":2,"score":0.5070257186889648},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.501962423324585},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.4656943082809448},{"id":"https://openalex.org/C188198153","wikidata":"https://www.wikidata.org/wiki/Q1613840","display_name":"Limiting","level":2,"score":0.45646512508392334},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.4408568739891052},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3978872299194336},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.33808356523513794},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3269980251789093},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.13265931606292725},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.10741275548934937},{"id":"https://openalex.org/C78519656","wikidata":"https://www.wikidata.org/wiki/Q101333","display_name":"Mechanical engineering","level":1,"score":0.0},{"id":"https://openalex.org/C120665830","wikidata":"https://www.wikidata.org/wiki/Q14620","display_name":"Optics","level":1,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C149782125","wikidata":"https://www.wikidata.org/wiki/Q160039","display_name":"Econometrics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp49357.2023.10097071","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/icassp49357.2023.10097071","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320337495","display_name":"Technology Development","ror":null}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":62,"referenced_works":["https://openalex.org/W82886505","https://openalex.org/W854541894","https://openalex.org/W1494198834","https://openalex.org/W1522301498","https://openalex.org/W1524333225","https://openalex.org/W1828163288","https://openalex.org/W2102113734","https://openalex.org/W2127141656","https://openalex.org/W2130942839","https://openalex.org/W2133564696","https://openalex.org/W2327501763","https://openalex.org/W2577366047","https://openalex.org/W2799473636","https://openalex.org/W2884975363","https://openalex.org/W2892009249","https://openalex.org/W2936774411","https://openalex.org/W2940322076","https://openalex.org/W2944255943","https://openalex.org/W2953070460","https://openalex.org/W2963979492","https://openalex.org/W2972818416","https://openalex.org/W2995181338","https://openalex.org/W2998532468","https://openalex.org/W3015522062","https://openalex.org/W3015537910","https://openalex.org/W3015737168","https://openalex.org/W3015960524","https://openalex.org/W3026041220","https://openalex.org/W3096273170","https://openalex.org/W3096338464","https://openalex.org/W3097777922","https://openalex.org/W3101648800","https://openalex.org/W3103005696","https://openalex.org/W3162249256","https://openalex.org/W3162833755","https://openalex.org/W3163793923","https://openalex.org/W3197140813","https://openalex.org/W3197223534","https://openalex.org/W3198098585","https://openalex.org/W3205405669","https://openalex.org/W3205920203","https://openalex.org/W3206573929","https://openalex.org/W4225755266","https://openalex.org/W4226031090","https://openalex.org/W4289824098","https://openalex.org/W4385245566","https://openalex.org/W6623517193","https://openalex.org/W6631190155","https://openalex.org/W6631362777","https://openalex.org/W6638749077","https://openalex.org/W6675365184","https://openalex.org/W6679434410","https://openalex.org/W6679436768","https://openalex.org/W6732447497","https://openalex.org/W6733814495","https://openalex.org/W6739901393","https://openalex.org/W6768009688","https://openalex.org/W6772883055","https://openalex.org/W6776145779","https://openalex.org/W6781315908","https://openalex.org/W6784436999","https://openalex.org/W6802800485"],"related_works":["https://openalex.org/W2171218219","https://openalex.org/W1972271943","https://openalex.org/W2150410159","https://openalex.org/W4327525404","https://openalex.org/W4287185323","https://openalex.org/W3150905897","https://openalex.org/W1520183331","https://openalex.org/W2734842993","https://openalex.org/W2168175994","https://openalex.org/W2049473509"],"abstract_inverted_index":{"This":[0],"paper":[1],"presents":[2],"InterMPL,":[3],"a":[4,25,182],"semi-supervised":[5,65,165],"learning":[6],"method":[7],"of":[8,101,195],"end-to-end":[9],"automatic":[10],"speech":[11],"recognition":[12],"(ASR)":[13],"that":[14,129,138,168],"performs":[15],"pseudo-labeling":[16],"(PL)":[17],"with":[18],"intermediate":[19,110,135,159,197],"supervision.":[20],"Momentum":[21],"PL":[22],"(MPL)":[23],"trains":[24],"connectionist":[26],"temporal":[27],"classification":[28],"(CTC)-based":[29],"model":[30,178],"on":[31,38,123],"unlabeled":[32],"data":[33],"by":[34,108,113,179],"continuously":[35],"generating":[36,78],"pseudo-labels":[37,150],"the":[39,52,88,93,99,114,139,169,193,196],"fly":[40],"and":[41,55,75,125,154,174],"improving":[42],"their":[43],"quality.":[44],"In":[45,187],"contrast":[46],"to":[47,70,92,105,134,181],"autoregressive":[48,89],"formulations,":[49],"such":[50,137],"as":[51,156],"attention-based":[53],"encoder-decoder":[54],"transducer,":[56],"CTC":[57,82,132],"is":[58,143],"well":[59],"suited":[60],"for":[61,158],"MPL,":[62],"or":[63],"PL-based":[64],"ASR":[66,177],"in":[67,117,163],"general,":[68],"owing":[69],"its":[71],"simple/fast":[72],"inference":[73],"algorithm":[74],"robustness":[76],"against":[77],"collapsed":[79],"labels.":[80],"However,":[81],"generally":[83],"yields":[84],"inferior":[85],"performance":[86,100,185],"than":[87],"models":[90],"due":[91],"conditional":[94,127,140],"independence":[95,141],"assumption,":[96],"thereby":[97],"limiting":[98],"MPL.":[102],"We":[103,146],"propose":[104],"enhance":[106],"MPL":[107,173],"introducing":[109],"loss,":[111],"inspired":[112],"recent":[115],"advances":[116],"CTC-based":[118],"modeling.":[119],"Specifically,":[120],"we":[121],"focus":[122],"self-conditional":[124],"hierarchical":[126],"CTC,":[128],"apply":[130],"auxiliary":[131],"losses":[133],"layers":[136],"assumption":[142],"explicitly":[144],"relaxed.":[145],"also":[147],"explore":[148],"how":[149],"should":[151],"be":[152],"generated":[153],"used":[155],"supervision":[157],"losses.":[160],"Experimental":[161],"results":[162],"different":[164],"settings":[166],"demonstrate":[167],"proposed":[170],"approach":[171],"outperforms":[172],"improves":[175],"an":[176],"up":[180],"12.1%":[183],"absolute":[184],"gain.":[186],"addition,":[188],"our":[189],"detailed":[190],"analysis":[191],"validates":[192],"importance":[194],"loss.":[198]},"counts_by_year":[],"updated_date":"2026-06-05T09:01:59.212387","created_date":"2025-10-10T00:00:00"}
