{"id":"https://openalex.org/W4389799542","doi":"https://doi.org/10.1109/taslp.2023.3343615","title":"Partitioning Attention Weight: Mitigating Adverse Effect of Incorrect Pseudo-Labels for Self-Supervised ASR","display_name":"Partitioning Attention Weight: Mitigating Adverse Effect of Incorrect Pseudo-Labels for Self-Supervised ASR","publication_year":2023,"publication_date":"2023-12-15","ids":{"openalex":"https://openalex.org/W4389799542","doi":"https://doi.org/10.1109/taslp.2023.3343615"},"language":"en","primary_location":{"id":"doi:10.1109/taslp.2023.3343615","is_oa":false,"landing_page_url":"https://doi.org/10.1109/taslp.2023.3343615","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102972218","display_name":"Jae-Hong Lee","orcid":"https://orcid.org/0009-0008-3717-2988"},"institutions":[{"id":"https://openalex.org/I4575257","display_name":"Hanyang University","ror":"https://ror.org/046865y68","country_code":"KR","type":"education","lineage":["https://openalex.org/I4575257"]}],"countries":["KR"],"is_corresponding":true,"raw_author_name":"Jae-Hong Lee","raw_affiliation_strings":["School of Electronics, Hanyang University, Seoul, South Korea"],"affiliations":[{"raw_affiliation_string":"School of Electronics, Hanyang University, Seoul, South Korea","institution_ids":["https://openalex.org/I4575257"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5002418613","display_name":"Joon\u2010Hyuk Chang","orcid":"https://orcid.org/0000-0003-2610-2323"},"institutions":[{"id":"https://openalex.org/I4575257","display_name":"Hanyang University","ror":"https://ror.org/046865y68","country_code":"KR","type":"education","lineage":["https://openalex.org/I4575257"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Joon-Hyuk Chang","raw_affiliation_strings":["School of Electronics, Hanyang University, Seoul, South Korea"],"affiliations":[{"raw_affiliation_string":"School of Electronics, Hanyang University, Seoul, South Korea","institution_ids":["https://openalex.org/I4575257"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5102972218"],"corresponding_institution_ids":["https://openalex.org/I4575257"],"apc_list":null,"apc_paid":null,"fwci":0.3476,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.67952705,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":96},"biblio":{"volume":"32","issue":null,"first_page":"891","last_page":"905"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10429","display_name":"EEG and Brain-Computer Interfaces","score":0.9951000213623047,"subfield":{"id":"https://openalex.org/subfields/2805","display_name":"Cognitive Neuroscience"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9922000169754028,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/adverse-effect","display_name":"Adverse effect","score":0.4920942187309265},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.44280025362968445},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.4125428795814514},{"id":"https://openalex.org/keywords/cognitive-psychology","display_name":"Cognitive psychology","score":0.3342324197292328},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.32888656854629517},{"id":"https://openalex.org/keywords/medicine","display_name":"Medicine","score":0.2651127576828003},{"id":"https://openalex.org/keywords/internal-medicine","display_name":"Internal medicine","score":0.08997908234596252}],"concepts":[{"id":"https://openalex.org/C197934379","wikidata":"https://www.wikidata.org/wiki/Q2047938","display_name":"Adverse effect","level":2,"score":0.4920942187309265},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.44280025362968445},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.4125428795814514},{"id":"https://openalex.org/C180747234","wikidata":"https://www.wikidata.org/wiki/Q23373","display_name":"Cognitive psychology","level":1,"score":0.3342324197292328},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.32888656854629517},{"id":"https://openalex.org/C71924100","wikidata":"https://www.wikidata.org/wiki/Q11190","display_name":"Medicine","level":0,"score":0.2651127576828003},{"id":"https://openalex.org/C126322002","wikidata":"https://www.wikidata.org/wiki/Q11180","display_name":"Internal medicine","level":1,"score":0.08997908234596252}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/taslp.2023.3343615","is_oa":false,"landing_page_url":"https://doi.org/10.1109/taslp.2023.3343615","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.4699999988079071,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":82,"referenced_works":["https://openalex.org/W222076935","https://openalex.org/W854541894","https://openalex.org/W1494198834","https://openalex.org/W1508165687","https://openalex.org/W1548813916","https://openalex.org/W1556470778","https://openalex.org/W1922655562","https://openalex.org/W1983320747","https://openalex.org/W2111316763","https://openalex.org/W2127141656","https://openalex.org/W2143612262","https://openalex.org/W2160815625","https://openalex.org/W2327501763","https://openalex.org/W2407080277","https://openalex.org/W2617258110","https://openalex.org/W2747874407","https://openalex.org/W2799473636","https://openalex.org/W2802023636","https://openalex.org/W2802248956","https://openalex.org/W2892009249","https://openalex.org/W2933138175","https://openalex.org/W2936774411","https://openalex.org/W2953190524","https://openalex.org/W2962907457","https://openalex.org/W2972818416","https://openalex.org/W2973049979","https://openalex.org/W2979476256","https://openalex.org/W2980878766","https://openalex.org/W2995181338","https://openalex.org/W3004534439","https://openalex.org/W3007328579","https://openalex.org/W3015522062","https://openalex.org/W3015537910","https://openalex.org/W3026041220","https://openalex.org/W3043783436","https://openalex.org/W3093579165","https://openalex.org/W3096338464","https://openalex.org/W3097777922","https://openalex.org/W3101648800","https://openalex.org/W3141100132","https://openalex.org/W3148149416","https://openalex.org/W3160525311","https://openalex.org/W3162105464","https://openalex.org/W3162833755","https://openalex.org/W3163464943","https://openalex.org/W3196824004","https://openalex.org/W3197223534","https://openalex.org/W3197947674","https://openalex.org/W3198098585","https://openalex.org/W3205080563","https://openalex.org/W3209059054","https://openalex.org/W3209976096","https://openalex.org/W3209984917","https://openalex.org/W4224916519","https://openalex.org/W4225529283","https://openalex.org/W4226033575","https://openalex.org/W4297808394","https://openalex.org/W4297841421","https://openalex.org/W4297841888","https://openalex.org/W4311724836","https://openalex.org/W4385656656","https://openalex.org/W4385822648","https://openalex.org/W4386025763","https://openalex.org/W6608730375","https://openalex.org/W6609836551","https://openalex.org/W6623517193","https://openalex.org/W6631190155","https://openalex.org/W6631362777","https://openalex.org/W6632878996","https://openalex.org/W6633249632","https://openalex.org/W6640090968","https://openalex.org/W6675409298","https://openalex.org/W6687566353","https://openalex.org/W6739901393","https://openalex.org/W6755207826","https://openalex.org/W6766648584","https://openalex.org/W6769196770","https://openalex.org/W6780218876","https://openalex.org/W6784614252","https://openalex.org/W6810007534","https://openalex.org/W6844194202","https://openalex.org/W6846807982"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052"],"abstract_inverted_index":{"The":[0,151],"performance":[1,178,182],"of":[2,27,104],"automatic":[3],"speech":[4],"recognition":[5],"(ASR)":[6],"models":[7],"has":[8],"been":[9],"significantly":[10],"improved":[11],"owing":[12],"to":[13,33,49,65,99,128,159],"advances":[14],"in":[15,180],"deep":[16],"learning":[17,36,62,161,186],"and":[18,41,63,80,163],"end-to-end":[19],"approaches.":[20],"However,":[21,70],"these":[22,71],"require":[23],"a":[24,89,120],"large":[25],"amount":[26],"labeled":[28,54],"data,":[29],"which":[30],"are":[31],"expensive":[32],"obtain.":[34],"Semi-supervised":[35],"techniques,":[37],"such":[38],"as":[39,46,147,149],"pseudo-labeling":[40,64,171],"self-supervised":[42,61],"learning,":[43],"have":[44,59],"emerged":[45],"potential":[47],"solutions":[48],"reduce":[50,81],"the":[51,101,133,136,140,165,170],"reliance":[52],"on":[53],"data.":[55],"Recently,":[56],"some":[57],"studies":[58],"combined":[60],"further":[66],"enhance":[67],"ASR":[68,82,181],"performance.":[69,83],"methods":[72],"suffer":[73],"from":[74,144],"incorrect":[75,105,145],"pseudo-labels":[76],"that":[77,132],"propagate":[78],"errors":[79],"In":[84],"this":[85],"paper,":[86],"we":[87],"propose":[88],"novel":[90],"method":[91,114,153],"called":[92],"<italic":[93],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[94],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">partitioning":[95],"attention":[96,123,142],"weight</i>":[97],"(PAW)":[98],"mitigate":[100],"adverse":[102,130],"effects":[103,131],"labels":[106,146],"without":[107],"requiring":[108,156],"additional":[109],"language":[110],"models.":[111],"Our":[112,173],"proposed":[113,152],"isolates":[115],"audio":[116],"segments":[117],"by":[118],"partitioning":[119],"fully":[121],"connected":[122],"weight":[124],"into":[125],"sub-attention":[126],"weights":[127,143],"prevent":[129],"model":[134],"learns":[135],"wrong":[137],"context":[138],"for":[139],"entire":[141],"well":[148],"overfitting.":[150],"is":[154],"simple,":[155],"few":[157],"changes":[158],"existing":[160],"frameworks,":[162],"leverages":[164],"alignment":[166],"information":[167],"obtained":[168],"during":[169],"process.":[172],"experimental":[174],"results":[175],"show":[176],"consistent":[177],"improvements":[179],"across":[183],"various":[184],"semi-supervised":[185],"scenarios.":[187]},"counts_by_year":[{"year":2025,"cited_by_count":2}],"updated_date":"2026-04-03T22:45:19.894376","created_date":"2025-10-10T00:00:00"}
