{"id":"https://openalex.org/W4392605605","doi":"https://doi.org/10.1145/3640824.3640861","title":"Exploring Accent Similarity for Cross-Accented Speech Recognition","display_name":"Exploring Accent Similarity for Cross-Accented Speech Recognition","publication_year":2024,"publication_date":"2024-01-26","ids":{"openalex":"https://openalex.org/W4392605605","doi":"https://doi.org/10.1145/3640824.3640861"},"language":"en","primary_location":{"id":"doi:10.1145/3640824.3640861","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3640824.3640861","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 8th International Conference on Control Engineering and Artificial Intelligence","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5017933101","display_name":"Hongjie Gu","orcid":"https://orcid.org/0009-0006-8179-1829"},"institutions":[{"id":"https://openalex.org/I17442442","display_name":"State Grid Corporation of China (China)","ror":"https://ror.org/05twwhs70","country_code":"CN","type":"company","lineage":["https://openalex.org/I17442442"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Hongjie Gu","raw_affiliation_strings":["Marketing Service Center, State Grid Zhejiang Electric Power Co., Ltd, China"],"affiliations":[{"raw_affiliation_string":"Marketing Service Center, State Grid Zhejiang Electric Power Co., Ltd, China","institution_ids":["https://openalex.org/I17442442"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101980581","display_name":"Sun Gang","orcid":"https://orcid.org/0009-0003-0219-6975"},"institutions":[{"id":"https://openalex.org/I17442442","display_name":"State Grid Corporation of China (China)","ror":"https://ror.org/05twwhs70","country_code":"CN","type":"company","lineage":["https://openalex.org/I17442442"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Gang Sun","raw_affiliation_strings":["Marketing Service Center, State Grid Zhejiang Electric Power Co., Ltd, China"],"affiliations":[{"raw_affiliation_string":"Marketing Service Center, State Grid Zhejiang Electric Power Co., Ltd, China","institution_ids":["https://openalex.org/I17442442"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5038544378","display_name":"Ran Shen","orcid":"https://orcid.org/0009-0008-6851-6931"},"institutions":[{"id":"https://openalex.org/I17442442","display_name":"State Grid Corporation of China (China)","ror":"https://ror.org/05twwhs70","country_code":"CN","type":"company","lineage":["https://openalex.org/I17442442"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ran Shen","raw_affiliation_strings":["Marketing Service Center, State Grid Zhejiang Electric Power Co., Ltd, China"],"affiliations":[{"raw_affiliation_string":"Marketing Service Center, State Grid Zhejiang Electric Power Co., Ltd, China","institution_ids":["https://openalex.org/I17442442"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5005602221","display_name":"Y.H. Wang","orcid":"https://orcid.org/0009-0006-2531-2213"},"institutions":[{"id":"https://openalex.org/I17442442","display_name":"State Grid Corporation of China (China)","ror":"https://ror.org/05twwhs70","country_code":"CN","type":"company","lineage":["https://openalex.org/I17442442"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yifan Wang","raw_affiliation_strings":["Marketing Service Center, State Grid Zhejiang Electric Power Co., Ltd, China"],"affiliations":[{"raw_affiliation_string":"Marketing Service Center, State Grid Zhejiang Electric Power Co., Ltd, China","institution_ids":["https://openalex.org/I17442442"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5083246300","display_name":"Weihao Jiang","orcid":"https://orcid.org/0009-0006-8093-4437"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Weihao Jiang","raw_affiliation_strings":["Zhejiang University, China"],"affiliations":[{"raw_affiliation_string":"Zhejiang University, China","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5055689719","display_name":"Junjie Huang","orcid":"https://orcid.org/0009-0009-2663-0848"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Junjie Huang","raw_affiliation_strings":["Zhejiang University, China"],"affiliations":[{"raw_affiliation_string":"Zhejiang University, China","institution_ids":["https://openalex.org/I76130692"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5017933101"],"corresponding_institution_ids":["https://openalex.org/I17442442"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.02068698,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"232","last_page":"237"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9976999759674072,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/stress","display_name":"Stress (linguistics)","score":0.7509353160858154},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.6560428142547607},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6208703517913818},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5667291283607483},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.520897388458252},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4473758935928345},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.44321373105049133}],"concepts":[{"id":"https://openalex.org/C2776756274","wikidata":"https://www.wikidata.org/wiki/Q181767","display_name":"Stress (linguistics)","level":2,"score":0.7509353160858154},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.6560428142547607},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6208703517913818},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5667291283607483},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.520897388458252},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4473758935928345},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.44321373105049133},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3640824.3640861","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3640824.3640861","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 8th International Conference on Control Engineering and Artificial Intelligence","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.7200000286102295}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":11,"referenced_works":["https://openalex.org/W2042141988","https://openalex.org/W2786835190","https://openalex.org/W2889494795","https://openalex.org/W2962684181","https://openalex.org/W2962893195","https://openalex.org/W3015585292","https://openalex.org/W3015723617","https://openalex.org/W3096215352","https://openalex.org/W3161686170","https://openalex.org/W3162061711","https://openalex.org/W4292387508"],"related_works":["https://openalex.org/W4367680763","https://openalex.org/W2739335048","https://openalex.org/W1506224037","https://openalex.org/W4213177143","https://openalex.org/W2151922127","https://openalex.org/W4237818736","https://openalex.org/W4252005663","https://openalex.org/W2365914832","https://openalex.org/W2352261042","https://openalex.org/W2314218554"],"abstract_inverted_index":{"In":[0,36],"recent":[1],"years,":[2],"speech":[3,12,18,50],"recognition":[4,19],"has":[5],"made":[6],"significant":[7],"progress,":[8],"but":[9],"recognizing":[10],"accented":[11],"remains":[13],"a":[14,42],"challenge.":[15],"Although":[16],"multi-accent":[17],"models":[20],"have":[21],"exhibited":[22],"remarkable":[23],"capabilities":[24],"across":[25],"different":[26],"accents,":[27],"their":[28],"performance":[29],"still":[30],"degrades":[31],"when":[32],"encountering":[33],"low-resource":[34,138],"accents.":[35,139],"this":[37],"paper,":[38],"we":[39,70,102],"propose":[40],"AccentFusion,":[41],"framework":[43],"leveraging":[44],"accent":[45,59,81,84,108,125],"similarity":[46],"to":[47,57,74,89],"improve":[48],"cross-accent":[49],"recognition.":[51],"AccentFusion":[52,116],"employs":[53],"an":[54],"interaction-augmented":[55],"module":[56],"capture":[58],"similarities":[60],"between":[61],"source":[62,105],"and":[63,106],"target":[64,80,97,107],"accents":[65],"with":[66],"fine-grained":[67],"association.":[68],"Additionally,":[69],"use":[71],"fusion-guided":[72],"loss":[73],"supervise":[75],"the":[76,79,87,96,100,104,112,118],"weights":[77],"of":[78],"while":[82],"learning":[83],"similarity,":[85],"encouraging":[86],"model":[88],"focus":[90],"on":[91,95,111,117,137],"its":[92],"primary":[93],"attention":[94],"accent.":[98],"During":[99],"inference,":[101],"fuse":[103],"features":[109],"based":[110],"similarity.":[113],"We":[114],"evaluate":[115],"CommonVoice":[119],"corpus.":[120],"Experiments":[121],"demonstrate":[122],"that":[123],"fusing":[124],"information":[126],"improves":[127],"over":[128],"fine-tuning":[129],"baseline,":[130],"significantly":[131],"reducing":[132],"word":[133],"error":[134],"rates":[135],"(WER)":[136]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
