{"id":"https://openalex.org/W4408353672","doi":"https://doi.org/10.1109/icassp49660.2025.10887674","title":"Similarity-based Accent Recognition with Continuous and Discrete Self-supervised Speech Representations","display_name":"Similarity-based Accent Recognition with Continuous and Discrete Self-supervised Speech Representations","publication_year":2025,"publication_date":"2025-03-12","ids":{"openalex":"https://openalex.org/W4408353672","doi":"https://doi.org/10.1109/icassp49660.2025.10887674"},"language":"en","primary_location":{"id":"doi:10.1109/icassp49660.2025.10887674","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49660.2025.10887674","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5016862479","display_name":"Jun-You Wang","orcid":"https://orcid.org/0000-0002-9119-9259"},"institutions":[{"id":"https://openalex.org/I84653119","display_name":"Academia Sinica","ror":"https://ror.org/05bxb3784","country_code":"TW","type":"facility","lineage":["https://openalex.org/I84653119"]}],"countries":["TW"],"is_corresponding":true,"raw_author_name":"Jun-You Wang","raw_affiliation_strings":["Academia Sinica,Taipei,Taiwan"],"affiliations":[{"raw_affiliation_string":"Academia Sinica,Taipei,Taiwan","institution_ids":["https://openalex.org/I84653119"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5053726259","display_name":"Sheng Li","orcid":"https://orcid.org/0000-0001-7636-3797"},"institutions":[{"id":"https://openalex.org/I90023481","display_name":"National Institute of Information and Communications Technology","ror":"https://ror.org/016bgq349","country_code":"JP","type":"facility","lineage":["https://openalex.org/I90023481"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Sheng Li","raw_affiliation_strings":["National Institute of Information and Communications Technology,Kyoto,Japan"],"affiliations":[{"raw_affiliation_string":"National Institute of Information and Communications Technology,Kyoto,Japan","institution_ids":["https://openalex.org/I90023481"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101562441","display_name":"Liang Lu","orcid":"https://orcid.org/0000-0003-1154-1892"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li-An Lu","raw_affiliation_strings":["Taipei First Girls High School,Taipei,Taiwan"],"affiliations":[{"raw_affiliation_string":"Taipei First Girls High School,Taipei,Taiwan","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5109009345","display_name":"Shanchen Kao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sydney Chia-Chun Kao","raw_affiliation_strings":["Taipei First Girls High School,Taipei,Taiwan"],"affiliations":[{"raw_affiliation_string":"Taipei First Girls High School,Taipei,Taiwan","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5073451247","display_name":"Jyh\u2010Shing Roger Jang","orcid":"https://orcid.org/0000-0002-7319-9095"},"institutions":[{"id":"https://openalex.org/I16733864","display_name":"National Taiwan University","ror":"https://ror.org/05bqach95","country_code":"TW","type":"education","lineage":["https://openalex.org/I16733864"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Jyh-Shing Roger Jang","raw_affiliation_strings":["National Taiwan University,Taipei,Taiwan"],"affiliations":[{"raw_affiliation_string":"National Taiwan University,Taipei,Taiwan","institution_ids":["https://openalex.org/I16733864"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5016862479"],"corresponding_institution_ids":["https://openalex.org/I84653119"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.01835917,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9144999980926514,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9144999980926514,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.7509740591049194},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.69081050157547},{"id":"https://openalex.org/keywords/stress","display_name":"Stress (linguistics)","score":0.6775577664375305},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.6733890771865845},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5650144219398499},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5093642473220825},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.41386252641677856}],"concepts":[{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.7509740591049194},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.69081050157547},{"id":"https://openalex.org/C2776756274","wikidata":"https://www.wikidata.org/wiki/Q181767","display_name":"Stress (linguistics)","level":2,"score":0.6775577664375305},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.6733890771865845},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5650144219398499},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5093642473220825},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.41386252641677856},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/icassp49660.2025.10887674","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49660.2025.10887674","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},{"id":"pmh:oai:t2r2.star.titech.ac.jp:50729906","is_oa":false,"landing_page_url":"http://t2r2.star.titech.ac.jp/cgi-bin/publicationinfo.cgi?q_publication_content_number=CTT100931503","pdf_url":null,"source":{"id":"https://openalex.org/S4377196385","display_name":"Tokyo Tech Research Repository (Tokyo Institute of Technology)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I114531698","host_organization_name":"Tokyo Institute of Technology","host_organization_lineage":["https://openalex.org/I114531698"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Conference Paper"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320331164","display_name":"National Science and Technology Council","ror":"https://ror.org/00wnb9798"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":28,"referenced_works":["https://openalex.org/W1494198834","https://openalex.org/W2936774411","https://openalex.org/W3012374325","https://openalex.org/W3025754292","https://openalex.org/W3044481399","https://openalex.org/W3089432592","https://openalex.org/W3160475509","https://openalex.org/W3161674998","https://openalex.org/W3162508345","https://openalex.org/W3197530164","https://openalex.org/W3197561413","https://openalex.org/W3197580070","https://openalex.org/W3200084433","https://openalex.org/W3209984917","https://openalex.org/W4283703884","https://openalex.org/W4287854499","https://openalex.org/W4293234010","https://openalex.org/W4297841566","https://openalex.org/W4328031121","https://openalex.org/W4385151928","https://openalex.org/W4385245566","https://openalex.org/W4385823404","https://openalex.org/W4388642341","https://openalex.org/W4392543668","https://openalex.org/W4392911117","https://openalex.org/W6772349387","https://openalex.org/W6777335856","https://openalex.org/W6784874394"],"related_works":["https://openalex.org/W4367680763","https://openalex.org/W2739335048","https://openalex.org/W1506224037","https://openalex.org/W4213177143","https://openalex.org/W2151922127","https://openalex.org/W4237818736","https://openalex.org/W4252005663","https://openalex.org/W2365914832","https://openalex.org/W2352261042","https://openalex.org/W2314218554"],"abstract_inverted_index":{"The":[0],"primary":[1],"challenge":[2],"in":[3,7,30,127],"accent":[4,27,70,77,92,124,134],"recognition":[5,135],"lies":[6],"data":[8,24],"scarcity":[9],"due":[10],"to":[11,60,89,99,122],"the":[12,19,76,80,97,109,147,160],"high":[13],"diversity":[14],"of":[15,21,68,83,111,144,157],"accents,":[16],"which":[17],"make":[18],"collection":[20],"large-scale":[22],"training":[23],"for":[25],"each":[26],"almost":[28],"impossible":[29],"practice.":[31],"To":[32],"overcome":[33],"this":[34],"challenge,":[35],"we":[36],"propose":[37],"a":[38,61,66,117],"simple":[39],"solution":[40],"that":[41,102,137],"leverages":[42],"both":[43,168],"continuous":[44],"and":[45,65,79,119,151],"discrete":[46],"feature":[47],"representations":[48],"from":[49],"pretrained":[50],"speech":[51],"self-supervised":[52],"learning":[53],"(SSL)":[54],"models.":[55],"Our":[56,114],"model":[57,98,112,140],"is":[58,87],"simplified":[59],"linear":[62],"projection":[63],"layer":[64],"set":[67],"trainable":[69],"class":[71],"embeddings.":[72],"Cosine":[73],"similarity":[74],"between":[75],"embeddings":[78],"latent":[81],"features":[82,101],"an":[84,142,152],"audio":[85],"sample":[86],"used":[88],"predict":[90],"its":[91],"class.":[93],"This":[94],"approach":[95],"enables":[96],"access":[100],"contain":[103],"rich":[104],"accent-related":[105],"information":[106],"while":[107],"reducing":[108],"risk":[110],"overfitting.":[113],"method":[115],"provides":[116],"practical":[118],"efficient":[120],"way":[121],"tackle":[123],"recognition,":[125],"especially":[126],"low-resource":[128],"scenarios.":[129],"Experimental":[130],"results":[131,166],"on":[132,146,159,167],"English":[133],"show":[136],"our":[138],"best":[139],"achieves":[141],"accuracy":[143],"84.0%":[145],"AESRC":[148],"2020":[149],"dataset":[150],"Unweighted":[153],"Average":[154],"Recall":[155],"(UAR)":[156],"50.0%":[158],"VCTK":[161],"corpus,":[162],"setting":[163],"new":[164],"state-of-the-art":[165],"datasets.":[169]},"counts_by_year":[],"updated_date":"2026-04-05T17:49:38.594831","created_date":"2025-10-10T00:00:00"}
