{"id":"https://openalex.org/W4392902944","doi":"https://doi.org/10.1109/icassp48485.2024.10448488","title":"Towards High-Performance and Low-Latency Feature-Based Speaker Adaptation of Conformer Speech Recognition Systems","display_name":"Towards High-Performance and Low-Latency Feature-Based Speaker Adaptation of Conformer Speech Recognition Systems","publication_year":2024,"publication_date":"2024-03-18","ids":{"openalex":"https://openalex.org/W4392902944","doi":"https://doi.org/10.1109/icassp48485.2024.10448488"},"language":"en","primary_location":{"id":"doi:10.1109/icassp48485.2024.10448488","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp48485.2024.10448488","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5106406454","display_name":"Jiajun Deng","orcid":"https://orcid.org/0000-0001-8874-4167"},"institutions":[{"id":"https://openalex.org/I177725633","display_name":"Chinese University of Hong Kong","ror":"https://ror.org/00t33hh48","country_code":"CN","type":"education","lineage":["https://openalex.org/I177725633"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Jiajun Deng","raw_affiliation_strings":["The Chinese University of Hong Kong,Hong Kong SAR,China","The Chinese University of Hong Kong, Hong Kong SAR, China"],"affiliations":[{"raw_affiliation_string":"The Chinese University of Hong Kong,Hong Kong SAR,China","institution_ids":["https://openalex.org/I177725633"]},{"raw_affiliation_string":"The Chinese University of Hong Kong, Hong Kong SAR, China","institution_ids":["https://openalex.org/I177725633"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5035857878","display_name":"Xurong Xie","orcid":"https://orcid.org/0000-0002-6714-6296"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"funder","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210128818","display_name":"Institute of Software","ror":"https://ror.org/033dfsn42","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210128818"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xurong Xie","raw_affiliation_strings":["Chinese Academy of Sciences,Institute of Software,Beijing,China","Institute of Software, Chinese Academy of Sciences, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Chinese Academy of Sciences,Institute of Software,Beijing,China","institution_ids":["https://openalex.org/I4210128818","https://openalex.org/I19820366"]},{"raw_affiliation_string":"Institute of Software, Chinese Academy of Sciences, Beijing, China","institution_ids":["https://openalex.org/I4210128818","https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5042515126","display_name":"Guinan Li","orcid":"https://orcid.org/0000-0002-2206-0237"},"institutions":[{"id":"https://openalex.org/I177725633","display_name":"Chinese University of Hong Kong","ror":"https://ror.org/00t33hh48","country_code":"CN","type":"education","lineage":["https://openalex.org/I177725633"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Guinan Li","raw_affiliation_strings":["The Chinese University of Hong Kong,Hong Kong SAR,China","The Chinese University of Hong Kong, Hong Kong SAR, China"],"affiliations":[{"raw_affiliation_string":"The Chinese University of Hong Kong,Hong Kong SAR,China","institution_ids":["https://openalex.org/I177725633"]},{"raw_affiliation_string":"The Chinese University of Hong Kong, Hong Kong SAR, China","institution_ids":["https://openalex.org/I177725633"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5065896539","display_name":"Mingyu Cui","orcid":"https://orcid.org/0000-0001-9781-9407"},"institutions":[{"id":"https://openalex.org/I177725633","display_name":"Chinese University of Hong Kong","ror":"https://ror.org/00t33hh48","country_code":"CN","type":"education","lineage":["https://openalex.org/I177725633"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Mingyu Cui","raw_affiliation_strings":["The Chinese University of Hong Kong,Hong Kong SAR,China","The Chinese University of Hong Kong, Hong Kong SAR, China"],"affiliations":[{"raw_affiliation_string":"The Chinese University of Hong Kong,Hong Kong SAR,China","institution_ids":["https://openalex.org/I177725633"]},{"raw_affiliation_string":"The Chinese University of Hong Kong, Hong Kong SAR, China","institution_ids":["https://openalex.org/I177725633"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5003895235","display_name":"Mengzhe Geng","orcid":"https://orcid.org/0000-0002-7886-439X"},"institutions":[{"id":"https://openalex.org/I177725633","display_name":"Chinese University of Hong Kong","ror":"https://ror.org/00t33hh48","country_code":"CN","type":"education","lineage":["https://openalex.org/I177725633"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Mengzhe Geng","raw_affiliation_strings":["The Chinese University of Hong Kong,Hong Kong SAR,China","The Chinese University of Hong Kong, Hong Kong SAR, China"],"affiliations":[{"raw_affiliation_string":"The Chinese University of Hong Kong,Hong Kong SAR,China","institution_ids":["https://openalex.org/I177725633"]},{"raw_affiliation_string":"The Chinese University of Hong Kong, Hong Kong SAR, China","institution_ids":["https://openalex.org/I177725633"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5075023049","display_name":"Zengrui Jin","orcid":"https://orcid.org/0000-0002-2637-7880"},"institutions":[{"id":"https://openalex.org/I177725633","display_name":"Chinese University of Hong Kong","ror":"https://ror.org/00t33hh48","country_code":"CN","type":"education","lineage":["https://openalex.org/I177725633"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zengrui Jin","raw_affiliation_strings":["The Chinese University of Hong Kong,Hong Kong SAR,China","The Chinese University of Hong Kong, Hong Kong SAR, China"],"affiliations":[{"raw_affiliation_string":"The Chinese University of Hong Kong,Hong Kong SAR,China","institution_ids":["https://openalex.org/I177725633"]},{"raw_affiliation_string":"The Chinese University of Hong Kong, Hong Kong SAR, China","institution_ids":["https://openalex.org/I177725633"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5106407750","display_name":"Tianzi Wang","orcid":"https://orcid.org/0009-0005-5823-3039"},"institutions":[{"id":"https://openalex.org/I177725633","display_name":"Chinese University of Hong Kong","ror":"https://ror.org/00t33hh48","country_code":"CN","type":"education","lineage":["https://openalex.org/I177725633"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Tianzi Wang","raw_affiliation_strings":["The Chinese University of Hong Kong,Hong Kong SAR,China","The Chinese University of Hong Kong, Hong Kong SAR, China"],"affiliations":[{"raw_affiliation_string":"The Chinese University of Hong Kong,Hong Kong SAR,China","institution_ids":["https://openalex.org/I177725633"]},{"raw_affiliation_string":"The Chinese University of Hong Kong, Hong Kong SAR, China","institution_ids":["https://openalex.org/I177725633"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5026334377","display_name":"Shujie Hu","orcid":"https://orcid.org/0000-0002-8475-4912"},"institutions":[{"id":"https://openalex.org/I177725633","display_name":"Chinese University of Hong Kong","ror":"https://ror.org/00t33hh48","country_code":"CN","type":"education","lineage":["https://openalex.org/I177725633"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shujie Hu","raw_affiliation_strings":["The Chinese University of Hong Kong,Hong Kong SAR,China","The Chinese University of Hong Kong, Hong Kong SAR, China"],"affiliations":[{"raw_affiliation_string":"The Chinese University of Hong Kong,Hong Kong SAR,China","institution_ids":["https://openalex.org/I177725633"]},{"raw_affiliation_string":"The Chinese University of Hong Kong, Hong Kong SAR, China","institution_ids":["https://openalex.org/I177725633"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101675941","display_name":"Zhaoqing Li","orcid":"https://orcid.org/0000-0001-8649-4934"},"institutions":[{"id":"https://openalex.org/I177725633","display_name":"Chinese University of Hong Kong","ror":"https://ror.org/00t33hh48","country_code":"CN","type":"education","lineage":["https://openalex.org/I177725633"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhaoqing Li","raw_affiliation_strings":["The Chinese University of Hong Kong,Hong Kong SAR,China","The Chinese University of Hong Kong, Hong Kong SAR, China"],"affiliations":[{"raw_affiliation_string":"The Chinese University of Hong Kong,Hong Kong SAR,China","institution_ids":["https://openalex.org/I177725633"]},{"raw_affiliation_string":"The Chinese University of Hong Kong, Hong Kong SAR, China","institution_ids":["https://openalex.org/I177725633"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5037109470","display_name":"Xunying Liu","orcid":"https://orcid.org/0000-0001-6725-1160"},"institutions":[{"id":"https://openalex.org/I177725633","display_name":"Chinese University of Hong Kong","ror":"https://ror.org/00t33hh48","country_code":"CN","type":"education","lineage":["https://openalex.org/I177725633"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xunying Liu","raw_affiliation_strings":["The Chinese University of Hong Kong,Hong Kong SAR,China","The Chinese University of Hong Kong, Hong Kong SAR, China"],"affiliations":[{"raw_affiliation_string":"The Chinese University of Hong Kong,Hong Kong SAR,China","institution_ids":["https://openalex.org/I177725633"]},{"raw_affiliation_string":"The Chinese University of Hong Kong, Hong Kong SAR, China","institution_ids":["https://openalex.org/I177725633"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":10,"corresponding_author_ids":["https://openalex.org/A5106406454"],"corresponding_institution_ids":["https://openalex.org/I177725633"],"apc_list":null,"apc_paid":null,"fwci":0.3393,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.61267646,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"10976","last_page":"10980"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9980000257492065,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9915000200271606,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.770726203918457},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6986182928085327},{"id":"https://openalex.org/keywords/word-error-rate","display_name":"Word error rate","score":0.540435254573822},{"id":"https://openalex.org/keywords/speaker-diarisation","display_name":"Speaker diarisation","score":0.5278059244155884},{"id":"https://openalex.org/keywords/speaker-recognition","display_name":"Speaker recognition","score":0.5167995691299438},{"id":"https://openalex.org/keywords/latency","display_name":"Latency (audio)","score":0.5109893083572388},{"id":"https://openalex.org/keywords/subspace-topology","display_name":"Subspace topology","score":0.49715641140937805},{"id":"https://openalex.org/keywords/adaptation","display_name":"Adaptation (eye)","score":0.47865521907806396},{"id":"https://openalex.org/keywords/hidden-markov-model","display_name":"Hidden Markov model","score":0.4367789924144745},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.43587127327919006},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3820860683917999}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.770726203918457},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6986182928085327},{"id":"https://openalex.org/C40969351","wikidata":"https://www.wikidata.org/wiki/Q3516228","display_name":"Word error rate","level":2,"score":0.540435254573822},{"id":"https://openalex.org/C149838564","wikidata":"https://www.wikidata.org/wiki/Q7574248","display_name":"Speaker diarisation","level":3,"score":0.5278059244155884},{"id":"https://openalex.org/C133892786","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker recognition","level":2,"score":0.5167995691299438},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.5109893083572388},{"id":"https://openalex.org/C32834561","wikidata":"https://www.wikidata.org/wiki/Q660730","display_name":"Subspace topology","level":2,"score":0.49715641140937805},{"id":"https://openalex.org/C139807058","wikidata":"https://www.wikidata.org/wiki/Q352374","display_name":"Adaptation (eye)","level":2,"score":0.47865521907806396},{"id":"https://openalex.org/C23224414","wikidata":"https://www.wikidata.org/wiki/Q176769","display_name":"Hidden Markov model","level":2,"score":0.4367789924144745},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.43587127327919006},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3820860683917999},{"id":"https://openalex.org/C120665830","wikidata":"https://www.wikidata.org/wiki/Q14620","display_name":"Optics","level":1,"score":0.0},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp48485.2024.10448488","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp48485.2024.10448488","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.41999998688697815,"display_name":"Reduced inequalities","id":"https://metadata.un.org/sdg/10"}],"awards":[{"id":"https://openalex.org/G2087396116","display_name":null,"funder_award_id":"China","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G3317480652","display_name":null,"funder_award_id":"Science","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G391238517","display_name":null,"funder_award_id":", and","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5036096801","display_name":null,"funder_award_id":"2023119","funder_id":"https://openalex.org/F4320322847","funder_display_name":"Youth Innovation Promotion Association of the Chinese Academy of Sciences"},{"id":"https://openalex.org/G5939423041","display_name":null,"funder_award_id":"Technology","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5994120800","display_name":null,"funder_award_id":"Natural","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G6143720773","display_name":null,"funder_award_id":"62106255","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320322847","display_name":"Youth Innovation Promotion Association of the Chinese Academy of Sciences","ror":"https://ror.org/031141b54"},{"id":"https://openalex.org/F4320335892","display_name":"Youth Innovation Promotion Association","ror":null}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":46,"referenced_works":["https://openalex.org/W82936479","https://openalex.org/W1932968309","https://openalex.org/W1985371235","https://openalex.org/W2080005694","https://openalex.org/W2112021726","https://openalex.org/W2166637769","https://openalex.org/W2239847623","https://openalex.org/W2276408190","https://openalex.org/W2327501763","https://openalex.org/W2394882406","https://openalex.org/W2407299475","https://openalex.org/W2514740276","https://openalex.org/W2766219058","https://openalex.org/W2794506738","https://openalex.org/W2891980359","https://openalex.org/W2892009249","https://openalex.org/W2916301830","https://openalex.org/W2962940707","https://openalex.org/W2972389417","https://openalex.org/W2972621414","https://openalex.org/W2972799770","https://openalex.org/W2973213659","https://openalex.org/W2981857663","https://openalex.org/W3006752097","https://openalex.org/W3007550212","https://openalex.org/W3080248383","https://openalex.org/W3095311338","https://openalex.org/W3097777922","https://openalex.org/W3097904285","https://openalex.org/W3113244915","https://openalex.org/W3125118953","https://openalex.org/W3161365423","https://openalex.org/W3163793923","https://openalex.org/W3194334755","https://openalex.org/W3198654230","https://openalex.org/W3198769601","https://openalex.org/W4205251020","https://openalex.org/W4221154745","https://openalex.org/W4224917961","https://openalex.org/W4283654390","https://openalex.org/W4296069265","https://openalex.org/W4297841331","https://openalex.org/W4302557958","https://openalex.org/W4322731129","https://openalex.org/W4372260552","https://openalex.org/W4391021584"],"related_works":["https://openalex.org/W2206035908","https://openalex.org/W2149220986","https://openalex.org/W1493012537","https://openalex.org/W4247736853","https://openalex.org/W1992908141","https://openalex.org/W2162158162","https://openalex.org/W1999004162","https://openalex.org/W4245698648","https://openalex.org/W2125642021","https://openalex.org/W1521049138"],"abstract_inverted_index":{"Practical":[0],"application":[1],"of":[2,48,112,136,149],"model-based":[3,159],"speaker":[4,32,67,89,181],"adaptation":[5,33,102,160],"techniques":[6],"to":[7,62,79,114,138,151],"end-to-end":[8],"ASR":[9,43],"systems":[10],"is":[11,60,77],"hindered":[12],"by":[13],"speaker-level":[14,163],"data":[15,50,164],"scarcity":[16],"and":[17,28,86,123,128,143],"latency":[18],"in":[19,37],"speaker-dependent":[20],"(SD)":[21],"parameters":[22,59],"update.":[23],"To":[24],"this":[25,38],"end,":[26],"data-efficient":[27],"low-latency":[29],"rapid":[30,88],"feature-based":[31],"approaches":[34],"are":[35,154],"proposed":[36,101],"paper":[39],"for":[40],"state-of-the-art":[41],"Conformer":[42],"systems.":[44],"Compact":[45],"subspace":[46],"projection":[47],"training":[49],"estimated":[51],"SD":[52,83,174],"hidden":[53],"layer":[54],"output":[55],"scaling":[56],"or":[57],"bias":[58],"used":[61],"represent":[63],"the":[64,94,100,120,171],"most":[65],"distinctive":[66],"\"bases\".":[68],"A":[69],"feature-driven":[70],"prediction":[71],"network":[72],"containing":[73],"purpose-built":[74],"speaker-aware":[75],"memory":[76],"designed":[78],"on-the-fly":[80,172],"produce":[81],"homogeneous":[82],"basis":[84,175],"interpolation,":[85],"facilitate":[87],"adaptation.":[90],"Experimental":[91],"results":[92],"on":[93],"300-hr":[95],"Switchboard":[96],"corpus":[97],"suggest":[98],"that":[99],"approach":[103],"produces":[104],"statistically":[105],"significant":[106],"word":[107],"error":[108],"rate":[109],"(WER)":[110],"reductions":[111,135],"up":[113,137,147,150],"1.0%":[115],"absolute":[116,140],"(8.4%":[117],"relative)":[118,142],"over":[119,157],"baseline":[121],"speaker-independent":[122],"i-vector":[124],"adapted":[125],"Conformers":[126],"before":[127],"after":[129],"external":[130],"LM":[131],"rescoring.":[132],"Consistent":[133],"WER":[134],"2.0%":[139],"(16.3%":[141],"real-time":[144],"factor":[145],"speeding":[146],"ratios":[148],"10.9":[152],"times":[153],"also":[155],"obtained":[156],"offline":[158],"across":[161],"different":[162],"quantity":[165],"operating":[166],"points.":[167],"T-SNE":[168],"visualization":[169],"reveals":[170],"predicted":[173],"weights":[176],"present":[177],"intuitively":[178],"more":[179],"consistent":[180],"features":[182],"than":[183],"i-vectors.":[184]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2026-04-13T07:58:08.660418","created_date":"2025-10-10T00:00:00"}
