{"id":"https://openalex.org/W4372262420","doi":"https://doi.org/10.1109/icassp49357.2023.10095433","title":"Towards A Unified Conformer Structure: from ASR to ASV Task","display_name":"Towards A Unified Conformer Structure: from ASR to ASV Task","publication_year":2023,"publication_date":"2023-05-05","ids":{"openalex":"https://openalex.org/W4372262420","doi":"https://doi.org/10.1109/icassp49357.2023.10095433"},"language":"en","primary_location":{"id":"doi:10.1109/icassp49357.2023.10095433","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49357.2023.10095433","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5039844745","display_name":"Dexin Liao","orcid":null},"institutions":[{"id":"https://openalex.org/I191208505","display_name":"Xiamen University","ror":"https://ror.org/00mcjh785","country_code":"CN","type":"education","lineage":["https://openalex.org/I191208505"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Dexin Liao","raw_affiliation_strings":["Xiamen University,School of Informatics,China","School of Informatics, Xiamen University, China"],"affiliations":[{"raw_affiliation_string":"Xiamen University,School of Informatics,China","institution_ids":["https://openalex.org/I191208505"]},{"raw_affiliation_string":"School of Informatics, Xiamen University, China","institution_ids":["https://openalex.org/I191208505"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100364347","display_name":"Tao Jiang","orcid":"https://orcid.org/0000-0001-8937-679X"},"institutions":[{"id":"https://openalex.org/I4210166335","display_name":"Xiamen Tobacco Industry (China)","ror":"https://ror.org/05t1nkw30","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210166335"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Tao Jiang","raw_affiliation_strings":["Xiamen Talentedsoft Co., Ltd.,China","Xiamen Talentedsoft Co., Ltd., China"],"affiliations":[{"raw_affiliation_string":"Xiamen Talentedsoft Co., Ltd.,China","institution_ids":["https://openalex.org/I4210166335"]},{"raw_affiliation_string":"Xiamen Talentedsoft Co., Ltd., China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100612008","display_name":"Feng Wang","orcid":"https://orcid.org/0000-0001-6494-3639"},"institutions":[{"id":"https://openalex.org/I191208505","display_name":"Xiamen University","ror":"https://ror.org/00mcjh785","country_code":"CN","type":"education","lineage":["https://openalex.org/I191208505"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Feng Wang","raw_affiliation_strings":["Xiamen University,School of Informatics,China","School of Informatics, Xiamen University, China"],"affiliations":[{"raw_affiliation_string":"Xiamen University,School of Informatics,China","institution_ids":["https://openalex.org/I191208505"]},{"raw_affiliation_string":"School of Informatics, Xiamen University, China","institution_ids":["https://openalex.org/I191208505"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100412926","display_name":"Lin Li","orcid":"https://orcid.org/0000-0003-0426-6546"},"institutions":[{"id":"https://openalex.org/I75867142","display_name":"Xiamen University of Technology","ror":"https://ror.org/01285e189","country_code":"CN","type":"education","lineage":["https://openalex.org/I75867142"]},{"id":"https://openalex.org/I191208505","display_name":"Xiamen University","ror":"https://ror.org/00mcjh785","country_code":"CN","type":"education","lineage":["https://openalex.org/I191208505"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Lin Li","raw_affiliation_strings":["Xiamen University,School of Electronic Science and Engineering,China","School of Electronic Science and Engineering, Xiamen University, China"],"affiliations":[{"raw_affiliation_string":"Xiamen University,School of Electronic Science and Engineering,China","institution_ids":["https://openalex.org/I191208505"]},{"raw_affiliation_string":"School of Electronic Science and Engineering, Xiamen University, China","institution_ids":["https://openalex.org/I75867142"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5011997254","display_name":"Qingyang Hong","orcid":"https://orcid.org/0000-0001-7380-8690"},"institutions":[{"id":"https://openalex.org/I191208505","display_name":"Xiamen University","ror":"https://ror.org/00mcjh785","country_code":"CN","type":"education","lineage":["https://openalex.org/I191208505"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qingyang Hong","raw_affiliation_strings":["Xiamen University,School of Informatics,China","School of Informatics, Xiamen University, China"],"affiliations":[{"raw_affiliation_string":"Xiamen University,School of Informatics,China","institution_ids":["https://openalex.org/I191208505"]},{"raw_affiliation_string":"School of Informatics, Xiamen University, China","institution_ids":["https://openalex.org/I191208505"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5039844745"],"corresponding_institution_ids":["https://openalex.org/I191208505"],"apc_list":null,"apc_paid":null,"fwci":1.7646,"has_fulltext":false,"cited_by_count":9,"citation_normalized_percentile":{"value":0.86114245,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":96,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/conformational-isomerism","display_name":"Conformational isomerism","score":0.8377658128738403},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8066821098327637},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.5519888401031494},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4901333749294281},{"id":"https://openalex.org/keywords/convolutional-neural-network","display_name":"Convolutional neural network","score":0.4705009162425995},{"id":"https://openalex.org/keywords/transfer-of-learning","display_name":"Transfer of learning","score":0.4571291506290436},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.42350366711616516}],"concepts":[{"id":"https://openalex.org/C18705241","wikidata":"https://www.wikidata.org/wiki/Q1128023","display_name":"Conformational isomerism","level":3,"score":0.8377658128738403},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8066821098327637},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.5519888401031494},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4901333749294281},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.4705009162425995},{"id":"https://openalex.org/C150899416","wikidata":"https://www.wikidata.org/wiki/Q1820378","display_name":"Transfer of learning","level":2,"score":0.4571291506290436},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.42350366711616516},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.0},{"id":"https://openalex.org/C32909587","wikidata":"https://www.wikidata.org/wiki/Q11369","display_name":"Molecule","level":2,"score":0.0},{"id":"https://openalex.org/C178790620","wikidata":"https://www.wikidata.org/wiki/Q11351","display_name":"Organic chemistry","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp49357.2023.10095433","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49357.2023.10095433","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.6399999856948853,"display_name":"Quality Education"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":40,"referenced_works":["https://openalex.org/W2219249508","https://openalex.org/W2696967604","https://openalex.org/W2726515241","https://openalex.org/W2747165665","https://openalex.org/W2794506738","https://openalex.org/W2808631503","https://openalex.org/W2890964092","https://openalex.org/W2896457183","https://openalex.org/W2963959597","https://openalex.org/W2969985801","https://openalex.org/W2972743990","https://openalex.org/W2992043355","https://openalex.org/W3003903817","https://openalex.org/W3015598461","https://openalex.org/W3024869864","https://openalex.org/W3094502228","https://openalex.org/W3096918678","https://openalex.org/W3097777922","https://openalex.org/W3142516134","https://openalex.org/W3163421828","https://openalex.org/W3184661981","https://openalex.org/W3196514189","https://openalex.org/W3198452307","https://openalex.org/W3198694222","https://openalex.org/W3198698812","https://openalex.org/W3203407300","https://openalex.org/W4206908380","https://openalex.org/W4221154745","https://openalex.org/W4221167707","https://openalex.org/W4224916451","https://openalex.org/W4285206226","https://openalex.org/W4287646898","https://openalex.org/W4288091954","https://openalex.org/W4297841855","https://openalex.org/W4385245566","https://openalex.org/W4388979610","https://openalex.org/W6739901393","https://openalex.org/W6783600611","https://openalex.org/W6784333009","https://openalex.org/W6798696458"],"related_works":["https://openalex.org/W3183901164","https://openalex.org/W3135818718","https://openalex.org/W4290188444","https://openalex.org/W3167935049","https://openalex.org/W3003905048","https://openalex.org/W4206178588","https://openalex.org/W2253429366","https://openalex.org/W3127975138","https://openalex.org/W4287635093","https://openalex.org/W3094491777"],"abstract_inverted_index":{"Transformer":[0],"has":[1,23],"achieved":[2],"extraordinary":[3],"performance":[4,107],"in":[5,28,151,177,184],"Natural":[6],"Language":[7],"Processing":[8],"and":[9,19,49,83,97,158,169],"Computer":[10],"Vision":[11],"tasks":[12],"thanks":[13],"to":[14,73,89,124,166,179],"its":[15,20,181],"powerful":[16],"self-attention":[17,135],"mechanism,":[18],"variant":[21],"Conformer":[22,59,69,102,121,165],"become":[24],"a":[25,175],"state-of-the-art":[26],"architecture":[27,39,70],"the":[29,37,58,68,110,116,128,141,162],"field":[30],"of":[31,156,164],"Automatic":[32,41],"Speech":[33],"Recognition":[34],"(ASR).":[35],"However,":[36],"main-stream":[38],"for":[40,55],"Speaker":[42],"Verification":[43],"(ASV)":[44],"is":[45,51,122,189],"convolutional":[46],"Neural":[47],"Networks,":[48],"there":[50],"still":[52],"much":[53],"room":[54],"research":[56],"on":[57,95,140,153],"based":[60,103],"ASV.":[61],"In":[62],"this":[63],"paper,":[64],"firstly,":[65],"we":[66,173],"modify":[67],"from":[71,127],"ASR":[72,130,170],"ASV":[74,104,120,168],"with":[75,109],"very":[76],"minor":[77],"changes.":[78],"Length-Scaled":[79],"Attention":[80],"(LSA)":[81],"method":[82],"Sharpness-Aware":[84],"Minimization":[85],"(SAM)":[86],"are":[87],"adopted":[88],"improve":[90],"model":[91],"generalization.":[92],"Experiments":[93],"conducted":[94],"VoxCeleb":[96,157],"CN-Celeb":[98],"show":[99],"that":[100],"our":[101],"achieves":[105],"competitive":[106],"com-pared":[108],"popular":[111],"ECAPA-TDNN.":[112],"Secondly,":[113],"inspired":[114],"by":[115],"transfer":[117],"learning":[118],"strategy,":[119],"natural":[123],"be":[125],"initialized":[126],"pretrained":[129],"model.":[131],"Via":[132],"parameter":[133],"transferring,":[134],"mechanism":[136],"could":[137],"better":[138],"focus":[139],"relationship":[142],"between":[143],"sequence":[144],"features,":[145],"brings":[146],"about":[147],"11%":[148],"relative":[149],"improvement":[150],"EER":[152],"test":[154],"set":[155],"CN-Celeb,":[159],"which":[160],"reveals":[161],"potential":[163],"unify":[167],"task.":[171],"Finally,":[172],"provide":[174],"runtime":[176],"ASV-Subtools":[178],"evaluate":[180],"inference":[182],"speed":[183],"production":[185],"scenario.":[186],"Our":[187],"code":[188],"released":[190],"at":[191],"https://github.com/Snowdar/asv-subtools/tree/master/doc/papers/conformer.md.":[192]},"counts_by_year":[{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":6}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
