{"id":"https://openalex.org/W3161716720","doi":"https://doi.org/10.1109/icassp39728.2021.9415017","title":"A Sequential Contrastive Learning Framework for Robust Dysarthric Speech Recognition","display_name":"A Sequential Contrastive Learning Framework for Robust Dysarthric Speech Recognition","publication_year":2021,"publication_date":"2021-05-13","ids":{"openalex":"https://openalex.org/W3161716720","doi":"https://doi.org/10.1109/icassp39728.2021.9415017","mag":"3161716720"},"language":"en","primary_location":{"id":"doi:10.1109/icassp39728.2021.9415017","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp39728.2021.9415017","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2021 - 2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5015090325","display_name":"Lidan Wu","orcid":null},"institutions":[{"id":"https://openalex.org/I66867065","display_name":"East China Normal University","ror":"https://ror.org/02n96ep67","country_code":"CN","type":"education","lineage":["https://openalex.org/I66867065"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Lidan Wu","raw_affiliation_strings":["East China Normal University,School of Computer Science and Technology,China","School of Computer Science and Technology, East China Normal University, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"East China Normal University,School of Computer Science and Technology,China","institution_ids":["https://openalex.org/I66867065"]},{"raw_affiliation_string":"School of Computer Science and Technology, East China Normal University, China","institution_ids":["https://openalex.org/I66867065"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5004411500","display_name":"Daoming Zong","orcid":"https://orcid.org/0009-0004-8109-2943"},"institutions":[{"id":"https://openalex.org/I66867065","display_name":"East China Normal University","ror":"https://ror.org/02n96ep67","country_code":"CN","type":"education","lineage":["https://openalex.org/I66867065"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Daoming Zong","raw_affiliation_strings":["East China Normal University,School of Computer Science and Technology,China","School of Computer Science and Technology, East China Normal University, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"East China Normal University,School of Computer Science and Technology,China","institution_ids":["https://openalex.org/I66867065"]},{"raw_affiliation_string":"School of Computer Science and Technology, East China Normal University, China","institution_ids":["https://openalex.org/I66867065"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5047846625","display_name":"Shiliang Sun","orcid":"https://orcid.org/0000-0001-7069-3752"},"institutions":[{"id":"https://openalex.org/I66867065","display_name":"East China Normal University","ror":"https://ror.org/02n96ep67","country_code":"CN","type":"education","lineage":["https://openalex.org/I66867065"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shiliang Sun","raw_affiliation_strings":["East China Normal University,School of Computer Science and Technology,China","School of Computer Science and Technology, East China Normal University, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"East China Normal University,School of Computer Science and Technology,China","institution_ids":["https://openalex.org/I66867065"]},{"raw_affiliation_string":"School of Computer Science and Technology, East China Normal University, China","institution_ids":["https://openalex.org/I66867065"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5065846668","display_name":"Jing Zhao","orcid":"https://orcid.org/0000-0003-0158-5330"},"institutions":[{"id":"https://openalex.org/I66867065","display_name":"East China Normal University","ror":"https://ror.org/02n96ep67","country_code":"CN","type":"education","lineage":["https://openalex.org/I66867065"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jing Zhao","raw_affiliation_strings":["East China Normal University,School of Computer Science and Technology,China","School of Computer Science and Technology, East China Normal University, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"East China Normal University,School of Computer Science and Technology,China","institution_ids":["https://openalex.org/I66867065"]},{"raw_affiliation_string":"School of Computer Science and Technology, East China Normal University, China","institution_ids":["https://openalex.org/I66867065"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5015090325"],"corresponding_institution_ids":["https://openalex.org/I66867065"],"apc_list":null,"apc_paid":null,"fwci":1.6792,"has_fulltext":false,"cited_by_count":19,"citation_normalized_percentile":{"value":0.86799404,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"7303","last_page":"7307"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10863","display_name":"Voice and Speech Disorders","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/2737","display_name":"Physiology"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/dysarthria","display_name":"Dysarthria","score":0.9199988842010498},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.7362508177757263},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6874603033065796},{"id":"https://openalex.org/keywords/projection","display_name":"Projection (relational algebra)","score":0.41226184368133545},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.41186216473579407},{"id":"https://openalex.org/keywords/training-set","display_name":"Training set","score":0.411540150642395},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3238997161388397},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.1924695372581482}],"concepts":[{"id":"https://openalex.org/C2777639682","wikidata":"https://www.wikidata.org/wiki/Q225957","display_name":"Dysarthria","level":2,"score":0.9199988842010498},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.7362508177757263},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6874603033065796},{"id":"https://openalex.org/C57493831","wikidata":"https://www.wikidata.org/wiki/Q3134666","display_name":"Projection (relational algebra)","level":2,"score":0.41226184368133545},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.41186216473579407},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.411540150642395},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3238997161388397},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.1924695372581482},{"id":"https://openalex.org/C118552586","wikidata":"https://www.wikidata.org/wiki/Q7867","display_name":"Psychiatry","level":1,"score":0.0},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp39728.2021.9415017","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp39728.2021.9415017","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2021 - 2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":34,"referenced_works":["https://openalex.org/W1494198834","https://openalex.org/W2018363392","https://openalex.org/W2115692477","https://openalex.org/W2327501763","https://openalex.org/W2627092829","https://openalex.org/W2749128330","https://openalex.org/W2792759062","https://openalex.org/W2888807255","https://openalex.org/W2889162002","https://openalex.org/W2889469831","https://openalex.org/W2892009249","https://openalex.org/W2904666201","https://openalex.org/W2936123380","https://openalex.org/W2936774411","https://openalex.org/W2936861580","https://openalex.org/W2948012107","https://openalex.org/W2963403868","https://openalex.org/W2963827914","https://openalex.org/W2971155163","https://openalex.org/W2972389417","https://openalex.org/W2972838300","https://openalex.org/W3005680577","https://openalex.org/W3013783381","https://openalex.org/W3014690389","https://openalex.org/W3015210890","https://openalex.org/W4385245566","https://openalex.org/W6677093080","https://openalex.org/W6739901393","https://openalex.org/W6741807409","https://openalex.org/W6754299077","https://openalex.org/W6763416564","https://openalex.org/W6774314701","https://openalex.org/W6775713988","https://openalex.org/W6775988622"],"related_works":["https://openalex.org/W2331173358","https://openalex.org/W2969484279","https://openalex.org/W3160456149","https://openalex.org/W2168872498","https://openalex.org/W4400873482","https://openalex.org/W4239608382","https://openalex.org/W2329762060","https://openalex.org/W2517018229","https://openalex.org/W2351669973","https://openalex.org/W2312964388"],"abstract_inverted_index":{"Dysarthria":[0],"is":[1,119],"a":[2,55,101],"manifestation":[3],"of":[4,23,82,89,103],"disruption":[5],"in":[6,11,32],"the":[7,20,41,67,83,87,117,125],"neuromuscular":[8],"physiology":[9],"resulting":[10],"uneven,":[12],"slow,":[13],"slurred,":[14],"harsh,":[15],"or":[16,122],"quiet":[17],"speech.":[18],"Despite":[19],"remarkable":[21],"progress":[22],"automatic":[24],"speech":[25,62,69,72],"recognition":[26,63],"(ASR),":[27],"it":[28],"poses":[29],"great":[30],"challenges":[31],"developing":[33],"stable":[34],"ASR":[35],"for":[36,59,107],"dysarthric":[37,61,68],"individuals":[38],"due":[39],"to":[40,78,124],"high":[42],"intra-":[43],"and":[44,47],"inter-speaker":[45],"variations":[46],"data":[48,73],"deficiency.":[49],"In":[50],"this":[51],"paper,":[52],"we":[53],"propose":[54],"contrastive":[56,109],"learning":[57],"framework":[58],"robust":[60],"(DSR)":[64],"by":[65],"capturing":[66],"variability.":[70],"Several":[71],"augmentation":[74],"strategies":[75],"are":[76],"explored":[77],"form":[79],"two":[80],"branches":[81],"framework,":[84],"meanwhile":[85],"alleviating":[86],"scarcity":[88],"dysarthria":[90],"data.":[91],"We":[92],"also":[93],"develop":[94],"an":[95],"efficient":[96],"projection":[97],"head":[98],"acting":[99],"on":[100,113],"sequence":[102],"learned":[104],"hidden":[105],"representations":[106],"defining":[108],"loss.":[110],"Experiment":[111],"results":[112],"DSR":[114],"demonstrate":[115],"that":[116],"model":[118],"better":[120],"than":[121],"comparable":[123],"supervised":[126],"baseline.":[127]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":6},{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":7},{"year":2022,"cited_by_count":2}],"updated_date":"2026-05-14T08:36:36.166977","created_date":"2025-10-10T00:00:00"}
