{"id":"https://openalex.org/W4406859726","doi":"https://doi.org/10.1109/apsipaasc63619.2025.10848962","title":"Adversarial Augmentation and Adaptation for Speech Recognition","display_name":"Adversarial Augmentation and Adaptation for Speech Recognition","publication_year":2024,"publication_date":"2024-12-03","ids":{"openalex":"https://openalex.org/W4406859726","doi":"https://doi.org/10.1109/apsipaasc63619.2025.10848962"},"language":"en","primary_location":{"id":"doi:10.1109/apsipaasc63619.2025.10848962","is_oa":false,"landing_page_url":"https://doi.org/10.1109/apsipaasc63619.2025.10848962","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 Asia Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5061908942","display_name":"Jen\u2010Tzung Chien","orcid":"https://orcid.org/0000-0003-3466-8941"},"institutions":[{"id":"https://openalex.org/I148366613","display_name":"National Yang Ming Chiao Tung University","ror":"https://ror.org/00se2k293","country_code":"TW","type":"education","lineage":["https://openalex.org/I148366613"]}],"countries":["TW"],"is_corresponding":true,"raw_author_name":"Jen-Tzung Chien","raw_affiliation_strings":["National Yang Ming Chiao Tung University,Institute of Electrical and Computer Engineering,Hsinchu,Taiwan"],"affiliations":[{"raw_affiliation_string":"National Yang Ming Chiao Tung University,Institute of Electrical and Computer Engineering,Hsinchu,Taiwan","institution_ids":["https://openalex.org/I148366613"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5006164943","display_name":"Weiyu Sun","orcid":null},"institutions":[{"id":"https://openalex.org/I148366613","display_name":"National Yang Ming Chiao Tung University","ror":"https://ror.org/00se2k293","country_code":"TW","type":"education","lineage":["https://openalex.org/I148366613"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Wei-Yu Sun","raw_affiliation_strings":["National Yang Ming Chiao Tung University,Institute of Electrical and Computer Engineering,Hsinchu,Taiwan"],"affiliations":[{"raw_affiliation_string":"National Yang Ming Chiao Tung University,Institute of Electrical and Computer Engineering,Hsinchu,Taiwan","institution_ids":["https://openalex.org/I148366613"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5061908942"],"corresponding_institution_ids":["https://openalex.org/I148366613"],"apc_list":null,"apc_paid":null,"fwci":0.375,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.62560231,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9621999859809875,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9621999859809875,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9264000058174133,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/adversarial-system","display_name":"Adversarial system","score":0.7910440564155579},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7308259606361389},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6892933249473572},{"id":"https://openalex.org/keywords/adaptation","display_name":"Adaptation (eye)","score":0.6833635568618774},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.40793168544769287},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.10277608036994934}],"concepts":[{"id":"https://openalex.org/C37736160","wikidata":"https://www.wikidata.org/wiki/Q1801315","display_name":"Adversarial system","level":2,"score":0.7910440564155579},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7308259606361389},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6892933249473572},{"id":"https://openalex.org/C139807058","wikidata":"https://www.wikidata.org/wiki/Q352374","display_name":"Adaptation (eye)","level":2,"score":0.6833635568618774},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.40793168544769287},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.10277608036994934},{"id":"https://openalex.org/C169760540","wikidata":"https://www.wikidata.org/wiki/Q207011","display_name":"Neuroscience","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/apsipaasc63619.2025.10848962","is_oa":false,"landing_page_url":"https://doi.org/10.1109/apsipaasc63619.2025.10848962","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 Asia Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":22,"referenced_works":["https://openalex.org/W1494198834","https://openalex.org/W2936774411","https://openalex.org/W2985884876","https://openalex.org/W3198347216","https://openalex.org/W3209059054","https://openalex.org/W4225322339","https://openalex.org/W4293846201","https://openalex.org/W4296068966","https://openalex.org/W4385245566","https://openalex.org/W4385822470","https://openalex.org/W4385823060","https://openalex.org/W4387783582","https://openalex.org/W4388820551","https://openalex.org/W4392909075","https://openalex.org/W4400188193","https://openalex.org/W6730161283","https://openalex.org/W6759129252","https://openalex.org/W6761100157","https://openalex.org/W6771467084","https://openalex.org/W6773944720","https://openalex.org/W6780154528","https://openalex.org/W6780218876"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2502115930","https://openalex.org/W2482350142","https://openalex.org/W4246396837","https://openalex.org/W3126451824","https://openalex.org/W1561927205","https://openalex.org/W3191453585","https://openalex.org/W4297672492"],"abstract_inverted_index":{"It":[0],"is":[1,36,44,103],"crucial":[2],"to":[3,8,15,38,59,66,150],"conduct":[4],"parameter":[5],"efficient":[6],"learning":[7],"adapt":[9],"a":[10,16,85,93,107],"large-scaled":[11],"pre-trained":[12,95],"backbone":[13,96],"model":[14,42,78,97,182],"downstream":[17],"task":[18],"where":[19,98],"the":[20,33,41,61,68,74,99,122,130,135,144,152,167,170],"desirable":[21],"performance":[22],"could":[23],"be":[24,116],"achieved":[25],"for":[26,89],"low-resource":[27],"automatic":[28],"speech":[29,124,154],"recognition":[30],"(ASR).":[31],"However,":[32],"overfitting":[34,75],"problem":[35],"prone":[37],"happen":[39],"when":[40,133],"adaptation":[43,175],"performed":[45],"through":[46],"fine-tuning":[47],"individual":[48],"parameters.":[49],"The":[50,141,156],"previous":[51],"studies":[52],"have":[53],"explored":[54],"different":[55],"data":[56,69,101,125],"augmentation":[57,102,173],"methods":[58],"increase":[60],"size":[62],"of":[63,110,143,169,178],"training":[64,88,134],"samples":[65],"enrich":[67],"coverage":[70],"and":[71,163,174,181],"accordingly":[72],"alleviate":[73],"issue":[76],"in":[77,113,137,176],"training.":[79],"In":[80,119],"particular,":[81],"this":[82,120],"paper":[83],"presents":[84],"new":[86],"adversarial":[87,100,123,131,146,172],"ASR":[90,139,159],"based":[91],"on":[92,158],"frozen":[94],"implemented":[104],"so":[105],"that":[106],"small":[108],"amount":[109],"controllable":[111],"parameters":[112],"adapters":[114,136],"can":[115],"sufficiently":[117],"estimated.":[118],"study,":[121],"are":[126,148],"generated":[127],"by":[128],"adding":[129],"perturbations":[132],"an":[138],"model.":[140],"gradients":[142],"intermediate":[145],"examples":[147],"accumulated":[149],"calculate":[151],"augmented":[153],"samples.":[155],"experiments":[157],"using":[160],"Common":[161],"Voice":[162],"LibriSpeech":[164],"datasets":[165],"show":[166],"merit":[168],"proposed":[171],"terms":[177],"error":[179],"rate":[180],"size.":[183]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2025-12-27T23:08:20.325037","created_date":"2025-10-10T00:00:00"}
