{"id":"https://openalex.org/W4416798695","doi":"https://doi.org/10.1109/apsipaasc65261.2025.11249368","title":"Rethinking Robust ASR Strategies: Can Textual in-Context Learning Improve Acoustic Robustness?","display_name":"Rethinking Robust ASR Strategies: Can Textual in-Context Learning Improve Acoustic Robustness?","publication_year":2025,"publication_date":"2025-10-22","ids":{"openalex":"https://openalex.org/W4416798695","doi":"https://doi.org/10.1109/apsipaasc65261.2025.11249368"},"language":null,"primary_location":{"id":"doi:10.1109/apsipaasc65261.2025.11249368","is_oa":false,"landing_page_url":"https://doi.org/10.1109/apsipaasc65261.2025.11249368","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 Asia Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5061596551","display_name":"Benita Angela Titalim","orcid":null},"institutions":[{"id":"https://openalex.org/I75917431","display_name":"Nara Institute of Science and Technology","ror":"https://ror.org/05bhada84","country_code":"JP","type":"education","lineage":["https://openalex.org/I75917431"]}],"countries":["JP"],"is_corresponding":true,"raw_author_name":"Benita Angela Titalim","raw_affiliation_strings":["Nara Institute of Science and Technology,Japan"],"affiliations":[{"raw_affiliation_string":"Nara Institute of Science and Technology,Japan","institution_ids":["https://openalex.org/I75917431"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5083937952","display_name":"Faisal Mehmood","orcid":"https://orcid.org/0000-0003-4420-0517"},"institutions":[{"id":"https://openalex.org/I75917431","display_name":"Nara Institute of Science and Technology","ror":"https://ror.org/05bhada84","country_code":"JP","type":"education","lineage":["https://openalex.org/I75917431"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Faisal Mehmood","raw_affiliation_strings":["Nara Institute of Science and Technology,Japan"],"affiliations":[{"raw_affiliation_string":"Nara Institute of Science and Technology,Japan","institution_ids":["https://openalex.org/I75917431"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5040108974","display_name":"Sakriani Sakti","orcid":"https://orcid.org/0000-0001-5509-8963"},"institutions":[{"id":"https://openalex.org/I75917431","display_name":"Nara Institute of Science and Technology","ror":"https://ror.org/05bhada84","country_code":"JP","type":"education","lineage":["https://openalex.org/I75917431"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Sakriani Sakti","raw_affiliation_strings":["Nara Institute of Science and Technology,Japan"],"affiliations":[{"raw_affiliation_string":"Nara Institute of Science and Technology,Japan","institution_ids":["https://openalex.org/I75917431"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5061596551"],"corresponding_institution_ids":["https://openalex.org/I75917431"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.19444728,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"2541","last_page":"2546"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.8852999806404114,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.8852999806404114,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.03290000185370445,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.006899999920278788,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.7950999736785889},{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.534600019454956},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.4207000136375427},{"id":"https://openalex.org/keywords/background-noise","display_name":"Background noise","score":0.4004000127315521},{"id":"https://openalex.org/keywords/noise","display_name":"Noise (video)","score":0.3711000084877014},{"id":"https://openalex.org/keywords/retraining","display_name":"Retraining","score":0.28940001130104065}],"concepts":[{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.7950999736785889},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7400000095367432},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6241000294685364},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.534600019454956},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4399000108242035},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.4207000136375427},{"id":"https://openalex.org/C100675267","wikidata":"https://www.wikidata.org/wiki/Q1371624","display_name":"Background noise","level":2,"score":0.4004000127315521},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.3711000084877014},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.29910001158714294},{"id":"https://openalex.org/C2778712577","wikidata":"https://www.wikidata.org/wiki/Q3505966","display_name":"Retraining","level":2,"score":0.28940001130104065},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.2890999913215637},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.2870999872684479},{"id":"https://openalex.org/C155635449","wikidata":"https://www.wikidata.org/wiki/Q4674699","display_name":"Acoustic model","level":3,"score":0.28630000352859497},{"id":"https://openalex.org/C204323151","wikidata":"https://www.wikidata.org/wiki/Q905424","display_name":"Range (aeronautics)","level":2,"score":0.2847000062465668},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.2732999920845032},{"id":"https://openalex.org/C23224414","wikidata":"https://www.wikidata.org/wiki/Q176769","display_name":"Hidden Markov model","level":2,"score":0.26420000195503235}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/apsipaasc65261.2025.11249368","is_oa":false,"landing_page_url":"https://doi.org/10.1109/apsipaasc65261.2025.11249368","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 Asia Pacific Signal and Information Processing Association Annual Summit and Conference (APSIPA ASC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G3817118553","display_name":null,"funder_award_id":"JP21H05054,JP23K21681,JP25H01139","funder_id":"https://openalex.org/F4320320212","funder_display_name":"Japan Society for the Promotion of Science London"},{"id":"https://openalex.org/G561758839","display_name":null,"funder_award_id":"JPMJSP2140","funder_id":"https://openalex.org/F4320320907","funder_display_name":"Japan Science and Technology Corporation"}],"funders":[{"id":"https://openalex.org/F4320320212","display_name":"Japan Society for the Promotion of Science London","ror":"https://ror.org/02m7axw05"},{"id":"https://openalex.org/F4320320907","display_name":"Japan Science and Technology Corporation","ror":"https://ror.org/00097mb19"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":23,"referenced_works":["https://openalex.org/W1544785557","https://openalex.org/W1897240248","https://openalex.org/W2039716246","https://openalex.org/W2151484683","https://openalex.org/W2242685705","https://openalex.org/W2289394825","https://openalex.org/W2403307129","https://openalex.org/W2923728956","https://openalex.org/W3086154751","https://openalex.org/W3163211337","https://openalex.org/W3173617765","https://openalex.org/W3196597447","https://openalex.org/W4225873749","https://openalex.org/W4241001665","https://openalex.org/W4319586210","https://openalex.org/W4385570025","https://openalex.org/W4389520395","https://openalex.org/W4389524200","https://openalex.org/W4390871920","https://openalex.org/W4399265137","https://openalex.org/W4402671583","https://openalex.org/W4404783344","https://openalex.org/W4410529749"],"related_works":[],"abstract_inverted_index":{"Automatic":[0],"speech":[1],"recognition":[2],"(ASR)":[3],"systems":[4],"have":[5],"achieved":[6],"high":[7],"accuracy":[8],"under":[9],"clean":[10],"conditions,":[11,44],"yet":[12],"their":[13],"performance":[14],"deteriorates":[15],"in":[16,70],"real-world":[17],"environments":[18],"with":[19,77,122],"acoustic":[20,29,103],"variability.":[21],"Most":[22],"existing":[23],"robustness":[24,60,140],"methods":[25,104],"focus":[26,57],"on":[27,58],"the":[28,85,92],"side-using":[30],"enhancement":[31],"techniques,":[32],"noise-aware":[33],"training,":[34],"or":[35,144],"specialized":[36],"architectures-but":[37],"often":[38],"struggle":[39],"to":[40,84,107,127,153],"generalize":[41],"across":[42],"diverse":[43],"especially":[45],"when":[46,89],"background":[47,75],"noise":[48,90],"includes":[49],"non-target":[50],"human":[51,80],"speech.":[52],"In":[53],"this":[54,137],"work,":[55],"we":[56],"ASR":[59,129],"against":[61],"a":[62,114,148],"range":[63],"of":[64,94,113],"irrelevant":[65],"inputs":[66],"that":[67,101,136],"commonly":[68],"occur":[69],"daily":[71],"life-namely,":[72],"stationary":[73],"noise,":[74],"music":[76],"vocals,":[78],"and":[79,150],"utterances":[81],"not":[82],"belonging":[83],"target":[86],"speaker.":[87],"Crucially,":[88],"takes":[91],"form":[93],"intelligible":[95],"speech,":[96],"it":[97],"introduces":[98],"linguistic":[99],"interference":[100],"purely":[102],"may":[105],"fail":[106],"suppress.":[108],"This":[109],"motivates":[110],"our":[111],"investigation":[112],"novel,":[115],"language-level":[116],"approach:":[117],"leveraging":[118],"textual":[119],"in-context":[120],"learning":[121],"large":[123],"language":[124],"models":[125],"(LLMs)":[126],"guide":[128],"output":[130],"through":[131],"prompting.":[132],"Experiment":[133],"results":[134],"demonstrate":[135],"strategy":[138],"enhances":[139],"without":[141],"requiring":[142],"retraining":[143],"front-end":[145],"modification,":[146],"providing":[147],"flexible":[149],"scalable":[151],"alternative":[152],"conventional":[154],"acoustic-centric":[155],"methods.":[156]},"counts_by_year":[],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-11-28T00:00:00"}
