{"id":"https://openalex.org/W7148371444","doi":"https://doi.org/10.1109/asru65441.2025.11434760","title":"SMILE: Speech Meta In-Context Learning for Low-Resource Language Automatic Speech Recognition","display_name":"SMILE: Speech Meta In-Context Learning for Low-Resource Language Automatic Speech Recognition","publication_year":2025,"publication_date":"2025-12-06","ids":{"openalex":"https://openalex.org/W7148371444","doi":"https://doi.org/10.1109/asru65441.2025.11434760"},"language":null,"primary_location":{"id":"doi:10.1109/asru65441.2025.11434760","is_oa":false,"landing_page_url":"https://doi.org/10.1109/asru65441.2025.11434760","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5033466805","display_name":"Ming-Hao Hsu","orcid":"https://orcid.org/0000-0003-0037-3280"},"institutions":[{"id":"https://openalex.org/I16733864","display_name":"National Taiwan University","ror":"https://ror.org/05bqach95","country_code":"TW","type":"education","lineage":["https://openalex.org/I16733864"]}],"countries":["TW"],"is_corresponding":true,"raw_author_name":"Ming-Hao Hsu","raw_affiliation_strings":["National Taiwan University,Electrical Engineering Department,Taipei,Taiwan"],"affiliations":[{"raw_affiliation_string":"National Taiwan University,Electrical Engineering Department,Taipei,Taiwan","institution_ids":["https://openalex.org/I16733864"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5132807525","display_name":"Hung-Yi Lee","orcid":null},"institutions":[{"id":"https://openalex.org/I16733864","display_name":"National Taiwan University","ror":"https://ror.org/05bqach95","country_code":"TW","type":"education","lineage":["https://openalex.org/I16733864"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Hung-Yi Lee","raw_affiliation_strings":["National Taiwan University,Electrical Engineering Department,Taipei,Taiwan"],"affiliations":[{"raw_affiliation_string":"National Taiwan University,Electrical Engineering Department,Taipei,Taiwan","institution_ids":["https://openalex.org/I16733864"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5033466805"],"corresponding_institution_ids":["https://openalex.org/I16733864"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.87557898,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"7"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.7088000178337097,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.7088000178337097,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.03889999911189079,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.026499999687075615,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/speech-technology","display_name":"Speech technology","score":0.4004000127315521},{"id":"https://openalex.org/keywords/speaker-recognition","display_name":"Speaker recognition","score":0.3652999997138977},{"id":"https://openalex.org/keywords/speech-processing","display_name":"Speech processing","score":0.3580999970436096},{"id":"https://openalex.org/keywords/natural-language","display_name":"Natural language","score":0.3515999913215637},{"id":"https://openalex.org/keywords/voice-activity-detection","display_name":"Voice activity detection","score":0.33709999918937683},{"id":"https://openalex.org/keywords/speech-corpus","display_name":"Speech corpus","score":0.3353999853134155}],"concepts":[{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6220999956130981},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6129999756813049},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4171999990940094},{"id":"https://openalex.org/C504749915","wikidata":"https://www.wikidata.org/wiki/Q9010971","display_name":"Speech technology","level":3,"score":0.4004000127315521},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3986999988555908},{"id":"https://openalex.org/C133892786","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker recognition","level":2,"score":0.3652999997138977},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.3580999970436096},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.3515999913215637},{"id":"https://openalex.org/C204201278","wikidata":"https://www.wikidata.org/wiki/Q1332614","display_name":"Voice activity detection","level":3,"score":0.33709999918937683},{"id":"https://openalex.org/C91863865","wikidata":"https://www.wikidata.org/wiki/Q4349497","display_name":"Speech corpus","level":3,"score":0.3353999853134155},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.29760000109672546},{"id":"https://openalex.org/C54953205","wikidata":"https://www.wikidata.org/wiki/Q4142201","display_name":"Speech analytics","level":4,"score":0.2946000099182129},{"id":"https://openalex.org/C137584468","wikidata":"https://www.wikidata.org/wiki/Q35395","display_name":"Phonetics","level":2,"score":0.2732999920845032},{"id":"https://openalex.org/C74672266","wikidata":"https://www.wikidata.org/wiki/Q815859","display_name":"Language acquisition","level":2,"score":0.25859999656677246}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/asru65441.2025.11434760","is_oa":false,"landing_page_url":"https://doi.org/10.1109/asru65441.2025.11434760","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.43914374709129333,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":26,"referenced_works":["https://openalex.org/W2936774411","https://openalex.org/W3015585292","https://openalex.org/W3099771192","https://openalex.org/W3161686170","https://openalex.org/W3173563729","https://openalex.org/W3198429080","https://openalex.org/W3209059054","https://openalex.org/W3213029956","https://openalex.org/W4221153728","https://openalex.org/W4224933812","https://openalex.org/W4372259777","https://openalex.org/W4372260556","https://openalex.org/W4385570973","https://openalex.org/W4385822439","https://openalex.org/W4385822953","https://openalex.org/W4385823064","https://openalex.org/W4392931630","https://openalex.org/W4400369850","https://openalex.org/W4401044235","https://openalex.org/W4402111519","https://openalex.org/W4402112032","https://openalex.org/W4402112143","https://openalex.org/W4407757482","https://openalex.org/W4411120533","https://openalex.org/W7126440389","https://openalex.org/W7131889158"],"related_works":[],"abstract_inverted_index":{"Automatic":[0],"Speech":[1,61],"Recognition":[2],"(ASR)":[3],"models":[4],"demonstrate":[5],"outstanding":[6],"performance":[7],"on":[8,43,94,100],"high-resource":[9,81],"languages":[10,19,82,90],"but":[11],"face":[12],"significant":[13],"challenges":[14],"when":[15],"applied":[16],"to":[17,21,83,88],"low-resource":[18,89],"due":[20],"limited":[22],"training":[23],"data":[24,36],"and":[25,38,114],"insufficient":[26],"cross-lingual":[27],"generalization.":[28],"Existing":[29],"adaptation":[30,53],"strategies,":[31],"such":[32],"as":[33],"shallow":[34],"fusion,":[35],"augmentation,":[37],"direct":[39],"fine-tuning,":[40],"either":[41],"rely":[42],"external":[44],"resources,":[45],"suffer":[46],"computational":[47],"inefficiencies,":[48],"or":[49],"fail":[50],"in":[51,118],"test-time":[52],"scenarios.":[54],"To":[55],"address":[56],"these":[57],"limitations,":[58],"we":[59],"introduce":[60],"Meta":[62],"In-Context":[63],"LEarning":[64],"(SMILE),":[65],"an":[66],"innovative":[67],"framework":[68],"that":[69,105],"combines":[70],"meta-learning":[71],"with":[72],"speech":[73],"in-context":[74],"learning":[75],"(SICL).":[76],"SMILE":[77,106],"leverages":[78],"metatraining":[79],"from":[80],"enable":[84],"robust,":[85],"few-shot":[86,120],"generalization":[87],"without":[91],"explicit":[92],"fine-tuning":[93],"the":[95,101],"target":[96],"domain.":[97],"Extensive":[98],"experiments":[99],"ML-SUPERB":[102],"benchmark":[103],"show":[104],"consistently":[107],"outperforms":[108],"baseline":[109],"methods,":[110],"significantly":[111],"reducing":[112],"character":[113],"word":[115],"error":[116],"rates":[117],"training-free":[119],"multilingual":[121],"ASR":[122],"tasks.":[123]},"counts_by_year":[],"updated_date":"2026-04-03T16:44:17.987007","created_date":"2026-04-03T00:00:00"}
