{"id":"https://openalex.org/W7128386241","doi":"https://doi.org/10.1007/s11227-026-08256-4","title":"Scalable RL-based data generation and multi-resolution architecture for code-switched speech recognition: a high-performance computing approach","display_name":"Scalable RL-based data generation and multi-resolution architecture for code-switched speech recognition: a high-performance computing approach","publication_year":2026,"publication_date":"2026-02-08","ids":{"openalex":"https://openalex.org/W7128386241","doi":"https://doi.org/10.1007/s11227-026-08256-4"},"language":"en","primary_location":{"id":"doi:10.1007/s11227-026-08256-4","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s11227-026-08256-4","pdf_url":null,"source":{"id":"https://openalex.org/S32326811","display_name":"The Journal of Supercomputing","issn_l":"0920-8542","issn":["0920-8542","1573-0484"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"The Journal of Supercomputing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://doi.org/10.1007/s11227-026-08256-4","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5053844652","display_name":"Hemant Palivela","orcid":"https://orcid.org/0000-0002-5040-6979"},"institutions":[{"id":"https://openalex.org/I212738717","display_name":"Dwarkadas J. Sanghvi College of Engineering","ror":"https://ror.org/04d4hxn32","country_code":"IN","type":"education","lineage":["https://openalex.org/I212738717"]}],"countries":["IN"],"is_corresponding":true,"raw_author_name":"Hemant Palivela","raw_affiliation_strings":["Department of Computer Engineering, SVKM\u2019s Dwarkadas J. Sanghvi College of Engineering, University of Mumbai, Mumbai, 400056, India"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Computer Engineering, SVKM\u2019s Dwarkadas J. Sanghvi College of Engineering, University of Mumbai, Mumbai, 400056, India","institution_ids":["https://openalex.org/I212738717"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5078808994","display_name":"Meera Narvekar","orcid":"https://orcid.org/0000-0003-4602-4094"},"institutions":[{"id":"https://openalex.org/I212738717","display_name":"Dwarkadas J. Sanghvi College of Engineering","ror":"https://ror.org/04d4hxn32","country_code":"IN","type":"education","lineage":["https://openalex.org/I212738717"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Meera Narvekar","raw_affiliation_strings":["Department of Computer Engineering, SVKM\u2019s Dwarkadas J. Sanghvi College of Engineering, University of Mumbai, Mumbai, 400056, India"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Computer Engineering, SVKM\u2019s Dwarkadas J. Sanghvi College of Engineering, University of Mumbai, Mumbai, 400056, India","institution_ids":["https://openalex.org/I212738717"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5053844652"],"corresponding_institution_ids":["https://openalex.org/I212738717"],"apc_list":{"value":2390,"currency":"EUR","value_usd":2990},"apc_paid":{"value":2390,"currency":"EUR","value_usd":2990},"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.2431869,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"82","issue":"3","first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9174000024795532,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9174000024795532,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.03680000081658363,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10403","display_name":"Phonetics and Phonology Research","score":0.0215000007301569,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.6736999750137329},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.6173999905586243},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.5271000266075134},{"id":"https://openalex.org/keywords/exploit","display_name":"Exploit","score":0.453000009059906},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.4447000026702881},{"id":"https://openalex.org/keywords/adaptation","display_name":"Adaptation (eye)","score":0.41609999537467957},{"id":"https://openalex.org/keywords/speech-processing","display_name":"Speech processing","score":0.34439998865127563},{"id":"https://openalex.org/keywords/architecture","display_name":"Architecture","score":0.325300008058548}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.9065999984741211},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.6736999750137329},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.6173999905586243},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.5271000266075134},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.453000009059906},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.4447000026702881},{"id":"https://openalex.org/C139807058","wikidata":"https://www.wikidata.org/wiki/Q352374","display_name":"Adaptation (eye)","level":2,"score":0.41609999537467957},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.414900004863739},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.3928999900817871},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.37950000166893005},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.34439998865127563},{"id":"https://openalex.org/C123657996","wikidata":"https://www.wikidata.org/wiki/Q12271","display_name":"Architecture","level":2,"score":0.325300008058548},{"id":"https://openalex.org/C2778915421","wikidata":"https://www.wikidata.org/wiki/Q3643177","display_name":"Performance improvement","level":2,"score":0.32409998774528503},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.31610000133514404},{"id":"https://openalex.org/C155635449","wikidata":"https://www.wikidata.org/wiki/Q4674699","display_name":"Acoustic model","level":3,"score":0.31060001254081726},{"id":"https://openalex.org/C160920958","wikidata":"https://www.wikidata.org/wiki/Q7662746","display_name":"Synthetic data","level":2,"score":0.3043000102043152},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.2816999852657318},{"id":"https://openalex.org/C157764524","wikidata":"https://www.wikidata.org/wiki/Q1383412","display_name":"Throughput","level":3,"score":0.2782999873161316},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.27720001339912415},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.27160000801086426},{"id":"https://openalex.org/C68859911","wikidata":"https://www.wikidata.org/wiki/Q1503724","display_name":"Pattern matching","level":2,"score":0.26840001344680786},{"id":"https://openalex.org/C14999030","wikidata":"https://www.wikidata.org/wiki/Q16346","display_name":"Speech synthesis","level":2,"score":0.26809999346733093},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.26750001311302185},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.26350000500679016},{"id":"https://openalex.org/C105339364","wikidata":"https://www.wikidata.org/wiki/Q2297740","display_name":"Software deployment","level":2,"score":0.26339998841285706},{"id":"https://openalex.org/C104267543","wikidata":"https://www.wikidata.org/wiki/Q208163","display_name":"Signal processing","level":3,"score":0.2572000026702881},{"id":"https://openalex.org/C2779843651","wikidata":"https://www.wikidata.org/wiki/Q7390335","display_name":"SIGNAL (programming language)","level":2,"score":0.2522999942302704}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1007/s11227-026-08256-4","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s11227-026-08256-4","pdf_url":null,"source":{"id":"https://openalex.org/S32326811","display_name":"The Journal of Supercomputing","issn_l":"0920-8542","issn":["0920-8542","1573-0484"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"The Journal of Supercomputing","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1007/s11227-026-08256-4","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s11227-026-08256-4","pdf_url":null,"source":{"id":"https://openalex.org/S32326811","display_name":"The Journal of Supercomputing","issn_l":"0920-8542","issn":["0920-8542","1573-0484"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"The Journal of Supercomputing","raw_type":"journal-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education","score":0.4769091010093689}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":8,"referenced_works":["https://openalex.org/W2127141656","https://openalex.org/W3097777922","https://openalex.org/W3176798216","https://openalex.org/W3209059054","https://openalex.org/W3209984917","https://openalex.org/W3213029956","https://openalex.org/W4319862255","https://openalex.org/W4402112192"],"related_works":[],"abstract_inverted_index":{"Code-switched":[0],"speech":[1,192,316],"recognition":[2],"presents":[3,60],"unique":[4],"computational":[5,176],"challenges":[6,46],"in":[7,49],"processing":[8,130],"multilingual":[9,315],"utterances":[10,103],"where":[11],"speakers":[12],"alternate":[13],"between":[14],"languages,":[15],"requiring":[16,212],"simultaneous":[17],"modeling":[18],"of":[19,36,40,100,115,120,228,267],"multiple":[20],"phonological":[21],"systems":[22],"with":[23,104,140,169],"distinct":[24],"temporal":[25,138],"characteristics.":[26],"Contemporary":[27],"foundation":[28,274],"models":[29,245],"like":[30],"Whisper":[31],"and":[32,38,52,70,96,136,163,205,255,276,304],"XLS-R":[33,209],"need":[34],"billions":[35],"parameters":[37],"thousands":[39],"GPU-hours":[41,269],"for":[42,47,83,280,312],"training.":[43],"This":[44,58],"creates":[45],"deployment":[48],"resource-constrained":[50],"environments":[51],"adaptation":[53],"to":[54,152,243],"specialized":[55,234],"linguistic":[56],"domains.":[57],"paper":[59],"MARS-ASR":[61],"(Multi-Resolution":[62],"Adaptive":[63],"Recognition":[64],"System),":[65],"addressing":[66],"both":[67],"data":[68,87,109,158,224,253],"scarcity":[69],"architectural":[71,256],"efficiency":[72,92,266],"through":[73,159],"three":[74],"synergistic":[75],"innovations:":[76],"(1)":[77],"a":[78,125,165],"scalable":[79],"reinforcement":[80],"learning":[81],"pipeline":[82],"high-quality":[84],"synthetic":[85,223],"code-switched":[86,191],"generation,":[88],"demonstrating":[89,250],"91.3%":[90],"parallel":[91,126],"at":[93,133],"64":[94],"GPUs":[95],"producing":[97],"300":[98],"h":[99],"linguistically":[101],"validated":[102],"4.1/5.0":[105],"human":[106],"quality":[107],"ratings\u2013this":[108],"augmentation":[110],"constitutes":[111],"the":[112,155,226,233],"primary":[113],"driver":[114],"performance":[116,289],"improvement,":[117],"contributing":[118],"64.4%":[119],"total":[121],"WER":[122],"reduction;":[123],"(2)":[124],"triple-branch":[127],"Conformer":[128],"encoder":[129],"acoustic":[131,161,180],"signals":[132],"25ms,":[134],"50ms,":[135],"100ms":[137],"resolutions":[139],"GPU-optimized":[141],"execution":[142],"achieving":[143],"94.2%":[144],"hardware":[145],"utilization":[146],"on":[147,179,189,247],"NVIDIA":[148],"A100":[149],"accelerators,":[150],"designed":[151],"maximally":[153],"exploit":[154],"generated":[156],"training":[157,265],"multi-scale":[160],"modeling;":[162],"(3)":[164],"context-adaptive":[166],"fusion":[167],"mechanism":[168],"learned":[170],"gating":[171],"networks":[172],"that":[173,220,251],"dynamically":[174],"allocate":[175],"resources":[177],"based":[178],"complexity,":[181],"reducing":[182],"inference":[183,281],"cost":[184],"by":[185,240],"23.4%.":[186],"Experimental":[187],"evaluation":[188],"Hindi\u2013Marathi":[190],"demonstrates":[193,287],"16.1%":[194],"Word":[195],"Error":[196],"Rate,":[197],"representing":[198],"38.5%":[199],"relative":[200],"improvement":[201,207],"over":[202,208],"single-resolution":[203,244],"baselines":[204],"9.6%":[206],"2B":[210],"while":[211,221],"85%":[213],"fewer":[214],"parameters.":[215],"Critically,":[216],"ablation":[217],"studies":[218],"reveal":[219],"RL-generated":[222],"provide":[225],"majority":[227],"accuracy":[229,303],"gains":[230],"(64.4%":[231],"contribution),":[232],"multi-resolution":[235],"architecture":[236],"amplifies":[237],"this":[238],"benefit":[239],"62%":[241],"compared":[242],"trained":[246],"identical":[248],"data,":[249],"targeted":[252],"generation":[254],"design":[257],"must":[258],"be":[259],"co-optimized.":[260],"The":[261,297],"complete":[262],"system":[263,298],"achieves":[264,299],"320":[268],"(2.25":[270],"$$\\times$$":[271],"reduction":[272],"versus":[273],"models)":[275],"real-time":[277],"factor":[278],"0.23":[279],"(4,348":[282],"utterances/GPU-hour":[283],"throughput).":[284],"Cross-platform":[285],"benchmarking":[286],"consistent":[288],"scaling":[290],"across":[291],"GPU":[292],"generations":[293],"(V100,":[294],"A100,":[295],"H100).":[296],"94.3%":[300],"language":[301],"identification":[302],"90.1%":[305],"switch-point":[306],"detection":[307],"F1-score,":[308],"establishing":[309],"new":[310],"benchmarks":[311],"computationally":[313],"efficient":[314],"processing.":[317]},"counts_by_year":[],"updated_date":"2026-05-05T08:41:31.759640","created_date":"2026-02-10T00:00:00"}
