{"id":"https://openalex.org/W7135182357","doi":"https://doi.org/10.48550/arxiv.2603.11273","title":"Duration Aware Scheduling for ASR Serving Under Workload Drift","display_name":"Duration Aware Scheduling for ASR Serving Under Workload Drift","publication_year":2026,"publication_date":"2026-03-11","ids":{"openalex":"https://openalex.org/W7135182357","doi":"https://doi.org/10.48550/arxiv.2603.11273"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.11273","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.11273","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.11273","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5128946199","display_name":"Darshan Makwana","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Makwana, Darshan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5092639643","display_name":"Yash Jogi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jogi, Yash","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5065428975","display_name":"Harsh Kotta","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kotta, Harsh","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5128925385","display_name":"Aayush Kubba","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kubba, Aayush","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.14010000228881836,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.14010000228881836,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12238","display_name":"Green IT and Sustainability","score":0.10899999737739563,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12127","display_name":"Software System Performance and Reliability","score":0.0803999975323677,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/workload","display_name":"Workload","score":0.8152999877929688},{"id":"https://openalex.org/keywords/latency","display_name":"Latency (audio)","score":0.6632000207901001},{"id":"https://openalex.org/keywords/scheduling","display_name":"Scheduling (production processes)","score":0.6270999908447266},{"id":"https://openalex.org/keywords/bounding-overwatch","display_name":"Bounding overwatch","score":0.619700014591217},{"id":"https://openalex.org/keywords/duration","display_name":"Duration (music)","score":0.4309000074863434},{"id":"https://openalex.org/keywords/proxy","display_name":"Proxy (statistics)","score":0.3043999969959259}],"concepts":[{"id":"https://openalex.org/C2778476105","wikidata":"https://www.wikidata.org/wiki/Q628539","display_name":"Workload","level":2,"score":0.8152999877929688},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7533000111579895},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.6632000207901001},{"id":"https://openalex.org/C206729178","wikidata":"https://www.wikidata.org/wiki/Q2271896","display_name":"Scheduling (production processes)","level":2,"score":0.6270999908447266},{"id":"https://openalex.org/C63584917","wikidata":"https://www.wikidata.org/wiki/Q333286","display_name":"Bounding overwatch","level":2,"score":0.619700014591217},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.5674999952316284},{"id":"https://openalex.org/C112758219","wikidata":"https://www.wikidata.org/wiki/Q16038819","display_name":"Duration (music)","level":2,"score":0.4309000074863434},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.4185999929904938},{"id":"https://openalex.org/C2780148112","wikidata":"https://www.wikidata.org/wiki/Q1432581","display_name":"Proxy (statistics)","level":2,"score":0.3043999969959259},{"id":"https://openalex.org/C171268870","wikidata":"https://www.wikidata.org/wiki/Q1486676","display_name":"GRASP","level":2,"score":0.26829999685287476},{"id":"https://openalex.org/C142603982","wikidata":"https://www.wikidata.org/wiki/Q5021615","display_name":"Call duration","level":2,"score":0.262800008058548},{"id":"https://openalex.org/C127456818","wikidata":"https://www.wikidata.org/wiki/Q238879","display_name":"Rate-monotonic scheduling","level":4,"score":0.2612000107765198},{"id":"https://openalex.org/C16320812","wikidata":"https://www.wikidata.org/wiki/Q1812200","display_name":"Idle","level":2,"score":0.2538999915122986},{"id":"https://openalex.org/C2777632111","wikidata":"https://www.wikidata.org/wiki/Q1937518","display_name":"Reservation","level":2,"score":0.25119999051094055}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.11273","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.11273","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.11273","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.11273","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"display_name":"Decent work and economic growth","id":"https://metadata.un.org/sdg/8","score":0.5392835140228271}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Scheduling":[0],"policies":[1],"in":[2,14,32,56],"large-scale":[3],"Automatic":[4],"Speech":[5],"Recognition":[6],"(ASR)":[7],"serving":[8,22],"pipelines":[9],"play":[10],"a":[11],"key":[12],"role":[13],"determining":[15],"end-to-end":[16],"(E2E)":[17],"latency.":[18],"Yet,":[19],"widely":[20],"used":[21],"engines":[23],"rely":[24],"on":[25],"first-come-first-served":[26],"(FCFS)":[27],"scheduling,":[28],"which":[29],"ignores":[30],"variability":[31],"request":[33],"duration":[34,47],"and":[35,62,79,87,92,159],"leads":[36],"to":[37,66,99,108,120,123,139,145],"head-of-line":[38],"blocking":[39],"under":[40,90,152],"workload":[41,153],"drift.":[42],"We":[43,70],"show":[44],"that":[45],"audio":[46],"is":[48],"an":[49],"accurate":[50],"proxy":[51],"for":[52],"job":[53],"processing":[54],"time":[55],"ASR":[57],"models":[58],"such":[59],"as":[60],"Whisper,":[61],"use":[63],"this":[64,130],"insight":[65],"enable":[67],"duration-aware":[68],"scheduling.":[69],"integrate":[71],"two":[72],"classical":[73],"algorithms,":[74],"Shortest":[75],"Job":[76],"First":[77],"(SJF)":[78],"Highest":[80],"Response":[81],"Ratio":[82],"Next":[83],"(HRRN),":[84],"into":[85],"vLLM":[86],"evaluate":[88],"them":[89],"realistic":[91],"drifted":[93],"workloads.":[94],"On":[95],"LibriSpeech":[96],"test-clean,":[97],"compared":[98],"baseline,":[100],"SJF":[101],"reduces":[102,133],"median":[103,134],"E2E":[104,135],"latency":[105,117,136],"by":[106,118,137],"up":[107,119,138],"$73\\%$":[109],"at":[110,146],"high":[111],"load,":[112],"but":[113],"increases":[114],"$90$th-percentile":[115],"tail":[116],"$97\\%$":[121],"due":[122],"starvation":[124],"of":[125],"long":[126],"requests.":[127],"HRRN":[128],"addresses":[129],"trade-off:":[131],"it":[132],"$28\\%$":[140],"while":[141],"bounding":[142],"tail-latency":[143],"degradation":[144],"most":[147],"$24\\%$.":[148],"These":[149],"gains":[150],"persist":[151],"drift,":[154],"with":[155],"no":[156],"throughput":[157],"penalty":[158],"$&lt;0.1$\\,ms":[160],"scheduling":[161],"overhead":[162],"per":[163],"request.":[164]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-03-14T00:00:00"}
