{"id":"https://openalex.org/W7134283157","doi":"https://doi.org/10.48550/arxiv.2603.05800","title":"StreamWise: Serving Multi-Modal Generation in Real-Time at Scale","display_name":"StreamWise: Serving Multi-Modal Generation in Real-Time at Scale","publication_year":2026,"publication_date":"2026-03-06","ids":{"openalex":"https://openalex.org/W7134283157","doi":"https://doi.org/10.48550/arxiv.2603.05800"},"language":null,"primary_location":{"id":"pmh:doi:10.48550/arxiv.2603.05800","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":null,"any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5037793114","display_name":"Haoran Qiu","orcid":"https://orcid.org/0000-0002-8071-1130"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Qiu, Haoran","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128420861","display_name":"Gohar Irfan Chaudhry","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chaudhry, Gohar Irfan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5045494456","display_name":"Chaojie Zhang","orcid":"https://orcid.org/0009-0002-8334-1291"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Chaojie","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5090311560","display_name":"\u00cd\u00f1igo Goiri","orcid":"https://orcid.org/0000-0003-2591-4012"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Goiri, \u00cd\u00f1igo","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5045708568","display_name":"Esha Choukse","orcid":"https://orcid.org/0000-0003-0371-5522"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Choukse, Esha","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5036529548","display_name":"Rodrigo Fonseca","orcid":"https://orcid.org/0000-0001-9662-2661"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Fonseca, Rodrigo","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5089615986","display_name":"Ricardo Bianchini","orcid":"https://orcid.org/0000-0001-5971-5084"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bianchini, Ricardo","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":7,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.45559999346733093,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.45559999346733093,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10481","display_name":"Computer Graphics and Visualization Techniques","score":0.058800000697374344,"subfield":{"id":"https://openalex.org/subfields/1704","display_name":"Computer Graphics and Computer-Aided Design"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.04800000041723251,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/workflow","display_name":"Workflow","score":0.6834999918937683},{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.6133999824523926},{"id":"https://openalex.org/keywords/modular-design","display_name":"Modular design","score":0.5999000072479248},{"id":"https://openalex.org/keywords/scale","display_name":"Scale (ratio)","score":0.41670000553131104},{"id":"https://openalex.org/keywords/latency","display_name":"Latency (audio)","score":0.4025999903678894},{"id":"https://openalex.org/keywords/resource","display_name":"Resource (disambiguation)","score":0.3953000009059906},{"id":"https://openalex.org/keywords/low-latency","display_name":"Low latency (capital markets)","score":0.35659998655319214},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.3285999894142151}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.794700026512146},{"id":"https://openalex.org/C177212765","wikidata":"https://www.wikidata.org/wiki/Q627335","display_name":"Workflow","level":2,"score":0.6834999918937683},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.6133999824523926},{"id":"https://openalex.org/C101468663","wikidata":"https://www.wikidata.org/wiki/Q1620158","display_name":"Modular design","level":2,"score":0.5999000072479248},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.4471000134944916},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.41670000553131104},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.4025999903678894},{"id":"https://openalex.org/C206345919","wikidata":"https://www.wikidata.org/wiki/Q20380951","display_name":"Resource (disambiguation)","level":2,"score":0.3953000009059906},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.3718999922275543},{"id":"https://openalex.org/C46637626","wikidata":"https://www.wikidata.org/wiki/Q6693015","display_name":"Low latency (capital markets)","level":2,"score":0.35659998655319214},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.3285999894142151},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.32659998536109924},{"id":"https://openalex.org/C49774154","wikidata":"https://www.wikidata.org/wiki/Q131765","display_name":"Multimedia","level":1,"score":0.31769999861717224},{"id":"https://openalex.org/C206729178","wikidata":"https://www.wikidata.org/wiki/Q2271896","display_name":"Scheduling (production processes)","level":2,"score":0.28999999165534973},{"id":"https://openalex.org/C202474056","wikidata":"https://www.wikidata.org/wiki/Q1931635","display_name":"Video tracking","level":3,"score":0.2847999930381775},{"id":"https://openalex.org/C5119721","wikidata":"https://www.wikidata.org/wiki/Q220501","display_name":"Quality of service","level":2,"score":0.27639999985694885},{"id":"https://openalex.org/C65483669","wikidata":"https://www.wikidata.org/wiki/Q3536669","display_name":"Video processing","level":2,"score":0.27480000257492065},{"id":"https://openalex.org/C2780586882","wikidata":"https://www.wikidata.org/wiki/Q7520643","display_name":"Simple (philosophy)","level":2,"score":0.2736999988555908},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.2709999978542328},{"id":"https://openalex.org/C20136886","wikidata":"https://www.wikidata.org/wiki/Q749647","display_name":"Interoperability","level":2,"score":0.26350000500679016},{"id":"https://openalex.org/C2777958785","wikidata":"https://www.wikidata.org/wiki/Q17120940","display_name":"Resource efficiency","level":2,"score":0.26179999113082886},{"id":"https://openalex.org/C3017813396","wikidata":"https://www.wikidata.org/wiki/Q17078173","display_name":"Resource constraints","level":2,"score":0.26170000433921814},{"id":"https://openalex.org/C2986160907","wikidata":"https://www.wikidata.org/wiki/Q220499","display_name":"Video streaming","level":2,"score":0.2596000134944916},{"id":"https://openalex.org/C183003079","wikidata":"https://www.wikidata.org/wiki/Q1000371","display_name":"Personalization","level":2,"score":0.2590000033378601}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:doi:10.48550/arxiv.2603.05800","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},{"id":"doi:10.48550/arxiv.2603.05800","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.05800","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:doi:10.48550/arxiv.2603.05800","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Advances":[0],"in":[1,27,158],"multi-modal":[2,40],"generative":[3],"models":[4,53],"are":[5],"enabling":[6],"new":[7],"applications,":[8],"from":[9,24],"storytelling":[10],"to":[11,118,135],"automated":[12],"media":[13],"synthesis.":[14],"Most":[15],"current":[16],"workloads":[17],"generate":[18],"simple":[19],"outputs":[20],"(e.g.,":[21,106],"image":[22],"generation":[23],"a":[25,151,176],"prompt)":[26],"batch":[28],"mode,":[29],"often":[30],"requiring":[31,48],"several":[32],"seconds":[33],"even":[34],"for":[35,166],"basic":[36],"results.":[37],"Serving":[38],"real-time":[39,80,173],"workflows":[41],"at":[42],"scale":[43],"is":[44],"costly":[45],"and":[46,63,69,87,111,121,131,145],"complex,":[47],"efficient":[49],"coordination":[50],"of":[51,79],"diverse":[52],"(each":[54],"with":[55,175],"unique":[56],"resource":[57,70],"needs)":[58],"across":[59],"language,":[60],"audio,":[61],"image,":[62],"video,":[64],"all":[65],"under":[66,180],"strict":[67],"latency":[68],"constraints.":[71],"We":[72,114,138],"tackle":[73],"these":[74],"challenges":[75],"through":[76],"the":[77,125,140,164],"lens":[78],"podcast":[81,153],"video":[82,129,154],"generation,":[83],"integrating":[84],"LLMs,":[85],"text-to-speech,":[86],"video-audio":[88],"generation.":[89],"To":[90],"meet":[91],"tight":[92],"SLOs,":[93],"we":[94],"design":[95],"an":[96],"adaptive,":[97],"modular":[98],"serving":[99],"system,":[100],"StreamWise,":[101],"that":[102],"dynamically":[103],"manages":[104],"quality":[105],"resolution,":[107],"sharpness),":[108],"model/content":[109],"parallelism,":[110],"resource-aware":[112],"scheduling.":[113],"leverage":[115],"heterogeneous":[116],"hardware":[117],"maximize":[119],"responsiveness":[120],"efficiency.":[122],"For":[123],"example,":[124],"system":[126],"can":[127],"lower":[128],"resolution":[130],"allocate":[132],"more":[133],"resources":[134],"early":[136],"scenes.":[137],"quantify":[139],"trade-offs":[141],"between":[142],"latency,":[143],"cost,":[144],"quality.":[146],"The":[147],"cheapest":[148],"setup":[149],"generates":[150],"10-minute":[152],"on":[155],"A100":[156],"GPUs":[157],"1.4":[159],"hours":[160],"(8.4x":[161],"slower":[162],"than":[163,168],"real-time)":[165],"less":[167],"\\$25.":[169],"StreamWise":[170],"enables":[171],"high-quality":[172],"streaming":[174],"sub-second":[177],"startup":[178],"delay":[179],"$45.":[181]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-03-10T00:00:00"}
