{"id":"https://openalex.org/W7123919272","doi":"https://doi.org/10.1145/3772052.3772254","title":"M <scp>od</scp> S <scp>erve</scp> : Modality- and Stage-Aware Resource Disaggregation for Scalable Multimodal Model Serving","display_name":"M <scp>od</scp> S <scp>erve</scp> : Modality- and Stage-Aware Resource Disaggregation for Scalable Multimodal Model Serving","publication_year":2025,"publication_date":"2025-11-19","ids":{"openalex":"https://openalex.org/W7123919272","doi":"https://doi.org/10.1145/3772052.3772254"},"language":null,"primary_location":{"id":"doi:10.1145/3772052.3772254","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3772052.3772254","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2025 ACM Symposium on Cloud Computing","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1145/3772052.3772254","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5037793114","display_name":"Haoran Qiu","orcid":"https://orcid.org/0000-0002-8071-1130"},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Haoran Qiu","raw_affiliation_strings":["Microsoft, Redmond, Washington, USA"],"raw_orcid":"https://orcid.org/0000-0002-8071-1130","affiliations":[{"raw_affiliation_string":"Microsoft, Redmond, Washington, USA","institution_ids":["https://openalex.org/I1290206253"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5122988217","display_name":"Anish Biswas","orcid":null},"institutions":[{"id":"https://openalex.org/I4210124949","display_name":"Microsoft Research (India)","ror":"https://ror.org/02w7f3w92","country_code":"IN","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210124949"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Anish Biswas","raw_affiliation_strings":["Microsoft, Bengaluru, India"],"raw_orcid":"https://orcid.org/0000-0001-6149-9739","affiliations":[{"raw_affiliation_string":"Microsoft, Bengaluru, India","institution_ids":["https://openalex.org/I4210124949"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5025209097","display_name":"Z.W. Zhao","orcid":null},"institutions":[{"id":"https://openalex.org/I51556381","display_name":"University of Virginia","ror":"https://ror.org/0153tk833","country_code":"US","type":"education","lineage":["https://openalex.org/I51556381"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Zihan Zhao","raw_affiliation_strings":["University of Virginia, Charlottesville, USA"],"raw_orcid":"https://orcid.org/0000-0001-5167-0918","affiliations":[{"raw_affiliation_string":"University of Virginia, Charlottesville, USA","institution_ids":["https://openalex.org/I51556381"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5122950428","display_name":"Jayashree Mohan","orcid":null},"institutions":[{"id":"https://openalex.org/I4210124949","display_name":"Microsoft Research (India)","ror":"https://ror.org/02w7f3w92","country_code":"IN","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210124949"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Jayashree Mohan","raw_affiliation_strings":["Microsoft, Bengaluru, India"],"raw_orcid":"https://orcid.org/0009-0005-5260-3203","affiliations":[{"raw_affiliation_string":"Microsoft, Bengaluru, India","institution_ids":["https://openalex.org/I4210124949"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5116143791","display_name":"Alind Khare","orcid":null},"institutions":[{"id":"https://openalex.org/I4210124949","display_name":"Microsoft Research (India)","ror":"https://ror.org/02w7f3w92","country_code":"IN","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210124949"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Alind Khare","raw_affiliation_strings":["Microsoft, Bengaluru, India"],"raw_orcid":"https://orcid.org/0000-0003-4649-9022","affiliations":[{"raw_affiliation_string":"Microsoft, Bengaluru, India","institution_ids":["https://openalex.org/I4210124949"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Esha Choukse","orcid":"https://orcid.org/0000-0003-0371-5522"},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Esha Choukse","raw_affiliation_strings":["Microsoft, Redmond, Washington, USA"],"raw_orcid":"https://orcid.org/0000-0003-0371-5522","affiliations":[{"raw_affiliation_string":"Microsoft, Redmond, Washington, USA","institution_ids":["https://openalex.org/I1290206253"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5090311560","display_name":"\u00cd\u00f1igo Goiri","orcid":"https://orcid.org/0000-0003-2591-4012"},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"\u00cd\u00f1igo Goiri","raw_affiliation_strings":["Microsoft, Redmond, Washington, USA"],"raw_orcid":"https://orcid.org/0000-0003-2591-4012","affiliations":[{"raw_affiliation_string":"Microsoft, Redmond, Washington, USA","institution_ids":["https://openalex.org/I1290206253"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5040160093","display_name":"Zeyu Zhang","orcid":"https://orcid.org/0009-0005-7853-6854"},"institutions":[{"id":"https://openalex.org/I51556381","display_name":"University of Virginia","ror":"https://ror.org/0153tk833","country_code":"US","type":"education","lineage":["https://openalex.org/I51556381"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Zeyu Zhang","raw_affiliation_strings":["University of Virginia, Charlottesville, USA"],"raw_orcid":"https://orcid.org/0009-0005-7853-6854","affiliations":[{"raw_affiliation_string":"University of Virginia, Charlottesville, USA","institution_ids":["https://openalex.org/I51556381"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Haiying Shen","orcid":"https://orcid.org/0000-0002-7548-6223"},"institutions":[{"id":"https://openalex.org/I51556381","display_name":"University of Virginia","ror":"https://ror.org/0153tk833","country_code":"US","type":"education","lineage":["https://openalex.org/I51556381"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Haiying Shen","raw_affiliation_strings":["University of Virginia, Charlottesville, USA"],"raw_orcid":"https://orcid.org/0000-0002-7548-6223","affiliations":[{"raw_affiliation_string":"University of Virginia, Charlottesville, USA","institution_ids":["https://openalex.org/I51556381"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5122950530","display_name":"Chetan Bansal","orcid":null},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Chetan Bansal","raw_affiliation_strings":["Microsoft, Redmond, USA"],"raw_orcid":"https://orcid.org/0000-0003-0102-8139","affiliations":[{"raw_affiliation_string":"Microsoft, Redmond, USA","institution_ids":["https://openalex.org/I1290206253"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5063359961","display_name":"Ram Ramjee","orcid":null},"institutions":[{"id":"https://openalex.org/I4210124949","display_name":"Microsoft Research (India)","ror":"https://ror.org/02w7f3w92","country_code":"IN","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210124949"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Ram Ramjee","raw_affiliation_strings":["Microsoft, Bengaluru, India"],"raw_orcid":"https://orcid.org/0000-0003-0007-6040","affiliations":[{"raw_affiliation_string":"Microsoft, Bengaluru, India","institution_ids":["https://openalex.org/I4210124949"]}]},{"author_position":"last","author":{"id":null,"display_name":"Rodrigo Fonseca","orcid":"https://orcid.org/0000-0001-9662-2661"},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Rodrigo Fonseca","raw_affiliation_strings":["Microsoft, Redmond, Washington, USA"],"raw_orcid":"https://orcid.org/0000-0001-9662-2661","affiliations":[{"raw_affiliation_string":"Microsoft, Redmond, Washington, USA","institution_ids":["https://openalex.org/I1290206253"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":12,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.8699,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.88838867,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":98,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"817","last_page":"830"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.335099995136261,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.335099995136261,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.17159999907016754,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.10530000180006027,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.6446999907493591},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.531000018119812},{"id":"https://openalex.org/keywords/resource","display_name":"Resource (disambiguation)","score":0.44190001487731934},{"id":"https://openalex.org/keywords/production","display_name":"Production (economics)","score":0.3865000009536743},{"id":"https://openalex.org/keywords/production-model","display_name":"Production model","score":0.3228999972343445},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.29739999771118164},{"id":"https://openalex.org/keywords/resource-allocation","display_name":"Resource allocation","score":0.2953999936580658},{"id":"https://openalex.org/keywords/pipeline-transport","display_name":"Pipeline transport","score":0.2946000099182129}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7372999787330627},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.6446999907493591},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.531000018119812},{"id":"https://openalex.org/C206345919","wikidata":"https://www.wikidata.org/wiki/Q20380951","display_name":"Resource (disambiguation)","level":2,"score":0.44190001487731934},{"id":"https://openalex.org/C2778348673","wikidata":"https://www.wikidata.org/wiki/Q739302","display_name":"Production (economics)","level":2,"score":0.3865000009536743},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3817000091075897},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.33180001378059387},{"id":"https://openalex.org/C2992770021","wikidata":"https://www.wikidata.org/wiki/Q7247850","display_name":"Production model","level":3,"score":0.3228999972343445},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3093000054359436},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.29739999771118164},{"id":"https://openalex.org/C29202148","wikidata":"https://www.wikidata.org/wiki/Q287260","display_name":"Resource allocation","level":2,"score":0.2953999936580658},{"id":"https://openalex.org/C175309249","wikidata":"https://www.wikidata.org/wiki/Q725864","display_name":"Pipeline transport","level":2,"score":0.2946000099182129},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.28209999203681946},{"id":"https://openalex.org/C126042441","wikidata":"https://www.wikidata.org/wiki/Q1324888","display_name":"Frame (networking)","level":2,"score":0.27950000762939453},{"id":"https://openalex.org/C2780910867","wikidata":"https://www.wikidata.org/wiki/Q1952416","display_name":"Multimodality","level":2,"score":0.27869999408721924},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.27079999446868896},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.26980000734329224},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.2676999866962433},{"id":"https://openalex.org/C206729178","wikidata":"https://www.wikidata.org/wiki/Q2271896","display_name":"Scheduling (production processes)","level":2,"score":0.26669999957084656},{"id":"https://openalex.org/C3017813396","wikidata":"https://www.wikidata.org/wiki/Q17078173","display_name":"Resource constraints","level":2,"score":0.25519999861717224},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.2526000142097473}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3772052.3772254","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3772052.3772254","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2025 ACM Symposium on Cloud Computing","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3772052.3772254","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3772052.3772254","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2025 ACM Symposium on Cloud Computing","raw_type":"proceedings-article"},"sustainable_development_goals":[{"score":0.45456698536872864,"id":"https://metadata.un.org/sdg/8","display_name":"Decent work and economic growth"}],"awards":[{"id":"https://openalex.org/G7475470625","display_name":null,"funder_award_id":"NSF-2421782, NSF-2350425, NSF-2319988, NSF-2206522","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320307764","display_name":"Microsoft","ror":"https://ror.org/00d0nc645"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":7,"referenced_works":["https://openalex.org/W1985006281","https://openalex.org/W3168640669","https://openalex.org/W4312960084","https://openalex.org/W4387321091","https://openalex.org/W4401211704","https://openalex.org/W4404181131","https://openalex.org/W4405595839"],"related_works":[],"abstract_inverted_index":{"Large":[0],"multimodal":[1],"models":[2],"(LMMs)":[3],"demonstrate":[4],"impressive":[5],"capabilities":[6],"in":[7,19],"understanding":[8],"images,":[9],"videos,":[10],"and":[11,31,39],"audio":[12],"beyond":[13],"text.":[14],"However,":[15],"efficiently":[16],"serving":[17],"LMMs":[18],"production":[20],"environments":[21],"poses":[22],"significant":[23],"challenges":[24],"due":[25],"to":[26],"their":[27,35],"complex":[28],"model":[29],"architectures":[30],"heterogeneous":[32],"characteristics":[33],"across":[34],"multi-stage":[36],"inference":[37],"pipelines":[38],"modalities.":[40]},"counts_by_year":[{"year":2026,"cited_by_count":2}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-01-14T00:00:00"}
