{"id":"https://openalex.org/W4414735573","doi":"https://doi.org/10.1145/3731569.3764798","title":"Mercury: Unlocking Multi-GPU Operator Optimization for LLMs via Remote Memory Scheduling","display_name":"Mercury: Unlocking Multi-GPU Operator Optimization for LLMs via Remote Memory Scheduling","publication_year":2025,"publication_date":"2025-10-01","ids":{"openalex":"https://openalex.org/W4414735573","doi":"https://doi.org/10.1145/3731569.3764798"},"language":"en","primary_location":{"id":"doi:10.1145/3731569.3764798","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3731569.3764798","pdf_url":null,"source":null,"license":"cc-by-nc-sa","license_id":"https://openalex.org/licenses/cc-by-nc-sa","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM SIGOPS 31st Symposium on Operating Systems Principles","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1145/3731569.3764798","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5091217469","display_name":"Yue Guan","orcid":"https://orcid.org/0009-0005-7433-2627"},"institutions":[{"id":"https://openalex.org/I36258959","display_name":"University of California San Diego","ror":"https://ror.org/0168r3w48","country_code":"US","type":"education","lineage":["https://openalex.org/I36258959"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Yue Guan","raw_affiliation_strings":["UCSD, La Jolla, California, USA"],"raw_orcid":"https://orcid.org/0009-0005-7433-2627","affiliations":[{"raw_affiliation_string":"UCSD, La Jolla, California, USA","institution_ids":["https://openalex.org/I36258959"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5119806774","display_name":"Xinwei Qiang","orcid":null},"institutions":[{"id":"https://openalex.org/I36258959","display_name":"University of California San Diego","ror":"https://ror.org/0168r3w48","country_code":"US","type":"education","lineage":["https://openalex.org/I36258959"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Xinwei Qiang","raw_affiliation_strings":["UCSD, La Jolla, USA"],"raw_orcid":"https://orcid.org/0009-0004-1182-6737","affiliations":[{"raw_affiliation_string":"UCSD, La Jolla, USA","institution_ids":["https://openalex.org/I36258959"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5041947969","display_name":"Zaifeng Pan","orcid":"https://orcid.org/0000-0002-6759-2616"},"institutions":[{"id":"https://openalex.org/I36258959","display_name":"University of California San Diego","ror":"https://ror.org/0168r3w48","country_code":"US","type":"education","lineage":["https://openalex.org/I36258959"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Zaifeng Pan","raw_affiliation_strings":["UCSD, La Jolla, USA"],"raw_orcid":"https://orcid.org/0000-0002-6759-2616","affiliations":[{"raw_affiliation_string":"UCSD, La Jolla, USA","institution_ids":["https://openalex.org/I36258959"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5031938315","display_name":"Daniels Johnson","orcid":null},"institutions":[{"id":"https://openalex.org/I2800240351","display_name":"Mountain View College","ror":"https://ror.org/04fh8an03","country_code":"US","type":"education","lineage":["https://openalex.org/I1291072267","https://openalex.org/I2800240351"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Daniels Johnson","raw_affiliation_strings":["Meta, Mountain View, USA"],"raw_orcid":"https://orcid.org/0009-0004-1126-7445","affiliations":[{"raw_affiliation_string":"Meta, Mountain View, USA","institution_ids":["https://openalex.org/I2800240351"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102291852","display_name":"Yuanwei Fang","orcid":"https://orcid.org/0000-0001-5600-026X"},"institutions":[{"id":"https://openalex.org/I2800240351","display_name":"Mountain View College","ror":"https://ror.org/04fh8an03","country_code":"US","type":"education","lineage":["https://openalex.org/I1291072267","https://openalex.org/I2800240351"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yuanwei Fang","raw_affiliation_strings":["Meta, Mountain View, USA"],"raw_orcid":"https://orcid.org/0000-0001-5600-026X","affiliations":[{"raw_affiliation_string":"Meta, Mountain View, USA","institution_ids":["https://openalex.org/I2800240351"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5063326523","display_name":"Keren Zhou","orcid":"https://orcid.org/0000-0002-7977-3182"},"institutions":[{"id":"https://openalex.org/I162714631","display_name":"George Mason University","ror":"https://ror.org/02jqj7156","country_code":"US","type":"education","lineage":["https://openalex.org/I162714631"]},{"id":"https://openalex.org/I4210161460","display_name":"OpenAI (United States)","ror":"https://ror.org/05wx9n238","country_code":"US","type":"company","lineage":["https://openalex.org/I4210161460"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Keren Zhou","raw_affiliation_strings":["George Mason University, Washington DC, USA","OpenAI, San Francisco, USA"],"raw_orcid":"https://orcid.org/0000-0002-7977-3182","affiliations":[{"raw_affiliation_string":"George Mason University, Washington DC, USA","institution_ids":["https://openalex.org/I162714631"]},{"raw_affiliation_string":"OpenAI, San Francisco, USA","institution_ids":["https://openalex.org/I4210161460"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5022196610","display_name":"Yuke Wang","orcid":"https://orcid.org/0000-0003-1634-8549"},"institutions":[{"id":"https://openalex.org/I74775410","display_name":"Rice University","ror":"https://ror.org/008zs3103","country_code":"US","type":"education","lineage":["https://openalex.org/I74775410"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yuke Wang","raw_affiliation_strings":["Rice University, Houston, USA"],"raw_orcid":"https://orcid.org/0000-0003-1634-8549","affiliations":[{"raw_affiliation_string":"Rice University, Houston, USA","institution_ids":["https://openalex.org/I74775410"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5020721427","display_name":"Wan\u2010Lu Li","orcid":"https://orcid.org/0000-0003-0098-0670"},"institutions":[{"id":"https://openalex.org/I36258959","display_name":"University of California San Diego","ror":"https://ror.org/0168r3w48","country_code":"US","type":"education","lineage":["https://openalex.org/I36258959"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Wanlu Li","raw_affiliation_strings":["UCSD, La Jolla, USA"],"raw_orcid":"https://orcid.org/0000-0003-0098-0670","affiliations":[{"raw_affiliation_string":"UCSD, La Jolla, USA","institution_ids":["https://openalex.org/I36258959"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5048052285","display_name":"Yufei Ding","orcid":"https://orcid.org/0000-0002-8716-5793"},"institutions":[{"id":"https://openalex.org/I2800240351","display_name":"Mountain View College","ror":"https://ror.org/04fh8an03","country_code":"US","type":"education","lineage":["https://openalex.org/I1291072267","https://openalex.org/I2800240351"]},{"id":"https://openalex.org/I36258959","display_name":"University of California San Diego","ror":"https://ror.org/0168r3w48","country_code":"US","type":"education","lineage":["https://openalex.org/I36258959"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yufei Ding","raw_affiliation_strings":["Meta, Mountain View, USA","UCSD, La Jolla, USA"],"raw_orcid":"https://orcid.org/0000-0002-8716-5793","affiliations":[{"raw_affiliation_string":"Meta, Mountain View, USA","institution_ids":["https://openalex.org/I2800240351"]},{"raw_affiliation_string":"UCSD, La Jolla, USA","institution_ids":["https://openalex.org/I36258959"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5112689277","display_name":"Adnan Aziz","orcid":"https://orcid.org/0009-0003-5855-6861"},"institutions":[{"id":"https://openalex.org/I2800240351","display_name":"Mountain View College","ror":"https://ror.org/04fh8an03","country_code":"US","type":"education","lineage":["https://openalex.org/I1291072267","https://openalex.org/I2800240351"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Adnan Aziz","raw_affiliation_strings":["Meta, Mountain VIew, USA"],"raw_orcid":"https://orcid.org/0009-0003-5855-6861","affiliations":[{"raw_affiliation_string":"Meta, Mountain VIew, USA","institution_ids":["https://openalex.org/I2800240351"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":10,"corresponding_author_ids":["https://openalex.org/A5091217469"],"corresponding_institution_ids":["https://openalex.org/I36258959"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.30432488,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1046","last_page":"1061"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12127","display_name":"Software System Performance and Reliability","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12127","display_name":"Software System Performance and Reliability","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.9933000206947327,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10772","display_name":"Distributed systems and fault tolerance","score":0.9890000224113464,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/compiler","display_name":"Compiler","score":0.6718999743461609},{"id":"https://openalex.org/keywords/scheduling","display_name":"Scheduling (production processes)","score":0.6506999731063843},{"id":"https://openalex.org/keywords/abstraction","display_name":"Abstraction","score":0.42590001225471497},{"id":"https://openalex.org/keywords/operator","display_name":"Operator (biology)","score":0.4020000100135803},{"id":"https://openalex.org/keywords/optimizing-compiler","display_name":"Optimizing compiler","score":0.31940001249313354},{"id":"https://openalex.org/keywords/data-access","display_name":"Data access","score":0.3176000118255615},{"id":"https://openalex.org/keywords/distributed-memory","display_name":"Distributed memory","score":0.29809999465942383}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8234000205993652},{"id":"https://openalex.org/C169590947","wikidata":"https://www.wikidata.org/wiki/Q47506","display_name":"Compiler","level":2,"score":0.6718999743461609},{"id":"https://openalex.org/C206729178","wikidata":"https://www.wikidata.org/wiki/Q2271896","display_name":"Scheduling (production processes)","level":2,"score":0.6506999731063843},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.45809999108314514},{"id":"https://openalex.org/C124304363","wikidata":"https://www.wikidata.org/wiki/Q673661","display_name":"Abstraction","level":2,"score":0.42590001225471497},{"id":"https://openalex.org/C17020691","wikidata":"https://www.wikidata.org/wiki/Q139677","display_name":"Operator (biology)","level":5,"score":0.4020000100135803},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.3549000024795532},{"id":"https://openalex.org/C190902152","wikidata":"https://www.wikidata.org/wiki/Q1325106","display_name":"Optimizing compiler","level":3,"score":0.31940001249313354},{"id":"https://openalex.org/C47487241","wikidata":"https://www.wikidata.org/wiki/Q5227230","display_name":"Data access","level":2,"score":0.3176000118255615},{"id":"https://openalex.org/C91481028","wikidata":"https://www.wikidata.org/wiki/Q1054686","display_name":"Distributed memory","level":3,"score":0.29809999465942383},{"id":"https://openalex.org/C2984984529","wikidata":"https://www.wikidata.org/wiki/Q7619925","display_name":"Storage management","level":2,"score":0.2964000105857849},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.2849000096321106},{"id":"https://openalex.org/C2984822820","wikidata":"https://www.wikidata.org/wiki/Q1123036","display_name":"Processor scheduling","level":3,"score":0.2849000096321106},{"id":"https://openalex.org/C2778029271","wikidata":"https://www.wikidata.org/wiki/Q5421931","display_name":"Extension (predicate logic)","level":2,"score":0.2833999991416931},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.2775000035762787},{"id":"https://openalex.org/C176649486","wikidata":"https://www.wikidata.org/wiki/Q2308807","display_name":"Memory management","level":3,"score":0.27720001339912415},{"id":"https://openalex.org/C162319229","wikidata":"https://www.wikidata.org/wiki/Q175263","display_name":"Data structure","level":2,"score":0.27709999680519104},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.2768999934196472},{"id":"https://openalex.org/C2777777548","wikidata":"https://www.wikidata.org/wiki/Q206040","display_name":"Mercury (programming language)","level":2,"score":0.2718999981880188},{"id":"https://openalex.org/C82687282","wikidata":"https://www.wikidata.org/wiki/Q66221","display_name":"Auxiliary memory","level":2,"score":0.26019999384880066}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3731569.3764798","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3731569.3764798","pdf_url":null,"source":null,"license":"cc-by-nc-sa","license_id":"https://openalex.org/licenses/cc-by-nc-sa","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM SIGOPS 31st Symposium on Operating Systems Principles","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3731569.3764798","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3731569.3764798","pdf_url":null,"source":null,"license":"cc-by-nc-sa","license_id":"https://openalex.org/licenses/cc-by-nc-sa","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM SIGOPS 31st Symposium on Operating Systems Principles","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":14,"referenced_works":["https://openalex.org/W2002257715","https://openalex.org/W3037847693","https://openalex.org/W3086105743","https://openalex.org/W4205983429","https://openalex.org/W4213153339","https://openalex.org/W4241618768","https://openalex.org/W4242045284","https://openalex.org/W4251637954","https://openalex.org/W4280611847","https://openalex.org/W4281859785","https://openalex.org/W4281953044","https://openalex.org/W4386768656","https://openalex.org/W4412230188","https://openalex.org/W4412605554"],"related_works":[],"abstract_inverted_index":{"In":[0],"this":[1],"paper,":[2],"we":[3],"propose":[4],"Mercury,":[5],"a":[6,12,65,79],"multi-GPU":[7],"operator":[8],"compiler":[9,54],"based":[10],"on":[11],"loop-based":[13],"intermediate":[14],"representation,":[15],"CommIR.":[16],"At":[17],"the":[18,36,40,53,87],"core":[19],"of":[20,35,89],"Mercury":[21,81],"is":[22,82,112],"an":[23,31],"abstraction":[24],"that":[25,70,105],"treats":[26],"remote":[27],"GPU":[28],"memory":[29,37],"as":[30],"explicitly":[32],"managed":[33],"extension":[34],"hierarchy,":[38],"expanding":[39],"available":[41],"storage":[42],"and":[43,61,72,94,96],"communication":[44],"resources":[45],"beyond":[46,74],"local":[47],"HBM.":[48],"This":[49],"unified":[50],"view":[51],"enables":[52],"to":[55,84],"reason":[56],"holistically":[57],"about":[58],"data":[59],"placement":[60],"inter-device":[62],"communication,":[63],"unlocking":[64],"vastly":[66],"larger":[67],"design":[68],"space":[69],"encompasses":[71],"extends":[73],"existing":[75],"manual":[76,106],"strategies.":[77],"As":[78],"result,":[80],"able":[83],"automatically":[85],"reproduce":[86],"performance":[88],"hand-optimized":[90],"baselines":[91],"like":[92],"RingAttention":[93],"Ulysses,":[95],"in":[97],"some":[98],"configurations,":[99],"even":[100],"discovers":[101],"more":[102],"effective":[103],"strategies":[104],"designs":[107],"have":[108],"overlooked.":[109],"Our":[110],"implementation":[111],"open-sourced":[113],"at":[114],"https://github.com/ChandlerGuan/mercury_artifact.":[115]},"counts_by_year":[],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
