{"id":"https://openalex.org/W4414198773","doi":"https://doi.org/10.1109/dac63849.2025.11132773","title":"A Cross-model Fusion-aware Framework for Optimizing (gather-matmul-scatter)<sub>s</sub> Workload","display_name":"A Cross-model Fusion-aware Framework for Optimizing (gather-matmul-scatter)<sub>s</sub> Workload","publication_year":2025,"publication_date":"2025-06-22","ids":{"openalex":"https://openalex.org/W4414198773","doi":"https://doi.org/10.1109/dac63849.2025.11132773"},"language":"en","primary_location":{"id":"doi:10.1109/dac63849.2025.11132773","is_oa":false,"landing_page_url":"https://doi.org/10.1109/dac63849.2025.11132773","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 62nd ACM/IEEE Design Automation Conference (DAC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102585583","display_name":"Yaoxiu Lian","orcid":"https://orcid.org/0009-0007-7858-5132"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yaoxiu Lian","raw_affiliation_strings":["Shanghai Jiao Tong University"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5085854410","display_name":"Zhenglong Gou","orcid":null},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhihong Gou","raw_affiliation_strings":["Shanghai Jiao Tong University"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102518831","display_name":"Yibo Han","orcid":null},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yibo Han","raw_affiliation_strings":["Shanghai Jiao Tong University"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103092072","display_name":"Zhongming Yu","orcid":"https://orcid.org/0000-0003-2064-8106"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhongming Yu","raw_affiliation_strings":["Independent Researcher"],"affiliations":[{"raw_affiliation_string":"Independent Researcher","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5036783150","display_name":"Jiaming Xu","orcid":"https://orcid.org/0000-0002-4373-5723"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jiaming Xu","raw_affiliation_strings":["Shanghai Jiao Tong University"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5040438533","display_name":"Sheng Yuan","orcid":"https://orcid.org/0009-0003-0376-4719"},"institutions":[{"id":"https://openalex.org/I4210164862","display_name":"Artificial Intelligence in Medicine (Canada)","ror":"https://ror.org/05p590m36","country_code":"CA","type":"company","lineage":["https://openalex.org/I4210164862"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Sheng Yuan","raw_affiliation_strings":["Shanghai Artificial Intelligence Laboratory Independent Researcher"],"affiliations":[{"raw_affiliation_string":"Shanghai Artificial Intelligence Laboratory Independent Researcher","institution_ids":["https://openalex.org/I4210164862"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113114893","display_name":"Zhilin Pei","orcid":null},"institutions":[{"id":"https://openalex.org/I4210164862","display_name":"Artificial Intelligence in Medicine (Canada)","ror":"https://ror.org/05p590m36","country_code":"CA","type":"company","lineage":["https://openalex.org/I4210164862"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Zhilin Pei","raw_affiliation_strings":["Shanghai Artificial Intelligence Laboratory Independent Researcher"],"affiliations":[{"raw_affiliation_string":"Shanghai Artificial Intelligence Laboratory Independent Researcher","institution_ids":["https://openalex.org/I4210164862"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5010817202","display_name":"Xingcheng Zhang","orcid":"https://orcid.org/0000-0002-4136-9782"},"institutions":[{"id":"https://openalex.org/I4210164862","display_name":"Artificial Intelligence in Medicine (Canada)","ror":"https://ror.org/05p590m36","country_code":"CA","type":"company","lineage":["https://openalex.org/I4210164862"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Xingcheng Zhang","raw_affiliation_strings":["Shanghai Artificial Intelligence Laboratory Independent Researcher"],"affiliations":[{"raw_affiliation_string":"Shanghai Artificial Intelligence Laboratory Independent Researcher","institution_ids":["https://openalex.org/I4210164862"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100833305","display_name":"Ningyi Xu","orcid":null},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ningyi Xu","raw_affiliation_strings":["Shanghai Jiao Tong University"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5072245580","display_name":"Guohao Dai","orcid":"https://orcid.org/0000-0001-7346-2685"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Guohao Dai","raw_affiliation_strings":["Shanghai Jiao Tong University"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University","institution_ids":["https://openalex.org/I183067930"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":10,"corresponding_author_ids":["https://openalex.org/A5102585583"],"corresponding_institution_ids":["https://openalex.org/I183067930"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.35188195,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"7"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11361","display_name":"Digital Radiography and Breast Imaging","score":0.8065999746322632,"subfield":{"id":"https://openalex.org/subfields/2740","display_name":"Pulmonary and Respiratory Medicine"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},"topics":[{"id":"https://openalex.org/T11361","display_name":"Digital Radiography and Breast Imaging","score":0.8065999746322632,"subfield":{"id":"https://openalex.org/subfields/2740","display_name":"Pulmonary and Respiratory Medicine"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},{"id":"https://openalex.org/T11325","display_name":"Inertial Sensor and Navigation","score":0.7753000259399414,"subfield":{"id":"https://openalex.org/subfields/2202","display_name":"Aerospace Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13382","display_name":"Robotics and Automated Systems","score":0.7457000017166138,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/dataflow","display_name":"Dataflow","score":0.9775999784469604},{"id":"https://openalex.org/keywords/workload","display_name":"Workload","score":0.7433000206947327},{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.6444000005722046},{"id":"https://openalex.org/keywords/dataflow-architecture","display_name":"Dataflow architecture","score":0.5304999947547913},{"id":"https://openalex.org/keywords/heuristic","display_name":"Heuristic","score":0.4943999946117401},{"id":"https://openalex.org/keywords/data-flow-analysis","display_name":"Data-flow analysis","score":0.47110000252723694},{"id":"https://openalex.org/keywords/graph","display_name":"Graph","score":0.40610000491142273},{"id":"https://openalex.org/keywords/design-space-exploration","display_name":"Design space exploration","score":0.3718000054359436},{"id":"https://openalex.org/keywords/workstation","display_name":"Workstation","score":0.36329999566078186}],"concepts":[{"id":"https://openalex.org/C96324660","wikidata":"https://www.wikidata.org/wiki/Q205446","display_name":"Dataflow","level":2,"score":0.9775999784469604},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8240000009536743},{"id":"https://openalex.org/C2778476105","wikidata":"https://www.wikidata.org/wiki/Q628539","display_name":"Workload","level":2,"score":0.7433000206947327},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.6444000005722046},{"id":"https://openalex.org/C176727019","wikidata":"https://www.wikidata.org/wiki/Q1172415","display_name":"Dataflow architecture","level":3,"score":0.5304999947547913},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.5216000080108643},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.4950999915599823},{"id":"https://openalex.org/C173801870","wikidata":"https://www.wikidata.org/wiki/Q201413","display_name":"Heuristic","level":2,"score":0.4943999946117401},{"id":"https://openalex.org/C88468194","wikidata":"https://www.wikidata.org/wiki/Q1172416","display_name":"Data-flow analysis","level":3,"score":0.47110000252723694},{"id":"https://openalex.org/C132525143","wikidata":"https://www.wikidata.org/wiki/Q141488","display_name":"Graph","level":2,"score":0.40610000491142273},{"id":"https://openalex.org/C2776221188","wikidata":"https://www.wikidata.org/wiki/Q21072556","display_name":"Design space exploration","level":2,"score":0.3718000054359436},{"id":"https://openalex.org/C67953723","wikidata":"https://www.wikidata.org/wiki/Q192525","display_name":"Workstation","level":2,"score":0.36329999566078186},{"id":"https://openalex.org/C81917197","wikidata":"https://www.wikidata.org/wiki/Q628760","display_name":"Selection (genetic algorithm)","level":2,"score":0.3617999851703644},{"id":"https://openalex.org/C25343380","wikidata":"https://www.wikidata.org/wiki/Q277521","display_name":"Relation (database)","level":2,"score":0.34119999408721924},{"id":"https://openalex.org/C137836250","wikidata":"https://www.wikidata.org/wiki/Q984063","display_name":"Optimization problem","level":2,"score":0.3278000056743622},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.3237000107765198},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.31380000710487366},{"id":"https://openalex.org/C206729178","wikidata":"https://www.wikidata.org/wiki/Q2271896","display_name":"Scheduling (production processes)","level":2,"score":0.31119999289512634},{"id":"https://openalex.org/C114809511","wikidata":"https://www.wikidata.org/wiki/Q1412924","display_name":"Flow network","level":2,"score":0.30410000681877136},{"id":"https://openalex.org/C2780522230","wikidata":"https://www.wikidata.org/wiki/Q1140419","display_name":"Ambiguity","level":2,"score":0.3018999993801117},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.2921999990940094},{"id":"https://openalex.org/C48903430","wikidata":"https://www.wikidata.org/wiki/Q491370","display_name":"Graph partition","level":3,"score":0.28519999980926514},{"id":"https://openalex.org/C2778572836","wikidata":"https://www.wikidata.org/wiki/Q380933","display_name":"Space (punctuation)","level":2,"score":0.2833999991416931},{"id":"https://openalex.org/C489000","wikidata":"https://www.wikidata.org/wiki/Q747385","display_name":"Data flow diagram","level":2,"score":0.2786000072956085},{"id":"https://openalex.org/C78766204","wikidata":"https://www.wikidata.org/wiki/Q555032","display_name":"Multi-core processor","level":2,"score":0.27300000190734863},{"id":"https://openalex.org/C115874739","wikidata":"https://www.wikidata.org/wiki/Q825377","display_name":"Critical path method","level":2,"score":0.27160000801086426},{"id":"https://openalex.org/C2778915421","wikidata":"https://www.wikidata.org/wiki/Q3643177","display_name":"Performance improvement","level":2,"score":0.2685000002384186},{"id":"https://openalex.org/C45347329","wikidata":"https://www.wikidata.org/wiki/Q5166604","display_name":"Convolution (computer science)","level":3,"score":0.26249998807907104}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/dac63849.2025.11132773","is_oa":false,"landing_page_url":"https://doi.org/10.1109/dac63849.2025.11132773","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 62nd ACM/IEEE Design Automation Conference (DAC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320327803","display_name":"Shanghai Rising-Star Program","ror":null}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":19,"referenced_works":["https://openalex.org/W1920022804","https://openalex.org/W2150066425","https://openalex.org/W2460657278","https://openalex.org/W2521492858","https://openalex.org/W2604314403","https://openalex.org/W2906943923","https://openalex.org/W2907492528","https://openalex.org/W2946609015","https://openalex.org/W2963125977","https://openalex.org/W2979750740","https://openalex.org/W3004507689","https://openalex.org/W3034493208","https://openalex.org/W3097300053","https://openalex.org/W3152893301","https://openalex.org/W3160021293","https://openalex.org/W3196231425","https://openalex.org/W3203965336","https://openalex.org/W4312324806","https://openalex.org/W4399282122"],"related_works":[],"abstract_inverted_index":{"Modern":[0],"deep":[1],"learning":[2],"models,":[3,104],"such":[4],"as":[5,28,33],"Relation":[6],"Graph":[7],"Convolutional":[8,12],"Network":[9],"(RGCN),":[10],"Sparse":[11],"Networks":[13,19],"(SpConv),":[14],"and":[15,56,115,143,176,204],"Mixture":[16],"of":[17,108,173,193],"Experts":[18],"(MoE),":[20],"are":[21],"significantly":[22],"dependent":[23],"on":[24,170],"the":[25,106,132,140,171,174,177],"(gather-matmul-scatter)":[26],"(abbreviated":[27],"(g-mm-s)":[29],"${":[30],"}_{\\mathrm{s}}$)":[31],"workload":[32,76],"their":[34],"fundamental":[35],"computational":[36,101],"pattern.":[37],"While":[38],"existing":[39],"works":[40],"have":[41],"made":[42],"optimization":[43,52,90],"attempts,":[44],"several":[45],"critical":[46],"challenges":[47],"remain":[48],"unsolved,":[49],"including":[50],"domain-specific":[51],"migration,":[53],"time-consuming":[54],"exploration,":[55],"inefficient":[57],"dataflow":[58,96,111,154,167],"with":[59,113],"dynamic":[60],"inputs.To":[61],"address":[62],"these":[63],"challenges,":[64],"we":[65],"introduce":[66],"Efficient-GMS,":[67],"a":[68,94,127,159],"comprehensive":[69,95],"framework":[70,82,87],"that":[71,98,163,182],"enhances":[72],"($\\mathrm{g}-\\mathrm{mm}-\\mathrm{s})_{\\text":[73],"{s":[74],"}}$":[75],"across":[77,103,136],"diverse":[78],"input":[79,175],"scenarios.":[80],"Our":[81],"introduces":[83],"(1)":[84],"A":[85],"Fusion-aware":[86],"enabling":[88,105],"cross-model":[89],"migration.":[91],"We":[92,125,157],"propose":[93],"analysis":[97],"identifies":[99],"shared":[100],"patterns":[102,112,168],"development":[107],"four":[109],"optimized":[110],"vertical":[114],"horizontal":[116],"fusion":[117],"strategies.":[118],"(2)":[119],"Performance":[120],"model-guided":[121],"configuration":[122,150],"space":[123,142],"reduction.":[124],"develop":[126],"performance":[128,186],"model":[129,162,209],"to":[130,211],"predict":[131],"relative":[133],"execution":[134],"efficiency":[135],"configurations,":[137],"thereby":[138],"reducing":[139],"search":[141,145],"minimizing":[144],"time":[146],"while":[147],"ensuring":[148],"optimal":[149,166],"selection.":[151],"(3)":[152],"Adaptive":[153],"selection":[155],"mechanism.":[156],"implement":[158],"lightweight":[160],"heuristic":[161],"dynamically":[164],"selects":[165],"based":[169],"characteristics":[172],"hardware.":[178],"Experimental":[179],"results":[180],"demonstrate":[181],"Efficient-GMS":[183],"achieves":[184],"significant":[185],"gains,":[187],"delivering":[188],"an":[189],"average":[190],"end-to-end":[191],"speedup":[192],"$1.46":[194],"\\times$":[195,200,206],"in":[196,201,207],"RGCN":[197],"model,":[198,203],"$1.32":[199],"Sp-Conv-based":[202],"$1.15":[205],"MoE":[208],"compared":[210],"state-of-the-art":[212],"methods.":[213]},"counts_by_year":[],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-10-10T00:00:00"}
