{"id":"https://openalex.org/W4413014956","doi":"https://doi.org/10.1145/3759441.3759447","title":"Toward Weight Sharing Paradigm for Efficient AI: Training and Inference Serving","display_name":"Toward Weight Sharing Paradigm for Efficient AI: Training and Inference Serving","publication_year":2025,"publication_date":"2025-08-04","ids":{"openalex":"https://openalex.org/W4413014956","doi":"https://doi.org/10.1145/3759441.3759447"},"language":"en","primary_location":{"id":"doi:10.1145/3759441.3759447","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3759441.3759447","pdf_url":null,"source":{"id":"https://openalex.org/S50071195","display_name":"ACM SIGOPS Operating Systems Review","issn_l":"0163-5980","issn":["0163-5980","1943-586X"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM SIGOPS Operating Systems Review","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5032487759","display_name":"Payman Behnam","orcid":"https://orcid.org/0000-0002-3826-9123"},"institutions":[{"id":"https://openalex.org/I130701444","display_name":"Georgia Institute of Technology","ror":"https://ror.org/01zkghx44","country_code":"US","type":"education","lineage":["https://openalex.org/I130701444"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Payman Behnam","raw_affiliation_strings":["Georgia Tech, GA, USA"],"affiliations":[{"raw_affiliation_string":"Georgia Tech, GA, USA","institution_ids":["https://openalex.org/I130701444"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5108953546","display_name":"Alind Khare","orcid":null},"institutions":[{"id":"https://openalex.org/I130701444","display_name":"Georgia Institute of Technology","ror":"https://ror.org/01zkghx44","country_code":"US","type":"education","lineage":["https://openalex.org/I130701444"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Alind Khare","raw_affiliation_strings":["Georgia Tech, GA, USA"],"affiliations":[{"raw_affiliation_string":"Georgia Tech, GA, USA","institution_ids":["https://openalex.org/I130701444"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102962728","display_name":"Dhruv Garg","orcid":"https://orcid.org/0009-0002-7655-845X"},"institutions":[{"id":"https://openalex.org/I130701444","display_name":"Georgia Institute of Technology","ror":"https://ror.org/01zkghx44","country_code":"US","type":"education","lineage":["https://openalex.org/I130701444"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Dhruv Garg","raw_affiliation_strings":["Georgia Tech, GA, USA"],"affiliations":[{"raw_affiliation_string":"Georgia Tech, GA, USA","institution_ids":["https://openalex.org/I130701444"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5048451114","display_name":"Alexey Tumanov","orcid":"https://orcid.org/0009-0005-7862-1477"},"institutions":[{"id":"https://openalex.org/I130701444","display_name":"Georgia Institute of Technology","ror":"https://ror.org/01zkghx44","country_code":"US","type":"education","lineage":["https://openalex.org/I130701444"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Alexey Tumanov","raw_affiliation_strings":["Georgia Tech, GA, USA"],"affiliations":[{"raw_affiliation_string":"Georgia Tech, GA, USA","institution_ids":["https://openalex.org/I130701444"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5032487759"],"corresponding_institution_ids":["https://openalex.org/I130701444"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.22862949,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"59","issue":"1","first_page":"34","last_page":"45"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10502","display_name":"Advanced Memory and Neural Computing","score":0.9973000288009644,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8135365843772888},{"id":"https://openalex.org/keywords/exploit","display_name":"Exploit","score":0.6259118318557739},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.6109955906867981},{"id":"https://openalex.org/keywords/scheduling","display_name":"Scheduling (production processes)","score":0.5186764001846313},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.5128050446510315},{"id":"https://openalex.org/keywords/latency","display_name":"Latency (audio)","score":0.49045297503471375},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4832799732685089},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.46414461731910706},{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.4390740692615509},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.3969750702381134},{"id":"https://openalex.org/keywords/computer-engineering","display_name":"Computer engineering","score":0.37776461243629456}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8135365843772888},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.6259118318557739},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.6109955906867981},{"id":"https://openalex.org/C206729178","wikidata":"https://www.wikidata.org/wiki/Q2271896","display_name":"Scheduling (production processes)","level":2,"score":0.5186764001846313},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5128050446510315},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.49045297503471375},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4832799732685089},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.46414461731910706},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.4390740692615509},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.3969750702381134},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.37776461243629456},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3759441.3759447","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3759441.3759447","pdf_url":null,"source":{"id":"https://openalex.org/S50071195","display_name":"ACM SIGOPS Operating Systems Review","issn_l":"0163-5980","issn":["0163-5980","1943-586X"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM SIGOPS Operating Systems Review","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.7599999904632568,"display_name":"Affordable and clean energy","id":"https://metadata.un.org/sdg/7"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":40,"referenced_works":["https://openalex.org/W95608104","https://openalex.org/W2586654419","https://openalex.org/W2734941459","https://openalex.org/W2772948367","https://openalex.org/W2791175987","https://openalex.org/W2794670651","https://openalex.org/W2885311373","https://openalex.org/W2897268228","https://openalex.org/W2908896109","https://openalex.org/W2931743911","https://openalex.org/W2964081807","https://openalex.org/W2975257729","https://openalex.org/W2981698279","https://openalex.org/W2997958863","https://openalex.org/W2998218113","https://openalex.org/W3035332806","https://openalex.org/W3048046405","https://openalex.org/W3096533519","https://openalex.org/W3101781196","https://openalex.org/W3109946440","https://openalex.org/W3136429794","https://openalex.org/W3180150746","https://openalex.org/W3204296682","https://openalex.org/W3206225415","https://openalex.org/W4200386182","https://openalex.org/W4214490831","https://openalex.org/W4318619660","https://openalex.org/W4383046424","https://openalex.org/W4386902919","https://openalex.org/W6600168703","https://openalex.org/W6600219630","https://openalex.org/W6600281463","https://openalex.org/W6600669965","https://openalex.org/W6602657726","https://openalex.org/W6606876336","https://openalex.org/W6753278433","https://openalex.org/W6754220138","https://openalex.org/W6812208750","https://openalex.org/W6821384415","https://openalex.org/W6826381003"],"related_works":["https://openalex.org/W17155033","https://openalex.org/W3207760230","https://openalex.org/W1496222301","https://openalex.org/W4312814274","https://openalex.org/W1590307681","https://openalex.org/W2536018345","https://openalex.org/W4285370786","https://openalex.org/W2296488620","https://openalex.org/W3128807919","https://openalex.org/W3176411177"],"abstract_inverted_index":{"Deep":[0],"neural":[1,160],"networks":[2],"are":[3],"increasingly":[4],"required":[5],"to":[6,54,144,166,188,213,224],"operate":[7],"across":[8,72,132,252],"diverse":[9],"hardware":[10],"platforms,":[11],"latency":[12,226],"constraints,":[13],"and":[14,29,68,78,101,135,138,163,211,227,246],"power":[15],"budgets,":[16],"which":[17,140],"motivates":[18],"the":[19,117,234,241],"need":[20],"for":[21,24,95,203],"specialized":[22],"models":[23,42,170,251],"each":[25],"scenario.":[26],"However,":[27],"designing":[28],"training":[30,59,77,100,112,137,245],"a":[31,38,51,60,125,130,172,180,192,200,253],"separate":[32],"model":[33,182],"per":[34],"scenario":[35],"or":[36],"serving":[37,248],"large":[39],"ensemble":[40],"of":[41,87,119,194,243,249,255],"is":[43],"often":[44],"impractical.":[45],"Weight":[46],"sharing":[47,94,236],"has":[48],"emerged":[49],"as":[50],"promising":[52],"paradigm":[53,237],"address":[55],"this":[56],"challenge":[57],"by":[58,69,114],"single":[61,173],"''SuperNet''":[62],"that":[63,91,233],"subsumes":[64],"many":[65,145],"sub-models":[66],"(SubNets),":[67],"reusing":[70],"weights":[71],"those":[73],"SubNets":[74,121],"both":[75,99,244],"at":[76,148],"inference":[79,102,176,247],"time.":[80],"This":[81],"paper":[82],"provides":[83],"an":[84],"abridged":[85],"survey":[86],"our":[88],"recent":[89],"advances":[90],"leverage":[92],"weight":[93,235],"efficient":[96,169],"AI,":[97],"covering":[98],"serving.":[103],"In":[104,124],"centralized":[105],"once-for-all":[106],"training,":[107],"Delayed":[108],"\u03b5-Shrinking":[109],"(D\u03b5S)":[110],"improves":[111],"efficiency":[113,242],"strategically":[115],"scheduling":[116],"introduction":[118],"smaller":[120],"during":[122],"training.":[123],"federated":[126],"fashion,":[127],"SuperFedNas":[128],"co-trains":[129],"SuperNet":[131],"distributed":[133],"clients":[134],"disjoins":[136],"searching,":[139],"enables":[141],"oneshot":[142],"specialization":[143],"deployment":[146],"targets":[147],"minimal":[149],"cost.":[150],"\u2207QDARTS":[151],"integrates":[152],"quantization":[153],"into":[154],"differentiable":[155],"architecture":[156],"search,":[157],"jointly":[158],"finding":[159],"architectures,":[161],"weights,":[162],"low-precision":[164],"settings":[165],"yield":[167],"highly":[168],"in":[171],"search.":[174],"For":[175],"serving,":[177],"SuperServe":[178],"introduces":[179],"weight-shared":[181],"with":[183,199],"dynamic":[184],"SubNet":[185],"routing":[186],"(SubNetAct)":[187],"instantaneously":[189],"switch":[190],"among":[191],"spectrum":[193],"accuracy-latency":[195],"operating":[196],"points,":[197],"coupled":[198],"scheduler":[201],"(SlackFit)":[202],"unpredictable":[204],"workloads.":[205],"Finally,":[206],"SUSHI":[207],"co-designs":[208],"model,":[209],"system,":[210],"accelerator":[212],"exploit":[214],"weightshared":[215],"SuperNets":[216],"on":[217,222],"tinyML":[218],"devices,":[219],"caching":[220],"SubGraphs":[221],"FPGA":[223],"reduce":[225],"energy.":[228],"Together,":[229],"these":[230],"works":[231],"demonstrate":[232],"can":[238],"dramatically":[239],"improve":[240],"deep":[250],"range":[254],"scenarios.":[256]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
