{"id":"https://openalex.org/W4414198725","doi":"https://doi.org/10.1109/dac63849.2025.11132550","title":"ACRS: Adjacent Computation Resource Sharing among Partitioned GPU Sub-Cores","display_name":"ACRS: Adjacent Computation Resource Sharing among Partitioned GPU Sub-Cores","publication_year":2025,"publication_date":"2025-06-22","ids":{"openalex":"https://openalex.org/W4414198725","doi":"https://doi.org/10.1109/dac63849.2025.11132550"},"language":"en","primary_location":{"id":"doi:10.1109/dac63849.2025.11132550","is_oa":false,"landing_page_url":"https://doi.org/10.1109/dac63849.2025.11132550","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 62nd ACM/IEEE Design Automation Conference (DAC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5049749857","display_name":"Penghao Song","orcid":"https://orcid.org/0000-0001-7423-1416"},"institutions":[{"id":"https://openalex.org/I4210090176","display_name":"Institute of Computing Technology","ror":"https://ror.org/0090r4d87","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210090176"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Penghao Song","raw_affiliation_strings":["Institute of Computing Technology, CAS,State Key Lab of Processors,Beijing,China"],"affiliations":[{"raw_affiliation_string":"Institute of Computing Technology, CAS,State Key Lab of Processors,Beijing,China","institution_ids":["https://openalex.org/I4210090176"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5040108243","display_name":"Chongxi Wang","orcid":"https://orcid.org/0000-0003-1295-279X"},"institutions":[{"id":"https://openalex.org/I4210090176","display_name":"Institute of Computing Technology","ror":"https://ror.org/0090r4d87","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210090176"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chongxi Wang","raw_affiliation_strings":["Institute of Computing Technology, CAS,State Key Lab of Processors,Beijing,China"],"affiliations":[{"raw_affiliation_string":"Institute of Computing Technology, CAS,State Key Lab of Processors,Beijing,China","institution_ids":["https://openalex.org/I4210090176"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103025062","display_name":"Chenji Han","orcid":"https://orcid.org/0009-0007-1247-9644"},"institutions":[{"id":"https://openalex.org/I4210090176","display_name":"Institute of Computing Technology","ror":"https://ror.org/0090r4d87","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210090176"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chenji Han","raw_affiliation_strings":["Institute of Computing Technology, CAS,State Key Lab of Processors,Beijing,China"],"affiliations":[{"raw_affiliation_string":"Institute of Computing Technology, CAS,State Key Lab of Processors,Beijing,China","institution_ids":["https://openalex.org/I4210090176"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102018887","display_name":"Haoyu Zhao","orcid":"https://orcid.org/0000-0003-3757-8773"},"institutions":[{"id":"https://openalex.org/I4210155967","display_name":"OriginWater (China)","ror":"https://ror.org/04h7gmn81","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210155967"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Haoyu Zhao","raw_affiliation_strings":["Loongson Technology Co. Ltd,Beijing,China"],"affiliations":[{"raw_affiliation_string":"Loongson Technology Co. Ltd,Beijing,China","institution_ids":["https://openalex.org/I4210155967"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100329618","display_name":"Tingting Zhang","orcid":"https://orcid.org/0000-0002-1724-4904"},"institutions":[{"id":"https://openalex.org/I4210155967","display_name":"OriginWater (China)","ror":"https://ror.org/04h7gmn81","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210155967"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Tingting Zhang","raw_affiliation_strings":["Loongson Technology Co. Ltd,Beijing,China"],"affiliations":[{"raw_affiliation_string":"Loongson Technology Co. Ltd,Beijing,China","institution_ids":["https://openalex.org/I4210155967"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100430605","display_name":"Tianyi Liu","orcid":"https://orcid.org/0000-0002-5341-1343"},"institutions":[{"id":"https://openalex.org/I45438204","display_name":"The University of Texas at San Antonio","ror":"https://ror.org/01kd65564","country_code":"US","type":"education","lineage":["https://openalex.org/I45438204"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Tianyi Liu","raw_affiliation_strings":["University of Texas at San Antonio,United States"],"affiliations":[{"raw_affiliation_string":"University of Texas at San Antonio,United States","institution_ids":["https://openalex.org/I45438204"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100370333","display_name":"Jian Wang","orcid":"https://orcid.org/0000-0001-5416-0649"},"institutions":[{"id":"https://openalex.org/I4210090176","display_name":"Institute of Computing Technology","ror":"https://ror.org/0090r4d87","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210090176"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jian Wang","raw_affiliation_strings":["Institute of Computing Technology, CAS,State Key Lab of Processors,Beijing,China"],"affiliations":[{"raw_affiliation_string":"Institute of Computing Technology, CAS,State Key Lab of Processors,Beijing,China","institution_ids":["https://openalex.org/I4210090176"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5049749857"],"corresponding_institution_ids":["https://openalex.org/I4210090176"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.28195049,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"7"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.973800003528595,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.973800003528595,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.9732000231742859,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/operand","display_name":"Operand","score":0.791700005531311},{"id":"https://openalex.org/keywords/shared-resource","display_name":"Shared resource","score":0.6661999821662903},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.6247000098228455},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.5989000201225281},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5519000291824341},{"id":"https://openalex.org/keywords/resource","display_name":"Resource (disambiguation)","score":0.45649999380111694},{"id":"https://openalex.org/keywords/multi-core-processor","display_name":"Multi-core processor","score":0.4327999949455261},{"id":"https://openalex.org/keywords/filter","display_name":"Filter (signal processing)","score":0.4302000105381012},{"id":"https://openalex.org/keywords/energy-consumption","display_name":"Energy consumption","score":0.4032000005245209}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8532999753952026},{"id":"https://openalex.org/C55526617","wikidata":"https://www.wikidata.org/wiki/Q719375","display_name":"Operand","level":2,"score":0.791700005531311},{"id":"https://openalex.org/C51332947","wikidata":"https://www.wikidata.org/wiki/Q1172305","display_name":"Shared resource","level":2,"score":0.6661999821662903},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.6247000098228455},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.5989000201225281},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5519000291824341},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.5268999934196472},{"id":"https://openalex.org/C206345919","wikidata":"https://www.wikidata.org/wiki/Q20380951","display_name":"Resource (disambiguation)","level":2,"score":0.45649999380111694},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.435699999332428},{"id":"https://openalex.org/C78766204","wikidata":"https://www.wikidata.org/wiki/Q555032","display_name":"Multi-core processor","level":2,"score":0.4327999949455261},{"id":"https://openalex.org/C106131492","wikidata":"https://www.wikidata.org/wiki/Q3072260","display_name":"Filter (signal processing)","level":2,"score":0.4302000105381012},{"id":"https://openalex.org/C2780165032","wikidata":"https://www.wikidata.org/wiki/Q16869822","display_name":"Energy consumption","level":2,"score":0.4032000005245209},{"id":"https://openalex.org/C2984118289","wikidata":"https://www.wikidata.org/wiki/Q29954","display_name":"Power consumption","level":3,"score":0.39399999380111694},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.37560001015663147},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.3725999891757965},{"id":"https://openalex.org/C163258240","wikidata":"https://www.wikidata.org/wiki/Q25342","display_name":"Power (physics)","level":2,"score":0.3610999882221222},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.34450000524520874},{"id":"https://openalex.org/C165005293","wikidata":"https://www.wikidata.org/wiki/Q1074500","display_name":"Chip","level":2,"score":0.33880001306533813},{"id":"https://openalex.org/C2164484","wikidata":"https://www.wikidata.org/wiki/Q5170150","display_name":"Core (optical fiber)","level":2,"score":0.3328999876976013},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.3257000148296356},{"id":"https://openalex.org/C111335779","wikidata":"https://www.wikidata.org/wiki/Q3454686","display_name":"Reduction (mathematics)","level":2,"score":0.32510000467300415},{"id":"https://openalex.org/C2742236","wikidata":"https://www.wikidata.org/wiki/Q924713","display_name":"Efficient energy use","level":2,"score":0.3230000138282776},{"id":"https://openalex.org/C147224247","wikidata":"https://www.wikidata.org/wiki/Q885373","display_name":"Bloom filter","level":2,"score":0.29980000853538513},{"id":"https://openalex.org/C9390403","wikidata":"https://www.wikidata.org/wiki/Q3966","display_name":"Computer hardware","level":1,"score":0.2987000048160553},{"id":"https://openalex.org/C2777480716","wikidata":"https://www.wikidata.org/wiki/Q23582796","display_name":"Resource consumption","level":2,"score":0.29030001163482666},{"id":"https://openalex.org/C206588197","wikidata":"https://www.wikidata.org/wiki/Q846574","display_name":"Reuse","level":2,"score":0.27639999985694885},{"id":"https://openalex.org/C186370098","wikidata":"https://www.wikidata.org/wiki/Q442787","display_name":"Energy (signal processing)","level":2,"score":0.2687000036239624},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.26260000467300415},{"id":"https://openalex.org/C30772137","wikidata":"https://www.wikidata.org/wiki/Q5164762","display_name":"Consumption (sociology)","level":2,"score":0.2619999945163727},{"id":"https://openalex.org/C2989134064","wikidata":"https://www.wikidata.org/wiki/Q288510","display_name":"Execution time","level":2,"score":0.2549000084400177},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.25119999051094055},{"id":"https://openalex.org/C106515295","wikidata":"https://www.wikidata.org/wiki/Q26806595","display_name":"Parallel processing","level":2,"score":0.25099998712539673}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/dac63849.2025.11132550","is_oa":false,"landing_page_url":"https://doi.org/10.1109/dac63849.2025.11132550","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 62nd ACM/IEEE Design Automation Conference (DAC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":29,"referenced_works":["https://openalex.org/W1970815868","https://openalex.org/W1979527452","https://openalex.org/W1990550838","https://openalex.org/W2053744175","https://openalex.org/W2065924554","https://openalex.org/W2080592089","https://openalex.org/W2090584832","https://openalex.org/W2125979435","https://openalex.org/W2142444503","https://openalex.org/W2152956697","https://openalex.org/W2155568054","https://openalex.org/W2156831150","https://openalex.org/W2160428323","https://openalex.org/W2315868086","https://openalex.org/W2412101011","https://openalex.org/W2470920449","https://openalex.org/W2727422107","https://openalex.org/W2791833900","https://openalex.org/W2798724095","https://openalex.org/W2884590322","https://openalex.org/W2936491961","https://openalex.org/W3102510044","https://openalex.org/W3159607817","https://openalex.org/W3175298944","https://openalex.org/W3206857466","https://openalex.org/W4293159304","https://openalex.org/W4360831990","https://openalex.org/W4381389673","https://openalex.org/W4392910976"],"related_works":[],"abstract_inverted_index":{"Modern":[0],"GPUs":[1],"typically":[2],"segment":[3],"Streaming":[4],"Multiprocessors":[5],"(SMs)":[6],"into":[7],"sub-cores":[8,46,87,140],"(e.g.":[9],"4":[10],"sub-cores)":[11],"to":[12,102,141,149,191],"reduce":[13],"power":[14],"consumption":[15,208],"and":[16,47,119,132,135,173],"chip":[17],"area.":[18],"However,":[19],"this":[20,34],"partitioned":[21,203],"design":[22,167],"prevents":[23],"potential":[24],"task":[25],"distributions":[26],"across":[27],"sub-cores,":[28],"impairing":[29],"overall":[30],"execution":[31],"efficiency.":[32],"In":[33,91],"paper,":[35],"we":[36,93],"explore":[37],"the":[38,95,127,150,177,201,222],"performance":[39,188,218],"benefit":[40],"of":[41,129,170,197],"sharing":[42,99],"hardware":[43],"resources":[44],"among":[45,108],"identify":[48],"functional":[49,133],"units":[50,107],"(FUs)":[51],"as":[52],"critical":[53],"components":[54],"for":[55,164],"compute-intensive":[56],"applications.":[57],"Moreover,":[58],"our":[59],"observations":[60],"reveal":[61],"that":[62,80,185],"instructions":[63,137],"residing":[64],"in":[65,85],"operand":[66,130],"collectors":[67,131],"can":[68],"be":[69],"obstructed":[70],"by":[71,189,209],"back-end":[72],"FUs,":[73],"but":[74],"there":[75],"is":[76,158],"a":[77,160,168],"high":[78],"probability":[79],"unoccupied":[81,106,142],"FUs":[82],"are":[83],"available":[84],"adjacent":[86,96],"during":[88],"such":[89],"blockages.":[90],"response,":[92],"introduce":[94],"computation":[97],"resource":[98],"(ACRS)":[100],"framework":[101],"efficiently":[103],"utilize":[104],"these":[105],"sub-cores.":[109],"ACRS":[110,186,213],"has":[111],"two":[112],"key":[113],"modules:":[114],"Shared":[115,120],"FU":[116,121],"Issue":[117],"(SF_ISSUE)":[118],"Write":[122],"Back":[123],"(SF_WriteBack).":[124],"SF_ISSUE":[125],"monitors":[126],"status":[128],"units,":[134],"offloads":[136],"from":[138],"blocked":[139],"resources.":[143],"Meanwhile,":[144],"SF_WriteBack":[145],"routes":[146],"results":[147,183],"back":[148],"original":[151],"sub-core.To":[152],"minimize":[153],"wiring":[154],"overhead,":[155],"each":[156],"sub-core":[157],"assigned":[159],"fixed":[161],"target":[162],"core":[163],"sharing.":[165],"We":[166],"series":[169],"matching":[171],"policies":[172],"finally":[174],"filter":[175],"out":[176],"most":[178],"effective":[179],"sequential":[180],"method.":[181,224],"Evaluation":[182],"show":[184],"improves":[187],"up":[190],"$46.4":[192],"\\%$,":[193],"with":[194,221],"an":[195,215],"average":[196],"$14.1":[198],"\\%$":[199],"over":[200],"traditional":[202],"architecture,":[204],"while":[205],"reducing":[206],"energy":[207],"$8.3":[210],"\\%$.":[211],"Besides,":[212],"achieves":[214],"additional":[216],"12.3%":[217],"improvement":[219],"compared":[220],"SOTA":[223]},"counts_by_year":[],"updated_date":"2026-03-07T16:01:11.037858","created_date":"2025-10-10T00:00:00"}
