{"id":"https://openalex.org/W4415285677","doi":"https://doi.org/10.1145/3725843.3756039","title":"ORCHES: Orchestrated Test-Time-Compute-based LLM Reasoning on Collaborative GPU-PIM HEterogeneous System","display_name":"ORCHES: Orchestrated Test-Time-Compute-based LLM Reasoning on Collaborative GPU-PIM HEterogeneous System","publication_year":2025,"publication_date":"2025-10-17","ids":{"openalex":"https://openalex.org/W4415285677","doi":"https://doi.org/10.1145/3725843.3756039"},"language":"en","primary_location":{"id":"doi:10.1145/3725843.3756039","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3725843.3756039","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 58th IEEE/ACM International Symposium on Microarchitecture","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1145/3725843.3756039","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5060660400","display_name":"Sixu Li","orcid":"https://orcid.org/0000-0002-9105-9299"},"institutions":[{"id":"https://openalex.org/I130701444","display_name":"Georgia Institute of Technology","ror":"https://ror.org/01zkghx44","country_code":"US","type":"education","lineage":["https://openalex.org/I130701444"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Sixu Li","raw_affiliation_strings":["Georgia Institute of Technology, Atlanta, USA"],"raw_orcid":"https://orcid.org/0000-0002-9105-9299","affiliations":[{"raw_affiliation_string":"Georgia Institute of Technology, Atlanta, USA","institution_ids":["https://openalex.org/I130701444"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5010349853","display_name":"Yuzhou Chen","orcid":"https://orcid.org/0009-0004-9236-0480"},"institutions":[{"id":"https://openalex.org/I130701444","display_name":"Georgia Institute of Technology","ror":"https://ror.org/01zkghx44","country_code":"US","type":"education","lineage":["https://openalex.org/I130701444"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yuzhou Chen","raw_affiliation_strings":["Georgia Institute of Technology, Atlanta, USA"],"raw_orcid":"https://orcid.org/0009-0004-9236-0480","affiliations":[{"raw_affiliation_string":"Georgia Institute of Technology, Atlanta, USA","institution_ids":["https://openalex.org/I130701444"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5065172226","display_name":"Chaojian Li","orcid":"https://orcid.org/0000-0003-4030-9777"},"institutions":[{"id":"https://openalex.org/I130701444","display_name":"Georgia Institute of Technology","ror":"https://ror.org/01zkghx44","country_code":"US","type":"education","lineage":["https://openalex.org/I130701444"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Chaojian Li","raw_affiliation_strings":["Georgia Institute of Technology, Atlanta, USA"],"raw_orcid":"https://orcid.org/0000-0003-4030-9777","affiliations":[{"raw_affiliation_string":"Georgia Institute of Technology, Atlanta, USA","institution_ids":["https://openalex.org/I130701444"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5061572132","display_name":"Yonggan Fu","orcid":"https://orcid.org/0000-0002-7483-2921"},"institutions":[{"id":"https://openalex.org/I130701444","display_name":"Georgia Institute of Technology","ror":"https://ror.org/01zkghx44","country_code":"US","type":"education","lineage":["https://openalex.org/I130701444"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yonggan Fu","raw_affiliation_strings":["Georgia Institute of Technology, Atlanta, USA"],"raw_orcid":"https://orcid.org/0000-0002-7483-2921","affiliations":[{"raw_affiliation_string":"Georgia Institute of Technology, Atlanta, USA","institution_ids":["https://openalex.org/I130701444"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Zheng Wang","orcid":"https://orcid.org/0009-0002-9467-7460"},"institutions":[{"id":"https://openalex.org/I130701444","display_name":"Georgia Institute of Technology","ror":"https://ror.org/01zkghx44","country_code":"US","type":"education","lineage":["https://openalex.org/I130701444"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Zheng Wang","raw_affiliation_strings":["Georgia Institute of Technology, Atlanta, USA"],"raw_orcid":"https://orcid.org/0009-0002-9467-7460","affiliations":[{"raw_affiliation_string":"Georgia Institute of Technology, Atlanta, USA","institution_ids":["https://openalex.org/I130701444"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5034614246","display_name":"Zhongzhi Yu","orcid":"https://orcid.org/0000-0002-9981-4981"},"institutions":[{"id":"https://openalex.org/I130701444","display_name":"Georgia Institute of Technology","ror":"https://ror.org/01zkghx44","country_code":"US","type":"education","lineage":["https://openalex.org/I130701444"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Zhongzhi Yu","raw_affiliation_strings":["Georgia Institute of Technology, Atlanta, USA"],"raw_orcid":"https://orcid.org/0000-0002-9981-4981","affiliations":[{"raw_affiliation_string":"Georgia Institute of Technology, Atlanta, USA","institution_ids":["https://openalex.org/I130701444"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Haoran You","orcid":"https://orcid.org/0000-0002-2873-2153"},"institutions":[{"id":"https://openalex.org/I130701444","display_name":"Georgia Institute of Technology","ror":"https://ror.org/01zkghx44","country_code":"US","type":"education","lineage":["https://openalex.org/I130701444"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Haoran You","raw_affiliation_strings":["Georgia Institute of Technology, Atlanta, USA"],"raw_orcid":"https://orcid.org/0000-0002-2873-2153","affiliations":[{"raw_affiliation_string":"Georgia Institute of Technology, Atlanta, USA","institution_ids":["https://openalex.org/I130701444"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5020142088","display_name":"Zhifan Ye","orcid":"https://orcid.org/0000-0003-0755-8843"},"institutions":[{"id":"https://openalex.org/I130701444","display_name":"Georgia Institute of Technology","ror":"https://ror.org/01zkghx44","country_code":"US","type":"education","lineage":["https://openalex.org/I130701444"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Zhifan Ye","raw_affiliation_strings":["Georgia Institute of Technology, Atlanta, USA"],"raw_orcid":"https://orcid.org/0000-0003-0755-8843","affiliations":[{"raw_affiliation_string":"Georgia Institute of Technology, Atlanta, USA","institution_ids":["https://openalex.org/I130701444"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Wei Zhou","orcid":"https://orcid.org/0000-0002-9770-3583"},"institutions":[{"id":"https://openalex.org/I130701444","display_name":"Georgia Institute of Technology","ror":"https://ror.org/01zkghx44","country_code":"US","type":"education","lineage":["https://openalex.org/I130701444"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Wei Zhou","raw_affiliation_strings":["Georgia Institute of Technology, Atlanta, USA"],"raw_orcid":"https://orcid.org/0000-0002-9770-3583","affiliations":[{"raw_affiliation_string":"Georgia Institute of Technology, Atlanta, USA","institution_ids":["https://openalex.org/I130701444"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101912939","display_name":"Yongan Zhang","orcid":"https://orcid.org/0000-0001-7919-049X"},"institutions":[{"id":"https://openalex.org/I130701444","display_name":"Georgia Institute of Technology","ror":"https://ror.org/01zkghx44","country_code":"US","type":"education","lineage":["https://openalex.org/I130701444"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yongan Zhang","raw_affiliation_strings":["Georgia Institute of Technology, Atlanta, USA"],"raw_orcid":"https://orcid.org/0000-0001-7919-049X","affiliations":[{"raw_affiliation_string":"Georgia Institute of Technology, Atlanta, USA","institution_ids":["https://openalex.org/I130701444"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5019582323","display_name":"Yingyan Lin","orcid":"https://orcid.org/0000-0001-5946-203X"},"institutions":[{"id":"https://openalex.org/I130701444","display_name":"Georgia Institute of Technology","ror":"https://ror.org/01zkghx44","country_code":"US","type":"education","lineage":["https://openalex.org/I130701444"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yingyan (Celine) Lin","raw_affiliation_strings":["Georgia Institute of Technology, Atlanta, USA"],"raw_orcid":"https://orcid.org/0000-0001-5946-203X","affiliations":[{"raw_affiliation_string":"Georgia Institute of Technology, Atlanta, USA","institution_ids":["https://openalex.org/I130701444"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":11,"corresponding_author_ids":["https://openalex.org/A5060660400"],"corresponding_institution_ids":["https://openalex.org/I130701444"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.29486919,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"476","last_page":"489"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10743","display_name":"Software Testing and Debugging Techniques","score":0.9908999800682068,"subfield":{"id":"https://openalex.org/subfields/1712","display_name":"Software"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10743","display_name":"Software Testing and Debugging Techniques","score":0.9908999800682068,"subfield":{"id":"https://openalex.org/subfields/1712","display_name":"Software"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13382","display_name":"Robotics and Automated Systems","score":0.9850999712944031,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10409","display_name":"Fuel Cells and Related Materials","score":0.9843000173568726,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/locality","display_name":"Locality","score":0.6407999992370605},{"id":"https://openalex.org/keywords/workload","display_name":"Workload","score":0.5738000273704529},{"id":"https://openalex.org/keywords/latency","display_name":"Latency (audio)","score":0.53329998254776},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.5327000021934509},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.48989999294281006},{"id":"https://openalex.org/keywords/limiting","display_name":"Limiting","score":0.43470001220703125},{"id":"https://openalex.org/keywords/implementation","display_name":"Implementation","score":0.424699991941452},{"id":"https://openalex.org/keywords/data-access","display_name":"Data access","score":0.35359999537467957},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.353300005197525}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8533999919891357},{"id":"https://openalex.org/C2779808786","wikidata":"https://www.wikidata.org/wiki/Q6664603","display_name":"Locality","level":2,"score":0.6407999992370605},{"id":"https://openalex.org/C2778476105","wikidata":"https://www.wikidata.org/wiki/Q628539","display_name":"Workload","level":2,"score":0.5738000273704529},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.53329998254776},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.5327000021934509},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.4970000088214874},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.48989999294281006},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.453000009059906},{"id":"https://openalex.org/C188198153","wikidata":"https://www.wikidata.org/wiki/Q1613840","display_name":"Limiting","level":2,"score":0.43470001220703125},{"id":"https://openalex.org/C26713055","wikidata":"https://www.wikidata.org/wiki/Q245962","display_name":"Implementation","level":2,"score":0.424699991941452},{"id":"https://openalex.org/C47487241","wikidata":"https://www.wikidata.org/wiki/Q5227230","display_name":"Data access","level":2,"score":0.35359999537467957},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.353300005197525},{"id":"https://openalex.org/C75165309","wikidata":"https://www.wikidata.org/wiki/Q2258979","display_name":"Search engine indexing","level":2,"score":0.3495999872684479},{"id":"https://openalex.org/C175309249","wikidata":"https://www.wikidata.org/wiki/Q725864","display_name":"Pipeline transport","level":2,"score":0.33500000834465027},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.3264000117778778},{"id":"https://openalex.org/C162319229","wikidata":"https://www.wikidata.org/wiki/Q175263","display_name":"Data structure","level":2,"score":0.31709998846054077},{"id":"https://openalex.org/C2777508537","wikidata":"https://www.wikidata.org/wiki/Q7936620","display_name":"Visual reasoning","level":2,"score":0.3018999993801117},{"id":"https://openalex.org/C155911833","wikidata":"https://www.wikidata.org/wiki/Q3817354","display_name":"Spatial intelligence","level":2,"score":0.28349998593330383},{"id":"https://openalex.org/C78766204","wikidata":"https://www.wikidata.org/wiki/Q555032","display_name":"Multi-core processor","level":2,"score":0.27709999680519104},{"id":"https://openalex.org/C133875982","wikidata":"https://www.wikidata.org/wiki/Q764810","display_name":"Shared memory","level":2,"score":0.2768999934196472},{"id":"https://openalex.org/C162307627","wikidata":"https://www.wikidata.org/wiki/Q204833","display_name":"Enhanced Data Rates for GSM Evolution","level":2,"score":0.2754000127315521},{"id":"https://openalex.org/C56348326","wikidata":"https://www.wikidata.org/wiki/Q1047554","display_name":"Mutual exclusion","level":2,"score":0.27399998903274536},{"id":"https://openalex.org/C100980136","wikidata":"https://www.wikidata.org/wiki/Q4668956","display_name":"Malleability","level":4,"score":0.26930001378059387},{"id":"https://openalex.org/C27602214","wikidata":"https://www.wikidata.org/wiki/Q1868547","display_name":"Locality of reference","level":3,"score":0.2630000114440918},{"id":"https://openalex.org/C195344581","wikidata":"https://www.wikidata.org/wiki/Q2555318","display_name":"Automated reasoning","level":2,"score":0.25519999861717224},{"id":"https://openalex.org/C206729178","wikidata":"https://www.wikidata.org/wiki/Q2271896","display_name":"Scheduling (production processes)","level":2,"score":0.2538999915122986}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/3725843.3756039","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3725843.3756039","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 58th IEEE/ACM International Symposium on Microarchitecture","raw_type":"proceedings-article"},{"id":"pmh:oai:repository.hkust.edu.hk:1783.1-167582","is_oa":false,"landing_page_url":"http://repository.hkust.edu.hk/ir/Record/1783.1-167582","pdf_url":null,"source":{"id":"https://openalex.org/S4306401796","display_name":"Rare & Special e-Zone (The Hong Kong University of Science and Technology)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I200769079","host_organization_name":"Hong Kong University of Science and Technology","host_organization_lineage":["https://openalex.org/I200769079"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Conference paper"}],"best_oa_location":{"id":"doi:10.1145/3725843.3756039","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3725843.3756039","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 58th IEEE/ACM International Symposium on Microarchitecture","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G8042391404","display_name":null,"funder_award_id":"1937592, 2048183, 2434166","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":16,"referenced_works":["https://openalex.org/W2034861439","https://openalex.org/W2508602506","https://openalex.org/W2518281301","https://openalex.org/W2889354425","https://openalex.org/W2983890558","https://openalex.org/W3042493405","https://openalex.org/W3134274954","https://openalex.org/W4386765002","https://openalex.org/W4387321091","https://openalex.org/W4388757726","https://openalex.org/W4392427708","https://openalex.org/W4394998968","https://openalex.org/W4401211807","https://openalex.org/W4404955001","https://openalex.org/W4409248600","https://openalex.org/W4409248709"],"related_works":[],"abstract_inverted_index":{"Recent":[0],"breakthroughs":[1],"in":[2,206],"AI":[3],"reasoning,":[4],"enabled":[5],"by":[6,60,95],"test-time":[7],"compute":[8],"(TTC)":[9],"on":[10],"compact":[11],"large":[12],"language":[13],"models":[14],"(LLMs),":[15],"offer":[16],"great":[17],"potential":[18],"for":[19,196],"edge":[20],"devices":[21],"to":[22,105,119,140,152],"effectively":[23],"execute":[24],"complex":[25],"reasoning":[26,80,200,212],"tasks.":[27],"However,":[28],"the":[29,96,207,210],"intricate":[30],"inference":[31],"pipelines":[32],"associated":[33],"with":[34],"TTC":[35],"pose":[36],"new":[37],"efficiency":[38,168],"bottlenecks,":[39],"limiting":[40],"achievable":[41],"latency":[42],"and":[43,65,82,89,101,137,158,166,172,188,198],"hindering":[44,75],"widespread":[45],"adoption.":[46],"Through":[47],"an":[48],"in-depth":[49],"analysis,":[50],"we":[51,109],"identify":[52],"three":[53,125],"key":[54,126],"barriers:":[55],"(1)":[56,128],"variable":[57],"parallelism,":[58],"characterized":[59],"inference-dependent":[61],"dynamic":[62],"control":[63],"flows":[64],"varying":[66],"batch":[67],"sizes,":[68],"complicating":[69],"workload":[70,130],"scheduling;":[71],"(2)":[72,146],"branch":[73,84],"dependencies,":[74],"efficient":[76],"pipelining":[77],"across":[78],"sequential":[79],"steps;":[81],"(3)":[83,159],"pruning,":[85],"causing":[86],"memory":[87,161,174],"fragmentation":[88],"irregular":[90],"data":[91,107,164],"access":[92,167],"patterns.":[93],"Motivated":[94],"memory-bound":[97],"nature":[98],"of":[99,185,209],"LLMs":[100],"Processing-in-Memory":[102],"(PIM)'s":[103],"capability":[104],"reduce":[106,154],"movement,":[108],"propose":[110],"ORCHES,":[111],"a":[112],"novel":[113],"GPU-PIM":[114],"collaborative":[115],"system":[116],"specifically":[117],"designed":[118],"address":[120],"these":[121],"barriers.":[122],"ORCHES":[123,181],"integrates":[124],"innovations:":[127],"adaptive":[129],"assignment,":[131],"dynamically":[132],"balancing":[133],"workloads":[134],"between":[135],"GPU":[136,194],"PIM":[138],"units":[139],"maximize":[141],"parallelism":[142],"despite":[143],"unpredictable":[144],"branching;":[145],"branch-aware":[147],"pipelining,":[148],"leveraging":[149],"speculative":[150],"execution":[151],"substantially":[153],"inter-step":[155],"pipeline":[156],"stalls;":[157],"fragmentation-aware":[160],"structuring,":[162],"enhancing":[163],"locality":[165],"through":[169],"coordinated":[170],"caching":[171],"optimized":[173],"layout":[175],"reorganization.":[176],"Experimental":[177],"results":[178],"demonstrate":[179],"that":[180],"achieves":[182],"average":[183],"speedups":[184],"4.16":[186],"\u00d7":[187,190],"3.10":[189],"over":[191],"state-of-the-art":[192],"(SOTA)":[193],"implementations":[195],"text-based":[197],"vision-based":[199],"tasks,":[201],"respectively,":[202],"without":[203],"any":[204],"loss":[205],"accuracy":[208],"original":[211],"pipeline.":[213]},"counts_by_year":[],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-17T00:00:00"}
