{"id":"https://openalex.org/W7125399051","doi":"https://doi.org/10.1145/3784828.3785241","title":"Orchid: Towards Heterogeneous Batched Eigenvalue Solvers","display_name":"Orchid: Towards Heterogeneous Batched Eigenvalue Solvers","publication_year":2026,"publication_date":"2026-01-22","ids":{"openalex":"https://openalex.org/W7125399051","doi":"https://doi.org/10.1145/3784828.3785241"},"language":null,"primary_location":{"id":"doi:10.1145/3784828.3785241","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3784828.3785241","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Supercomputing Asia and International Conference on High Performance Computing in Asia Pacific Region Workshops","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1145/3784828.3785241","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5123541511","display_name":"Matthew Chung","orcid":null},"institutions":[{"id":"https://openalex.org/I103635307","display_name":"University of California, Riverside","ror":"https://ror.org/03nawhv43","country_code":"US","type":"education","lineage":["https://openalex.org/I103635307"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Matthew Chung","raw_affiliation_strings":["University of California, Riverside, Riverside, California, USA"],"raw_orcid":"https://orcid.org/0009-0001-1321-2497","affiliations":[{"raw_affiliation_string":"University of California, Riverside, Riverside, California, USA","institution_ids":["https://openalex.org/I103635307"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5122379199","display_name":"Keita Teranishi","orcid":null},"institutions":[{"id":"https://openalex.org/I1289243028","display_name":"Oak Ridge National Laboratory","ror":"https://ror.org/01qz5mb56","country_code":"US","type":"facility","lineage":["https://openalex.org/I1289243028","https://openalex.org/I1330989302","https://openalex.org/I39565521","https://openalex.org/I4210159294"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Keita Teranishi","raw_affiliation_strings":["Oak Ridge National Laboratory, Oak Ridge, Tennessee, USA"],"raw_orcid":"https://orcid.org/0000-0001-6647-2690","affiliations":[{"raw_affiliation_string":"Oak Ridge National Laboratory, Oak Ridge, Tennessee, USA","institution_ids":["https://openalex.org/I1289243028"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Narasinga Rao Miniskar","orcid":"https://orcid.org/0000-0001-8259-8891"},"institutions":[{"id":"https://openalex.org/I1289243028","display_name":"Oak Ridge National Laboratory","ror":"https://ror.org/01qz5mb56","country_code":"US","type":"facility","lineage":["https://openalex.org/I1289243028","https://openalex.org/I1330989302","https://openalex.org/I39565521","https://openalex.org/I4210159294"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Narasinga Rao Miniskar","raw_affiliation_strings":["Oak Ridge National Laboratory, Oak Ridge, Tennessee, USA"],"raw_orcid":"https://orcid.org/0000-0001-8259-8891","affiliations":[{"raw_affiliation_string":"Oak Ridge National Laboratory, Oak Ridge, Tennessee, USA","institution_ids":["https://openalex.org/I1289243028"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Toshiyuki Imamura","orcid":"https://orcid.org/0000-0003-1601-9710"},"institutions":[{"id":"https://openalex.org/I4210129730","display_name":"RIKEN Center for Computational Science","ror":"https://ror.org/03r519674","country_code":"JP","type":"facility","lineage":["https://openalex.org/I4210110652","https://openalex.org/I4210129730"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Toshiyuki Imamura","raw_affiliation_strings":["RIKEN R-CCS, Kobe, Japan"],"raw_orcid":"https://orcid.org/0000-0003-1601-9710","affiliations":[{"raw_affiliation_string":"RIKEN R-CCS, Kobe, Japan","institution_ids":["https://openalex.org/I4210129730"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5004329088","display_name":"Mohammad Alaul Haque Monil","orcid":"https://orcid.org/0000-0003-3419-4037"},"institutions":[{"id":"https://openalex.org/I1289243028","display_name":"Oak Ridge National Laboratory","ror":"https://ror.org/01qz5mb56","country_code":"US","type":"facility","lineage":["https://openalex.org/I1289243028","https://openalex.org/I1330989302","https://openalex.org/I39565521","https://openalex.org/I4210159294"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Mohammad Alaul Haque Monil","raw_affiliation_strings":["Oak Ridge National Laboratory, Oak Ridge, Tennessee, USA"],"raw_orcid":"https://orcid.org/0000-0003-3419-4037","affiliations":[{"raw_affiliation_string":"Oak Ridge National Laboratory, Oak Ridge, Tennessee, USA","institution_ids":["https://openalex.org/I1289243028"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.0622506,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"330","last_page":"338"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9103999733924866,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9103999733924866,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10792","display_name":"Matrix Theory and Algorithms","score":0.011599999852478504,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.01080000028014183,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.8069000244140625},{"id":"https://openalex.org/keywords/solver","display_name":"Solver","score":0.6973999738693237},{"id":"https://openalex.org/keywords/eigenvalues-and-eigenvectors","display_name":"Eigenvalues and eigenvectors","score":0.5389999747276306},{"id":"https://openalex.org/keywords/graph","display_name":"Graph","score":0.46869999170303345},{"id":"https://openalex.org/keywords/directed-acyclic-graph","display_name":"Directed acyclic graph","score":0.46000000834465027},{"id":"https://openalex.org/keywords/orchestration","display_name":"Orchestration","score":0.43709999322891235},{"id":"https://openalex.org/keywords/node","display_name":"Node (physics)","score":0.39559999108314514}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8111000061035156},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.8069000244140625},{"id":"https://openalex.org/C2778770139","wikidata":"https://www.wikidata.org/wiki/Q1966904","display_name":"Solver","level":2,"score":0.6973999738693237},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.6172999739646912},{"id":"https://openalex.org/C158693339","wikidata":"https://www.wikidata.org/wiki/Q190524","display_name":"Eigenvalues and eigenvectors","level":2,"score":0.5389999747276306},{"id":"https://openalex.org/C132525143","wikidata":"https://www.wikidata.org/wiki/Q141488","display_name":"Graph","level":2,"score":0.46869999170303345},{"id":"https://openalex.org/C74197172","wikidata":"https://www.wikidata.org/wiki/Q1195339","display_name":"Directed acyclic graph","level":2,"score":0.46000000834465027},{"id":"https://openalex.org/C199168358","wikidata":"https://www.wikidata.org/wiki/Q3367000","display_name":"Orchestration","level":3,"score":0.43709999322891235},{"id":"https://openalex.org/C459310","wikidata":"https://www.wikidata.org/wiki/Q117801","display_name":"Computational science","level":1,"score":0.4047999978065491},{"id":"https://openalex.org/C62611344","wikidata":"https://www.wikidata.org/wiki/Q1062658","display_name":"Node (physics)","level":2,"score":0.39559999108314514},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.39399999380111694},{"id":"https://openalex.org/C2989134064","wikidata":"https://www.wikidata.org/wiki/Q288510","display_name":"Execution time","level":2,"score":0.39259999990463257},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.3903999924659729},{"id":"https://openalex.org/C2778119891","wikidata":"https://www.wikidata.org/wiki/Q477690","display_name":"CUDA","level":2,"score":0.3433000147342682},{"id":"https://openalex.org/C83283714","wikidata":"https://www.wikidata.org/wiki/Q121117","display_name":"Supercomputer","level":2,"score":0.31470000743865967},{"id":"https://openalex.org/C51823790","wikidata":"https://www.wikidata.org/wiki/Q504353","display_name":"Greedy algorithm","level":2,"score":0.30379998683929443},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.2856999933719635},{"id":"https://openalex.org/C3017489831","wikidata":"https://www.wikidata.org/wiki/Q2393193","display_name":"Running time","level":2,"score":0.2653999924659729},{"id":"https://openalex.org/C179799912","wikidata":"https://www.wikidata.org/wiki/Q205084","display_name":"Computational complexity theory","level":2,"score":0.26460000872612},{"id":"https://openalex.org/C138959212","wikidata":"https://www.wikidata.org/wiki/Q1806783","display_name":"Load balancing (electrical power)","level":3,"score":0.2551000118255615},{"id":"https://openalex.org/C139352143","wikidata":"https://www.wikidata.org/wiki/Q82571","display_name":"Linear algebra","level":2,"score":0.2538999915122986}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3784828.3785241","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3784828.3785241","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Supercomputing Asia and International Conference on High Performance Computing in Asia Pacific Region Workshops","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3784828.3785241","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3784828.3785241","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Supercomputing Asia and International Conference on High Performance Computing in Asia Pacific Region Workshops","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1900894077","display_name":null,"funder_award_id":"LAB 24-3210 under contract ERKJ452","funder_id":"https://openalex.org/F4320306084","funder_display_name":"U.S. Department of Energy"}],"funders":[{"id":"https://openalex.org/F4320306084","display_name":"U.S. Department of Energy","ror":"https://ror.org/01bj3aw27"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":12,"referenced_works":["https://openalex.org/W2121893797","https://openalex.org/W2149227288","https://openalex.org/W3006183350","https://openalex.org/W3007543653","https://openalex.org/W3041433688","https://openalex.org/W4200300530","https://openalex.org/W4318970015","https://openalex.org/W4320061890","https://openalex.org/W4322576898","https://openalex.org/W4367147549","https://openalex.org/W4388581050","https://openalex.org/W4399130511"],"related_works":[],"abstract_inverted_index":{"This":[0],"work":[1],"introduces":[2],"Orchid,":[3],"a":[4,20,37,46,57],"novel":[5],"batched":[6,80],"eigenvalue":[7,51],"solver":[8],"designed":[9],"for":[10,31,41,161],"multi-accelerator":[11],"platforms.":[12],"Orchid":[13,44,77,98,124,147],"fully":[14],"leverages":[15],"all":[16],"compute":[17],"units":[18],"within":[19],"node":[21],"by":[22],"exploiting":[23],"the":[24,28,69,74,121],"orchestration":[25],"capabilities":[26],"of":[27,62],"IRIS":[29,70],"runtime":[30],"heterogeneous":[32,152],"systems.":[33],"Integrated":[34],"into":[35],"MatRIS,":[36],"performance-portable":[38],"math":[39],"library":[40],"extreme":[42],"heterogeneity,":[43],"delivers":[45],"highly":[47],"efficient":[48],"and":[49,64,85,101,110,129,135],"productive":[50],"solver.":[52],"It":[53],"decomposes":[54],"computations,":[55],"constructs":[56],"directed":[58],"acyclic":[59],"graph":[60],"(DAG)":[61],"tasks,":[63],"orchestrates":[65],"execution":[66,90],"intelligently":[67],"through":[68,155],"runtime.":[71],"Depending":[72],"on":[73,99,114],"underlying":[75],"architecture,":[76],"employs":[78],"optimized":[79],"kernels":[81],"from":[82,93],"EigenG-Batched,":[83],"cuSOLVER,":[84],"hipSOLVER,":[86,136],"enabling":[87],"automated":[88],"multi-GPU":[89],"across":[91],"devices":[92],"different":[94],"vendors.":[95],"We":[96],"evaluate":[97],"NVIDIA":[100],"AMD":[102],"GPUs,":[103,116],"demonstrating":[104],"scalability":[105],"with":[106],"up":[107,126],"to":[108,127,133],"5.8x":[109],"6.52x":[111],"performance":[112,150],"gains":[113],"eight":[115],"respectively.":[117],"Furthermore,":[118],"when":[119],"using":[120],"EigenG-Batched":[122],"backend,":[123],"achieves":[125],"24.54x":[128],"52.61x":[130],"speedups":[131],"compared":[132],"cuSOLVER":[134],"delivering":[137],"substantial":[138],"improvements":[139],"over":[140],"vendor-provided":[141],"libraries.":[142],"By":[143],"employing":[144],"dynamic":[145],"scheduling,":[146],"attains":[148],"superior":[149],"in":[151],"GPU":[153,163],"environments":[154],"effective":[156],"load":[157],"balancing":[158],"that":[159],"accounts":[160],"individual":[162],"computational":[164],"capabilities.":[165]},"counts_by_year":[],"updated_date":"2026-06-17T08:01:34.144755","created_date":"2026-01-23T00:00:00"}
