{"id":"https://openalex.org/W2000287918","doi":"https://doi.org/10.1145/2464996.2465023","title":"Scaling large-data computations on multi-GPU accelerators","display_name":"Scaling large-data computations on multi-GPU accelerators","publication_year":2013,"publication_date":"2013-05-28","ids":{"openalex":"https://openalex.org/W2000287918","doi":"https://doi.org/10.1145/2464996.2465023","mag":"2000287918"},"language":"en","primary_location":{"id":"doi:10.1145/2464996.2465023","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2464996.2465023","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 27th international ACM conference on International conference on supercomputing","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5000932612","display_name":"Amit Sabne","orcid":"https://orcid.org/0000-0002-2179-0078"},"institutions":[{"id":"https://openalex.org/I219193219","display_name":"Purdue University West Lafayette","ror":"https://ror.org/02dqehb95","country_code":"US","type":"education","lineage":["https://openalex.org/I219193219"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Amit Sabne","raw_affiliation_strings":["Purdue University, West Lafayette, IN, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Purdue University, West Lafayette, IN, USA","institution_ids":["https://openalex.org/I219193219"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5008393837","display_name":"Putt Sakdhnagool","orcid":"https://orcid.org/0000-0002-7925-0525"},"institutions":[{"id":"https://openalex.org/I219193219","display_name":"Purdue University West Lafayette","ror":"https://ror.org/02dqehb95","country_code":"US","type":"education","lineage":["https://openalex.org/I219193219"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Putt Sakdhnagool","raw_affiliation_strings":["Purdue University, West Lafayette, IN, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Purdue University, West Lafayette, IN, USA","institution_ids":["https://openalex.org/I219193219"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5045622261","display_name":"Rudolf Eigenmann","orcid":"https://orcid.org/0000-0003-1651-827X"},"institutions":[{"id":"https://openalex.org/I219193219","display_name":"Purdue University West Lafayette","ror":"https://ror.org/02dqehb95","country_code":"US","type":"education","lineage":["https://openalex.org/I219193219"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Rudolf Eigenmann","raw_affiliation_strings":["Purdue University, West Lafayette, IN, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Purdue University, West Lafayette, IN, USA","institution_ids":["https://openalex.org/I219193219"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I219193219"],"apc_list":null,"apc_paid":null,"fwci":0.9518,"has_fulltext":false,"cited_by_count":19,"citation_normalized_percentile":{"value":0.75064577,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"443","last_page":"454"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9980000257492065,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.9972000122070312,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8934401273727417},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.7644933462142944},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.7069449424743652},{"id":"https://openalex.org/keywords/overhead","display_name":"Overhead (engineering)","score":0.6670754551887512},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.5399063229560852},{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.4554843306541443},{"id":"https://openalex.org/keywords/instruction-set","display_name":"Instruction set","score":0.42104750871658325},{"id":"https://openalex.org/keywords/runtime-system","display_name":"Runtime system","score":0.41151243448257446},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.1861392855644226}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8934401273727417},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.7644933462142944},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.7069449424743652},{"id":"https://openalex.org/C2779960059","wikidata":"https://www.wikidata.org/wiki/Q7113681","display_name":"Overhead (engineering)","level":2,"score":0.6670754551887512},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.5399063229560852},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.4554843306541443},{"id":"https://openalex.org/C202491316","wikidata":"https://www.wikidata.org/wiki/Q272683","display_name":"Instruction set","level":2,"score":0.42104750871658325},{"id":"https://openalex.org/C2780870223","wikidata":"https://www.wikidata.org/wiki/Q1004415","display_name":"Runtime system","level":2,"score":0.41151243448257446},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.1861392855644226},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/2464996.2465023","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2464996.2465023","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 27th international ACM conference on International conference on supercomputing","raw_type":"proceedings-article"},{"id":"pmh:oai:docs.lib.purdue.edu:ccpubs-1537","is_oa":false,"landing_page_url":"https://docs.lib.purdue.edu/ccpubs/532","pdf_url":null,"source":{"id":"https://openalex.org/S4377196310","display_name":"Purdue e-Pubs (Purdue University System)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I2801333002","host_organization_name":"Purdue University System","host_organization_lineage":["https://openalex.org/I2801333002"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Cyber Center Publications","raw_type":"text"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/7","display_name":"Affordable and clean energy","score":0.5799999833106995}],"awards":[],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":32,"referenced_works":["https://openalex.org/W1464102432","https://openalex.org/W1494271578","https://openalex.org/W1537323515","https://openalex.org/W1568192366","https://openalex.org/W1569496172","https://openalex.org/W2012173227","https://openalex.org/W2016618963","https://openalex.org/W2016888570","https://openalex.org/W2032039029","https://openalex.org/W2049875313","https://openalex.org/W2050612421","https://openalex.org/W2074570088","https://openalex.org/W2077154052","https://openalex.org/W2080592089","https://openalex.org/W2107483876","https://openalex.org/W2119304316","https://openalex.org/W2122080725","https://openalex.org/W2122570236","https://openalex.org/W2122978030","https://openalex.org/W2126026097","https://openalex.org/W2128539477","https://openalex.org/W2129817042","https://openalex.org/W2133741166","https://openalex.org/W2140375692","https://openalex.org/W2143708379","https://openalex.org/W2153492376","https://openalex.org/W2161190431","https://openalex.org/W2167101788","https://openalex.org/W2167334577","https://openalex.org/W2170634604","https://openalex.org/W3013976982","https://openalex.org/W4285719527"],"related_works":["https://openalex.org/W2058965144","https://openalex.org/W2164382479","https://openalex.org/W98480971","https://openalex.org/W1966837078","https://openalex.org/W2045177269","https://openalex.org/W2116582200","https://openalex.org/W1580752477","https://openalex.org/W2061453039","https://openalex.org/W1567361500","https://openalex.org/W2242847901"],"abstract_inverted_index":{"Modern":[0],"supercomputers":[1],"rely":[2],"on":[3,100,172,180],"accelerators":[4],"to":[5,49,58,87,95,111,120,129,156,162,166,196],"speed":[6],"up":[7],"highly":[8],"parallel":[9],"workloads.":[10],"Intricate":[11],"programming":[12],"models,":[13],"limited":[14],"device":[15,89],"memory":[16,54,65,90],"sizes":[17,91],"and":[18,25,46,92,140],"overheads":[19],"of":[20,38,137,175,198],"data":[21,69],"transfers":[22],"between":[23],"CPU":[24],"accelerator":[26],"memories":[27],"are":[28,148],"among":[29],"the":[30,35,52,60,64,68,76,114,122,130,134,142,167,182,186,192,199],"open":[31],"challenges":[32],"that":[33,85,153],"restrict":[34],"widespread":[36],"use":[37],"accelerators.":[39],"First,":[40],"this":[41],"paper":[42,77],"proposes":[43],"a":[44,79,103,138,151,173],"mechanism":[45,108,127],"an":[47,158],"implementation":[48],"automatically":[50,93,112],"pipeline":[51,115,143],"CPU-GPU":[53],"channel":[55],"so":[56,118],"as":[57,119],"overlap":[59],"GPU":[61],"computation":[62],"with":[63],"copies,":[66],"alleviating":[67],"transfer":[70],"overhead.":[71],"Second,":[72],"in":[73,133,150],"doing":[74],"so,":[75],"presents":[78],"technique":[80],"called":[81],"Computation":[82],"Splitting,":[83],"COSP,":[84],"caters":[86],"arbitrary":[88],"manages":[94],"run":[96],"out-of-card":[97],"OpenMP-like":[98],"applications":[99],"GPUs.":[101],"Third,":[102],"novel":[104],"adaptive":[105],"runtime":[106,193],"tuning":[107,194],"is":[109,154],"proposed":[110],"select":[113],"stage":[116,144],"size":[117],"gain":[121],"best":[123],"possible":[124],"performance.":[125],"The":[126,146],"adapts":[128],"underlying":[131],"hardware":[132],"starting":[135],"phase":[136],"program":[139,161],"chooses":[141],"size.":[145],"techniques":[147],"implemented":[149],"system":[152],"able":[155],"translate":[157],"input":[159],"OpenMP":[160],"multiple":[163],"GPUs":[164],"attached":[165],"same":[168],"host":[169],"CPU.":[170],"Experimentation":[171],"set":[174],"nine":[176],"benchmarks":[177],"shows":[178],"that,":[179],"average,":[181],"pipelining":[183],"scheme":[184],"improves":[185],"performance":[187],"by":[188],"1.49x,":[189],"while":[190],"limiting":[191],"overhead":[195],"3%":[197],"execution":[200],"time.":[201]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2024,"cited_by_count":3},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":2},{"year":2020,"cited_by_count":2},{"year":2019,"cited_by_count":2},{"year":2018,"cited_by_count":3},{"year":2017,"cited_by_count":2},{"year":2015,"cited_by_count":1},{"year":2014,"cited_by_count":2}],"updated_date":"2026-06-26T08:34:08.712188","created_date":"2025-10-10T00:00:00"}
