{"id":"https://openalex.org/W1969399184","doi":"https://doi.org/10.1145/1810085.1810105","title":"An experimental approach to performance measurement of heterogeneous parallel applications using CUDA","display_name":"An experimental approach to performance measurement of heterogeneous parallel applications using CUDA","publication_year":2010,"publication_date":"2010-06-02","ids":{"openalex":"https://openalex.org/W1969399184","doi":"https://doi.org/10.1145/1810085.1810105","mag":"1969399184"},"language":"en","primary_location":{"id":"doi:10.1145/1810085.1810105","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1810085.1810105","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 24th ACM International Conference on Supercomputing","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5029524538","display_name":"Allen D. Malony","orcid":"https://orcid.org/0000-0002-9598-7201"},"institutions":[{"id":"https://openalex.org/I181233156","display_name":"University of Oregon","ror":"https://ror.org/0293rh119","country_code":"US","type":"education","lineage":["https://openalex.org/I181233156"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Allen D. Malony","raw_affiliation_strings":["University of Oregon, Eugene, OR"],"affiliations":[{"raw_affiliation_string":"University of Oregon, Eugene, OR","institution_ids":["https://openalex.org/I181233156"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5105669996","display_name":"Scott Biersdorff","orcid":"https://orcid.org/0000-0003-4210-7335"},"institutions":[{"id":"https://openalex.org/I181233156","display_name":"University of Oregon","ror":"https://ror.org/0293rh119","country_code":"US","type":"education","lineage":["https://openalex.org/I181233156"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Scott Biersdorff","raw_affiliation_strings":["University of Oregon, Eugene, OR"],"affiliations":[{"raw_affiliation_string":"University of Oregon, Eugene, OR","institution_ids":["https://openalex.org/I181233156"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5112806937","display_name":"Wyatt Spear","orcid":"https://orcid.org/0009-0005-9704-7642"},"institutions":[{"id":"https://openalex.org/I181233156","display_name":"University of Oregon","ror":"https://ror.org/0293rh119","country_code":"US","type":"education","lineage":["https://openalex.org/I181233156"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Wyatt Spear","raw_affiliation_strings":["University of Oregon, Eugene, OR"],"affiliations":[{"raw_affiliation_string":"University of Oregon, Eugene, OR","institution_ids":["https://openalex.org/I181233156"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5056917418","display_name":"Shangkar Mayanglambam","orcid":null},"institutions":[{"id":"https://openalex.org/I4210087596","display_name":"Qualcomm (United States)","ror":"https://ror.org/002zrf773","country_code":"US","type":"company","lineage":["https://openalex.org/I4210087596"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Shangkar Mayanglambam","raw_affiliation_strings":["Qualcomm Corporation, Santa Clara, CA"],"affiliations":[{"raw_affiliation_string":"Qualcomm Corporation, Santa Clara, CA","institution_ids":["https://openalex.org/I4210087596"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5029524538"],"corresponding_institution_ids":["https://openalex.org/I181233156"],"apc_list":null,"apc_paid":null,"fwci":4.40817893,"has_fulltext":false,"cited_by_count":36,"citation_normalized_percentile":{"value":0.95248933,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"127","last_page":"136"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10829","display_name":"Interconnection Networks and Systems","score":0.9970999956130981,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/cuda","display_name":"CUDA","score":0.9414180517196655},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.854911744594574},{"id":"https://openalex.org/keywords/executable","display_name":"Executable","score":0.7686676979064941},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.737028956413269},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.6482680439949036},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.6432098150253296},{"id":"https://openalex.org/keywords/general-purpose-computing-on-graphics-processing-units","display_name":"General-purpose computing on graphics processing units","score":0.6230982542037964},{"id":"https://openalex.org/keywords/supercomputer","display_name":"Supercomputer","score":0.6186614036560059},{"id":"https://openalex.org/keywords/acceleration","display_name":"Acceleration","score":0.48013588786125183},{"id":"https://openalex.org/keywords/computer-architecture","display_name":"Computer architecture","score":0.3795861303806305},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.3270219564437866},{"id":"https://openalex.org/keywords/graphics","display_name":"Graphics","score":0.3037150800228119},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.26730823516845703}],"concepts":[{"id":"https://openalex.org/C2778119891","wikidata":"https://www.wikidata.org/wiki/Q477690","display_name":"CUDA","level":2,"score":0.9414180517196655},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.854911744594574},{"id":"https://openalex.org/C160145156","wikidata":"https://www.wikidata.org/wiki/Q778586","display_name":"Executable","level":2,"score":0.7686676979064941},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.737028956413269},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.6482680439949036},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.6432098150253296},{"id":"https://openalex.org/C50630238","wikidata":"https://www.wikidata.org/wiki/Q971505","display_name":"General-purpose computing on graphics processing units","level":3,"score":0.6230982542037964},{"id":"https://openalex.org/C83283714","wikidata":"https://www.wikidata.org/wiki/Q121117","display_name":"Supercomputer","level":2,"score":0.6186614036560059},{"id":"https://openalex.org/C117896860","wikidata":"https://www.wikidata.org/wiki/Q11376","display_name":"Acceleration","level":2,"score":0.48013588786125183},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.3795861303806305},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.3270219564437866},{"id":"https://openalex.org/C21442007","wikidata":"https://www.wikidata.org/wiki/Q1027879","display_name":"Graphics","level":2,"score":0.3037150800228119},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.26730823516845703},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C74650414","wikidata":"https://www.wikidata.org/wiki/Q11397","display_name":"Classical mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/1810085.1810105","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1810085.1810105","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 24th ACM International Conference on Supercomputing","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.4399999976158142,"id":"https://metadata.un.org/sdg/7","display_name":"Affordable and clean energy"}],"awards":[{"id":"https://openalex.org/G8841478033","display_name":null,"funder_award_id":"DE-PS02-08ER08-19DEFG02-07ER25826DE-AC02-06CH11357","funder_id":"https://openalex.org/F4320306084","funder_display_name":"U.S. Department of Energy"}],"funders":[{"id":"https://openalex.org/F4320306084","display_name":"U.S. Department of Energy","ror":"https://ror.org/01bj3aw27"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":15,"referenced_works":["https://openalex.org/W20481021","https://openalex.org/W1515668834","https://openalex.org/W1566403276","https://openalex.org/W1580800534","https://openalex.org/W1585031423","https://openalex.org/W2004139537","https://openalex.org/W2086824943","https://openalex.org/W2099000434","https://openalex.org/W2136434791","https://openalex.org/W2149234156","https://openalex.org/W2150981663","https://openalex.org/W2170611190","https://openalex.org/W2395771004","https://openalex.org/W2494991438","https://openalex.org/W4255916494"],"related_works":["https://openalex.org/W3192364216","https://openalex.org/W1963859303","https://openalex.org/W2364044215","https://openalex.org/W2389600408","https://openalex.org/W240129890","https://openalex.org/W3048701459","https://openalex.org/W2149078538","https://openalex.org/W2080146221","https://openalex.org/W2370314112","https://openalex.org/W1912958759"],"abstract_inverted_index":{"Heterogeneous":[0],"parallel":[1,41,63,113],"systems":[2],"using":[3],"GPU":[4,24,106,158],"devices":[5],"for":[6,50],"application":[7,26],"acceleration":[8],"have":[9],"garnered":[10],"significant":[11],"attention":[12],"in":[13],"the":[14,20,40,51,61,69,90,103,109,112,119,128,137,161],"supercomputing":[15],"community.":[16],"However,":[17],"to":[18,31,39,118,141],"realize":[19],"full":[21],"potential":[22],"of":[23,68,105,148,160],"computing,":[25],"developers":[27],"will":[28],"require":[29],"tools":[30],"measure":[32],"and":[33,58,80,83,108,131,164],"analyze":[34],"accelerator":[35],"performance":[36,47,64,100,144],"with":[37,60,111,136],"respect":[38],"execution":[42,104],"as":[43],"a":[44,157,165],"whole.":[45],"A":[46],"measurement":[48,139],"technology":[49,130],"NVIDIA":[52,77],"CUDA":[53,78,91,154],"platform":[54],"has":[55],"been":[56],"developed":[57],"integrated":[59,135,143],"TAU":[62,138],"system.":[65],"The":[66,125],"design":[67],"TAUcuda":[70,96,129,149],"package":[71],"is":[72,94,134],"based":[73],"on":[74],"an":[75],"experimental":[76,92],"driver":[79,93],"associated":[81],"runtime":[82],"device":[84],"libraries.":[85],"In":[86],"any":[87,116],"environment":[88],"where":[89],"installed,":[95],"can":[97],"provide":[98,142],"detailed":[99],"information":[101],"regarding":[102],"kernels":[107],"interactions":[110],"program":[114,120],"without":[115],"modification":[117],"source":[121],"or":[122],"executable":[123],"code.":[124],"paper":[126],"describes":[127],"how":[132],"it":[133],"framework":[140],"views.":[145],"Various":[146],"examples":[147],"use":[150],"are":[151],"presented,":[152],"including":[153],"SDK":[155],"examples,":[156],"version":[159],"Linpack":[162],"benchmark,":[163],"scalable":[166],"molecular":[167],"dynamics":[168],"application,":[169],"NAMD.":[170]},"counts_by_year":[{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":1},{"year":2020,"cited_by_count":2},{"year":2019,"cited_by_count":3},{"year":2018,"cited_by_count":2},{"year":2017,"cited_by_count":1},{"year":2016,"cited_by_count":1},{"year":2014,"cited_by_count":5},{"year":2013,"cited_by_count":3},{"year":2012,"cited_by_count":4}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
