{"id":"https://openalex.org/W2897561292","doi":"https://doi.org/10.1145/3234664.3234681","title":"Performance Comparison of CUDA and OpenACC Based on Optimizations","display_name":"Performance Comparison of CUDA and OpenACC Based on Optimizations","publication_year":2018,"publication_date":"2018-06-22","ids":{"openalex":"https://openalex.org/W2897561292","doi":"https://doi.org/10.1145/3234664.3234681","mag":"2897561292"},"language":"en","primary_location":{"id":"doi:10.1145/3234664.3234681","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3234664.3234681","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2018 2nd High Performance Computing and Cluster Technologies Conference","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101689192","display_name":"Xuechao Li","orcid":"https://orcid.org/0000-0003-1462-5290"},"institutions":[{"id":"https://openalex.org/I19406000","display_name":"Concordia University Chicago","ror":"https://ror.org/02jvqj155","country_code":"US","type":"education","lineage":["https://openalex.org/I19406000"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Xuechao Li","raw_affiliation_strings":["Computer Science Department, Concordia University Chicago, River Forest IL, USA"],"affiliations":[{"raw_affiliation_string":"Computer Science Department, Concordia University Chicago, River Forest IL, USA","institution_ids":["https://openalex.org/I19406000"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5007528785","display_name":"Po-Chou Shih","orcid":"https://orcid.org/0000-0001-8428-7165"},"institutions":[{"id":"https://openalex.org/I118292597","display_name":"National Taipei University of Technology","ror":"https://ror.org/00cn92c09","country_code":"TW","type":"education","lineage":["https://openalex.org/I118292597"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Po-Chou Shih","raw_affiliation_strings":["Institute of Industrial and Business Management, National Taipei University of Technology Taipei, Taiwan"],"affiliations":[{"raw_affiliation_string":"Institute of Industrial and Business Management, National Taipei University of Technology Taipei, Taiwan","institution_ids":["https://openalex.org/I118292597"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5101689192"],"corresponding_institution_ids":["https://openalex.org/I19406000"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.13150819,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"53","last_page":"57"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10904","display_name":"Embedded Systems Design Techniques","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/cuda","display_name":"CUDA","score":0.9729582667350769},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8251524567604065},{"id":"https://openalex.org/keywords/compiler","display_name":"Compiler","score":0.816047191619873},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.7881217002868652},{"id":"https://openalex.org/keywords/kernel","display_name":"Kernel (algebra)","score":0.5983763337135315},{"id":"https://openalex.org/keywords/general-purpose-computing-on-graphics-processing-units","display_name":"General-purpose computing on graphics processing units","score":0.5444679260253906},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.46500882506370544},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.264931857585907},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.23630565404891968},{"id":"https://openalex.org/keywords/graphics","display_name":"Graphics","score":0.14804205298423767},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.07047170400619507}],"concepts":[{"id":"https://openalex.org/C2778119891","wikidata":"https://www.wikidata.org/wiki/Q477690","display_name":"CUDA","level":2,"score":0.9729582667350769},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8251524567604065},{"id":"https://openalex.org/C169590947","wikidata":"https://www.wikidata.org/wiki/Q47506","display_name":"Compiler","level":2,"score":0.816047191619873},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.7881217002868652},{"id":"https://openalex.org/C74193536","wikidata":"https://www.wikidata.org/wiki/Q574844","display_name":"Kernel (algebra)","level":2,"score":0.5983763337135315},{"id":"https://openalex.org/C50630238","wikidata":"https://www.wikidata.org/wiki/Q971505","display_name":"General-purpose computing on graphics processing units","level":3,"score":0.5444679260253906},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.46500882506370544},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.264931857585907},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.23630565404891968},{"id":"https://openalex.org/C21442007","wikidata":"https://www.wikidata.org/wiki/Q1027879","display_name":"Graphics","level":2,"score":0.14804205298423767},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.07047170400619507},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.0},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3234664.3234681","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3234664.3234681","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2018 2nd High Performance Computing and Cluster Technologies Conference","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":19,"referenced_works":["https://openalex.org/W831582143","https://openalex.org/W1678662003","https://openalex.org/W2002487373","https://openalex.org/W2011637083","https://openalex.org/W2018408367","https://openalex.org/W2056862683","https://openalex.org/W2078994750","https://openalex.org/W2080592089","https://openalex.org/W2108157916","https://openalex.org/W2109655969","https://openalex.org/W2121082877","https://openalex.org/W2122078011","https://openalex.org/W2123424315","https://openalex.org/W2130597822","https://openalex.org/W2264950386","https://openalex.org/W2400593953","https://openalex.org/W2432215402","https://openalex.org/W2548173935","https://openalex.org/W2753411857"],"related_works":["https://openalex.org/W1963859303","https://openalex.org/W2364044215","https://openalex.org/W2389600408","https://openalex.org/W240129890","https://openalex.org/W3048701459","https://openalex.org/W2149078538","https://openalex.org/W2080146221","https://openalex.org/W2370314112","https://openalex.org/W1912958759","https://openalex.org/W2792081825"],"abstract_inverted_index":{"Based":[0],"on":[1,24],"various":[2],"optimizations,":[3],"this":[4],"paper":[5],"presents":[6],"a":[7,50,129,135],"performance":[8,21],"comparison":[9],"between":[10],"CUDA":[11,84,139],"and":[12,29,34,39,134,156,164],"OpenACC":[13,73,99,127,150,159],"using":[14],"19":[15],"kernels":[16,74],"in":[17,64,98,107,120,146,157],"10":[18],"benchmarks.":[19],"The":[20,59],"analysis":[22],"focuses":[23],"programming":[25,132],"models,":[26],"optimization":[27],"technologies":[28],"underlying":[30],"compilers.":[31],"It":[32],"measures":[33],"compares":[35],"kernel":[36],"execution":[37],"times":[38,42],"data":[40,95],"transfer":[41,96],"to/from":[43],"the":[44,66,90,94,110,144],"GPU.":[45],"In":[46],"addition,":[47],"it":[48],"utilizes":[49],"Performance":[51],"Ratio":[52],"metric":[53],"to":[54,71,102,116,138],"conduct":[55],"an":[56],"objective":[57],"comparison.":[58],"experimental":[60],"results":[61],"show":[62],"that":[63,78,88,126],"general":[65],"PGI":[67],"compiler":[68],"is":[69,79,128,160],"able":[70],"translate":[72],"into":[75],"object":[76],"code":[77],"slightly":[80],"slower":[81],"than":[82,106,119],"hand-written":[83],"codes":[85],"for":[86,140,162,165],"benchmarks":[87],"solve":[89],"same":[91],"problem.":[92],"Also,":[93],"time":[97],"programs":[100,145],"tends":[101,115],"be":[103,117],"much":[104],"faster":[105],"CUDA,":[108,155],"while":[109],"number":[111],"of":[112],"memcpy":[113],"calls":[114],"higher":[118],"CUDA.":[121],"Overall":[122],"conclusions":[123],"were":[124],"found":[125],"very":[130],"reliable":[131],"model":[133],"good":[136],"alternative":[137],"accelerator":[141],"devices.":[142],"For":[143],"our":[147],"test":[148],"corpus,":[149],"performs":[151],"as":[152,154],"well":[153],"general,":[158],"better":[161],"novices":[163],"programmers":[166],"targeting":[167],"multiple":[168],"platforms.":[169]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2023,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
