{"id":"https://openalex.org/W2964456834","doi":"https://doi.org/10.1109/ipdpsw.2019.00068","title":"Toward an Analytical Performance Model to Select between GPU and CPU Execution","display_name":"Toward an Analytical Performance Model to Select between GPU and CPU Execution","publication_year":2019,"publication_date":"2019-05-01","ids":{"openalex":"https://openalex.org/W2964456834","doi":"https://doi.org/10.1109/ipdpsw.2019.00068","mag":"2964456834"},"language":"en","primary_location":{"id":"doi:10.1109/ipdpsw.2019.00068","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ipdpsw.2019.00068","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2019 IEEE International Parallel and Distributed Processing Symposium Workshops (IPDPSW)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5048684499","display_name":"Artem Chikin","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Artem Chikin","raw_affiliation_strings":["Intel Corporation, Toronto, ON, Canada"],"affiliations":[{"raw_affiliation_string":"Intel Corporation, Toronto, ON, Canada","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5048554669","display_name":"Jos\u00e9 Nelson Amaral","orcid":"https://orcid.org/0000-0002-9943-1809"},"institutions":[{"id":"https://openalex.org/I154425047","display_name":"University of Alberta","ror":"https://ror.org/0160cpw27","country_code":"CA","type":"education","lineage":["https://openalex.org/I154425047"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Jose Nelson Amaral","raw_affiliation_strings":["University of Alberta, Edmonton, AB, Canada"],"affiliations":[{"raw_affiliation_string":"University of Alberta, Edmonton, AB, Canada","institution_ids":["https://openalex.org/I154425047"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5038781215","display_name":"Karim Ali","orcid":"https://orcid.org/0000-0002-5516-1376"},"institutions":[{"id":"https://openalex.org/I154425047","display_name":"University of Alberta","ror":"https://ror.org/0160cpw27","country_code":"CA","type":"education","lineage":["https://openalex.org/I154425047"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Karim Ali","raw_affiliation_strings":["University of Alberta, Edmonton, AB, Canada"],"affiliations":[{"raw_affiliation_string":"University of Alberta, Edmonton, AB, Canada","institution_ids":["https://openalex.org/I154425047"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5000717061","display_name":"Ettore Tiotto","orcid":null},"institutions":[{"id":"https://openalex.org/I4210113654","display_name":"IBM (Canada)","ror":"https://ror.org/025sxka56","country_code":"CA","type":"company","lineage":["https://openalex.org/I1341412227","https://openalex.org/I4210113654"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Ettore Tiotto","raw_affiliation_strings":["IBM Canada Markham, Markham, ON, Canada"],"affiliations":[{"raw_affiliation_string":"IBM Canada Markham, Markham, ON, Canada","institution_ids":["https://openalex.org/I4210113654"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5048684499"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.2038,"has_fulltext":false,"cited_by_count":7,"citation_normalized_percentile":{"value":0.77458909,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"353","last_page":"362"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8776611685752869},{"id":"https://openalex.org/keywords/kernel","display_name":"Kernel (algebra)","score":0.6289105415344238},{"id":"https://openalex.org/keywords/thread","display_name":"Thread (computing)","score":0.572394609451294},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.5391061902046204},{"id":"https://openalex.org/keywords/cuda","display_name":"CUDA","score":0.4859403371810913},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.43260616064071655},{"id":"https://openalex.org/keywords/computer-performance","display_name":"Computer performance","score":0.43055298924446106},{"id":"https://openalex.org/keywords/selection","display_name":"Selection (genetic algorithm)","score":0.42934852838516235},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.18027809262275696},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.11205369234085083},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.09586599469184875}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8776611685752869},{"id":"https://openalex.org/C74193536","wikidata":"https://www.wikidata.org/wiki/Q574844","display_name":"Kernel (algebra)","level":2,"score":0.6289105415344238},{"id":"https://openalex.org/C138101251","wikidata":"https://www.wikidata.org/wiki/Q213092","display_name":"Thread (computing)","level":2,"score":0.572394609451294},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.5391061902046204},{"id":"https://openalex.org/C2778119891","wikidata":"https://www.wikidata.org/wiki/Q477690","display_name":"CUDA","level":2,"score":0.4859403371810913},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.43260616064071655},{"id":"https://openalex.org/C187123476","wikidata":"https://www.wikidata.org/wiki/Q1197550","display_name":"Computer performance","level":2,"score":0.43055298924446106},{"id":"https://openalex.org/C81917197","wikidata":"https://www.wikidata.org/wiki/Q628760","display_name":"Selection (genetic algorithm)","level":2,"score":0.42934852838516235},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.18027809262275696},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.11205369234085083},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.09586599469184875},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/ipdpsw.2019.00068","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ipdpsw.2019.00068","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2019 IEEE International Parallel and Distributed Processing Symposium Workshops (IPDPSW)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":37,"referenced_works":["https://openalex.org/W12500326","https://openalex.org/W1536215775","https://openalex.org/W1651687773","https://openalex.org/W1749508623","https://openalex.org/W1966695753","https://openalex.org/W1988888548","https://openalex.org/W2000857901","https://openalex.org/W2013834502","https://openalex.org/W2019356799","https://openalex.org/W2040356714","https://openalex.org/W2071542942","https://openalex.org/W2095930915","https://openalex.org/W2098824104","https://openalex.org/W2101005153","https://openalex.org/W2103136097","https://openalex.org/W2106139913","https://openalex.org/W2107158148","https://openalex.org/W2118880662","https://openalex.org/W2122418911","https://openalex.org/W2122505371","https://openalex.org/W2131230754","https://openalex.org/W2166536280","https://openalex.org/W2167334577","https://openalex.org/W2169049902","https://openalex.org/W2295219273","https://openalex.org/W2508453970","https://openalex.org/W2777727847","https://openalex.org/W2796649226","https://openalex.org/W2805203449","https://openalex.org/W2891241286","https://openalex.org/W2916922955","https://openalex.org/W2998249817","https://openalex.org/W3103041597","https://openalex.org/W3151682695","https://openalex.org/W4243056457","https://openalex.org/W4246740707","https://openalex.org/W4251798485"],"related_works":["https://openalex.org/W3213381848","https://openalex.org/W2017587301","https://openalex.org/W2012954338","https://openalex.org/W2005148983","https://openalex.org/W2096672917","https://openalex.org/W2392023973","https://openalex.org/W2939411666","https://openalex.org/W2023832055","https://openalex.org/W2076165488","https://openalex.org/W2082485924"],"abstract_inverted_index":{"Automating":[0],"the":[1,9,23,60,80,95,128,146,158],"device":[2],"selection":[3,54],"in":[4,59,88,111,115,130,157],"heterogeneous":[5],"computing":[6],"platforms":[7],"requires":[8],"modelling":[10,30,173,178],"of":[11,25,82,97,127,136,139,160,169,179,191,209],"performance":[12,29,133,151,175],"both":[13],"on":[14,17,69,94,134],"CPUs":[15],"and":[16,39,153],"accelerators.":[18,162],"This":[19,91,163],"work":[20,164,204],"argues":[21],"for":[22,47,148,214],"use":[24],"a":[26,33,48,66,99,112,116,123,167,188],"hybrid":[27],"analytical":[28,150,200],"approach":[31],"is":[32,85,143],"practical":[34],"way":[35],"to":[36,42,101,144,154,172],"build":[37],"fast":[38],"efficient":[40],"methods":[41],"select":[43],"an":[44,104,192,206,210],"appropriate":[45],"target":[46,53,216],"given":[49],"computation":[50],"kernel.":[51],"The":[52,141],"problem":[55],"has":[56,64],"been":[57,65],"addressed":[58],"literature,":[61],"however":[62],"there":[63],"strong":[67],"emphasis":[68],"building":[70,98],"empirical":[71],"models":[72,152],"with":[73],"machine":[74],"learning":[75],"techniques.":[76],"We":[77],"argue":[78],"that":[79,184,196],"applicability":[81],"such":[83],"solutions":[84],"often":[86],"limited":[87],"production":[89],"systems.":[90],"paper":[92],"focus":[93],"issue":[96],"selector":[100],"decide":[102],"if":[103],"OpenMP":[105,211],"loop":[106],"nest":[107],"should":[108],"be":[109],"executed":[110],"CPU":[113],"or":[114],"GPU.":[117],"To":[118,183],"this":[119,203],"end,":[120,185],"it":[121],"offers":[122],"comprehensive":[124],"comparison":[125],"evaluation":[126],"difference":[129,194],"GPU":[131,161,174],"kernel":[132],"devices":[135],"multiple":[137],"generations":[138],"architectures.":[140],"idea":[142],"underscore":[145],"need":[147],"accurate":[149,177],"provide":[155],"insights":[156],"evolution":[159],"also":[165],"highlights":[166],"drawback":[168],"existing":[170],"approaches":[171],"-":[176],"memory":[180],"coalescing":[181],"characteristics.":[182],"we":[186],"examine":[187],"novel":[189],"application":[190],"inter-thread":[193],"analysis":[195],"can":[197],"further":[198],"improve":[199],"models.":[201],"Finally,":[202],"presents":[205],"initial":[207],"study":[208],"runtime":[212],"framework":[213],"target-offloading":[215],"selection.":[217]},"counts_by_year":[{"year":2024,"cited_by_count":2},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":3}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
