{"id":"https://openalex.org/W2075135178","doi":"https://doi.org/10.4018/jghpc.2012070103","title":"Optimizing Techniques for OpenCL Programs on Heterogeneous Platforms","display_name":"Optimizing Techniques for OpenCL Programs on Heterogeneous Platforms","publication_year":2012,"publication_date":"2012-07-01","ids":{"openalex":"https://openalex.org/W2075135178","doi":"https://doi.org/10.4018/jghpc.2012070103","mag":"2075135178"},"language":"en","primary_location":{"id":"doi:10.4018/jghpc.2012070103","is_oa":false,"landing_page_url":"https://doi.org/10.4018/jghpc.2012070103","pdf_url":null,"source":{"id":"https://openalex.org/S43307632","display_name":"International Journal of Grid and High Performance Computing","issn_l":"1938-0259","issn":["1938-0259","1938-0267"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320424","host_organization_name":"IGI Global","host_organization_lineage":["https://openalex.org/P4310320424"],"host_organization_lineage_names":["IGI Global"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"International Journal of Grid and High Performance Computing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5038121796","display_name":"Slo\u2010Li Chu","orcid":"https://orcid.org/0000-0003-3999-7293"},"institutions":[{"id":"https://openalex.org/I151221077","display_name":"Chung Yuan Christian University","ror":"https://ror.org/02w8ws377","country_code":"TW","type":"education","lineage":["https://openalex.org/I151221077"]}],"countries":["TW"],"is_corresponding":true,"raw_author_name":"Slo-Li Chu","raw_affiliation_strings":["Chung Yuan Christian University, Taiwan"],"affiliations":[{"raw_affiliation_string":"Chung Yuan Christian University, Taiwan","institution_ids":["https://openalex.org/I151221077"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5109919399","display_name":"Chih-Chieh Hsiao","orcid":null},"institutions":[{"id":"https://openalex.org/I151221077","display_name":"Chung Yuan Christian University","ror":"https://ror.org/02w8ws377","country_code":"TW","type":"education","lineage":["https://openalex.org/I151221077"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Chih-Chieh Hsiao","raw_affiliation_strings":["Chung Yuan Christian University, Taiwan"],"affiliations":[{"raw_affiliation_string":"Chung Yuan Christian University, Taiwan","institution_ids":["https://openalex.org/I151221077"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5038121796"],"corresponding_institution_ids":["https://openalex.org/I151221077"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.13425145,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"4","issue":"3","first_page":"48","last_page":"62"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8879850506782532},{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.7247660160064697},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.6609383225440979},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.6011331081390381},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.57518470287323},{"id":"https://openalex.org/keywords/symmetric-multiprocessor-system","display_name":"Symmetric multiprocessor system","score":0.5512186884880066},{"id":"https://openalex.org/keywords/cuda","display_name":"CUDA","score":0.5416626334190369},{"id":"https://openalex.org/keywords/memory-bandwidth","display_name":"Memory bandwidth","score":0.5013580322265625},{"id":"https://openalex.org/keywords/computer-architecture","display_name":"Computer architecture","score":0.47170042991638184},{"id":"https://openalex.org/keywords/thread","display_name":"Thread (computing)","score":0.4606897830963135},{"id":"https://openalex.org/keywords/kernel","display_name":"Kernel (algebra)","score":0.4557596445083618},{"id":"https://openalex.org/keywords/parallelism","display_name":"Parallelism (grammar)","score":0.4402344822883606},{"id":"https://openalex.org/keywords/data-parallelism","display_name":"Data parallelism","score":0.42003774642944336},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.18016231060028076},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.10628977417945862}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8879850506782532},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.7247660160064697},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.6609383225440979},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.6011331081390381},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.57518470287323},{"id":"https://openalex.org/C172430144","wikidata":"https://www.wikidata.org/wiki/Q17111997","display_name":"Symmetric multiprocessor system","level":2,"score":0.5512186884880066},{"id":"https://openalex.org/C2778119891","wikidata":"https://www.wikidata.org/wiki/Q477690","display_name":"CUDA","level":2,"score":0.5416626334190369},{"id":"https://openalex.org/C188045654","wikidata":"https://www.wikidata.org/wiki/Q17148339","display_name":"Memory bandwidth","level":2,"score":0.5013580322265625},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.47170042991638184},{"id":"https://openalex.org/C138101251","wikidata":"https://www.wikidata.org/wiki/Q213092","display_name":"Thread (computing)","level":2,"score":0.4606897830963135},{"id":"https://openalex.org/C74193536","wikidata":"https://www.wikidata.org/wiki/Q574844","display_name":"Kernel (algebra)","level":2,"score":0.4557596445083618},{"id":"https://openalex.org/C2781172179","wikidata":"https://www.wikidata.org/wiki/Q853109","display_name":"Parallelism (grammar)","level":2,"score":0.4402344822883606},{"id":"https://openalex.org/C61483411","wikidata":"https://www.wikidata.org/wiki/Q3124522","display_name":"Data parallelism","level":3,"score":0.42003774642944336},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.18016231060028076},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.10628977417945862},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.4018/jghpc.2012070103","is_oa":false,"landing_page_url":"https://doi.org/10.4018/jghpc.2012070103","pdf_url":null,"source":{"id":"https://openalex.org/S43307632","display_name":"International Journal of Grid and High Performance Computing","issn_l":"1938-0259","issn":["1938-0259","1938-0267"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320424","host_organization_name":"IGI Global","host_organization_lineage":["https://openalex.org/P4310320424"],"host_organization_lineage_names":["IGI Global"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"International Journal of Grid and High Performance Computing","raw_type":"journal-article"},{"id":"pmh:oai:RePEc:igg:jghpc0:v:4:y:2012:i:3:p:48-62","is_oa":false,"landing_page_url":"http://services.igi-global.com/resolvedoi/resolve.aspx?doi=10.4018/jghpc.2012070103","pdf_url":null,"source":{"id":"https://openalex.org/S4306401271","display_name":"RePEc: Research Papers in Economics","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I77793887","host_organization_name":"Federal Reserve Bank of St. Louis","host_organization_lineage":["https://openalex.org/I77793887"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":8,"referenced_works":["https://openalex.org/W1976979937","https://openalex.org/W1977313606","https://openalex.org/W1991297274","https://openalex.org/W1992851788","https://openalex.org/W2028914809","https://openalex.org/W2031781358","https://openalex.org/W2128022558","https://openalex.org/W2620440553"],"related_works":["https://openalex.org/W1554644772","https://openalex.org/W2003935582","https://openalex.org/W2950520577","https://openalex.org/W3209384898","https://openalex.org/W74409296","https://openalex.org/W1595834484","https://openalex.org/W2089690534","https://openalex.org/W3170887803","https://openalex.org/W1991844655","https://openalex.org/W2468095077"],"abstract_inverted_index":{"Heterogeneous":[0],"platforms":[1,71],"that":[2],"are":[3,11,66,88],"consisted":[4],"of":[5,57,82,94,104],"CPU":[6],"and":[7,26,76,85],"add-on":[8,19,41],"streaming":[9,52],"processors":[10,20,53],"widely":[12],"used":[13],"in":[14],"modern":[15],"computer":[16],"systems.":[17],"These":[18],"provide":[21],"substantially":[22],"more":[23],"computation":[24],"capability":[25],"memory":[27],"bandwidth":[28],"than":[29],"conventional":[30],"multi-cores":[31],"platforms.":[32,128],"General-purpose":[33],"computations":[34],"can":[35,119],"also":[36],"be":[37,98],"leveraged":[38],"onto":[39],"these":[40,51,83,105],"processors.":[42],"In":[43],"order":[44],"to":[45,72,100,102,116,123],"utilize":[46],"their":[47,58],"potential":[48],"performance,":[49],"programming":[50],"is":[54],"challenging":[55],"because":[56],"diverse":[59],"underlying":[60],"architectural":[61,80],"characteristics.":[62],"Several":[63],"optimization":[64,86,106],"techniques":[65,84],"applied":[67],"on":[68,125],"OpenCL-compatible":[69],"heterogeneous":[70],"achieve":[73],"thread-level,":[74],"data-level,":[75],"instruction-level":[77],"parallelism.":[78],"The":[79,108],"implications":[81],"principles":[87],"discussed.":[89],"Finally,":[90],"a":[91],"case":[92],"study":[93],"MRI-Q":[95],"benchmark":[96],"will":[97],"addressed":[99],"illustrate":[101],"capabilities":[103],"techniques.":[107],"experimental":[109],"results":[110],"reveal":[111],"the":[112],"speedup":[113],"from":[114,121],"non-optimized":[115],"optimized":[117],"kernel":[118],"vary":[120],"8":[122],"63":[124],"different":[126],"target":[127]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
