{"id":"https://openalex.org/W2733583685","doi":"https://doi.org/10.1109/sies.2017.7993387","title":"A performance, power, and energy efficiency analysis of load balancing techniques for GPUs","display_name":"A performance, power, and energy efficiency analysis of load balancing techniques for GPUs","publication_year":2017,"publication_date":"2017-06-01","ids":{"openalex":"https://openalex.org/W2733583685","doi":"https://doi.org/10.1109/sies.2017.7993387","mag":"2733583685"},"language":"en","primary_location":{"id":"doi:10.1109/sies.2017.7993387","is_oa":false,"landing_page_url":"https://doi.org/10.1109/sies.2017.7993387","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2017 12th IEEE International Symposium on Industrial Embedded Systems (SIES)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5053217543","display_name":"Federico Busato","orcid":null},"institutions":[{"id":"https://openalex.org/I119439378","display_name":"University of Verona","ror":"https://ror.org/039bp8j42","country_code":"IT","type":"education","lineage":["https://openalex.org/I119439378"]}],"countries":["IT"],"is_corresponding":true,"raw_author_name":"Federico Busato","raw_affiliation_strings":["Dept. of Computer Science, University of Verona, Italy"],"affiliations":[{"raw_affiliation_string":"Dept. of Computer Science, University of Verona, Italy","institution_ids":["https://openalex.org/I119439378"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5088231786","display_name":"Nicola Bombieri","orcid":"https://orcid.org/0000-0003-3256-5885"},"institutions":[{"id":"https://openalex.org/I119439378","display_name":"University of Verona","ror":"https://ror.org/039bp8j42","country_code":"IT","type":"education","lineage":["https://openalex.org/I119439378"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Nicola Bombieri","raw_affiliation_strings":["Dept. of Computer Science, University of Verona, Italy"],"affiliations":[{"raw_affiliation_string":"Dept. of Computer Science, University of Verona, Italy","institution_ids":["https://openalex.org/I119439378"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5053217543"],"corresponding_institution_ids":["https://openalex.org/I119439378"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.08524824,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"8"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10101","display_name":"Cloud Computing and Resource Management","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8561986684799194},{"id":"https://openalex.org/keywords/efficient-energy-use","display_name":"Efficient energy use","score":0.6670345664024353},{"id":"https://openalex.org/keywords/energy-consumption","display_name":"Energy consumption","score":0.5806832313537598},{"id":"https://openalex.org/keywords/thread","display_name":"Thread (computing)","score":0.5727652311325073},{"id":"https://openalex.org/keywords/workload","display_name":"Workload","score":0.5690414905548096},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.4866503179073334},{"id":"https://openalex.org/keywords/microarchitecture","display_name":"Microarchitecture","score":0.4786587655544281},{"id":"https://openalex.org/keywords/multithreading","display_name":"Multithreading","score":0.47550684213638306},{"id":"https://openalex.org/keywords/dynamic-demand","display_name":"Dynamic demand","score":0.47520264983177185},{"id":"https://openalex.org/keywords/cuda","display_name":"CUDA","score":0.4452654719352722},{"id":"https://openalex.org/keywords/supercomputer","display_name":"Supercomputer","score":0.43888312578201294},{"id":"https://openalex.org/keywords/performance-improvement","display_name":"Performance improvement","score":0.4253345727920532},{"id":"https://openalex.org/keywords/electrical-efficiency","display_name":"Electrical efficiency","score":0.41337376832962036},{"id":"https://openalex.org/keywords/power","display_name":"Power (physics)","score":0.3691689372062683},{"id":"https://openalex.org/keywords/computer-engineering","display_name":"Computer engineering","score":0.3679112195968628},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.3663296103477478},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.19033250212669373}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8561986684799194},{"id":"https://openalex.org/C2742236","wikidata":"https://www.wikidata.org/wiki/Q924713","display_name":"Efficient energy use","level":2,"score":0.6670345664024353},{"id":"https://openalex.org/C2780165032","wikidata":"https://www.wikidata.org/wiki/Q16869822","display_name":"Energy consumption","level":2,"score":0.5806832313537598},{"id":"https://openalex.org/C138101251","wikidata":"https://www.wikidata.org/wiki/Q213092","display_name":"Thread (computing)","level":2,"score":0.5727652311325073},{"id":"https://openalex.org/C2778476105","wikidata":"https://www.wikidata.org/wiki/Q628539","display_name":"Workload","level":2,"score":0.5690414905548096},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.4866503179073334},{"id":"https://openalex.org/C107598950","wikidata":"https://www.wikidata.org/wiki/Q259864","display_name":"Microarchitecture","level":2,"score":0.4786587655544281},{"id":"https://openalex.org/C201410400","wikidata":"https://www.wikidata.org/wiki/Q1064412","display_name":"Multithreading","level":3,"score":0.47550684213638306},{"id":"https://openalex.org/C45872418","wikidata":"https://www.wikidata.org/wiki/Q5318966","display_name":"Dynamic demand","level":3,"score":0.47520264983177185},{"id":"https://openalex.org/C2778119891","wikidata":"https://www.wikidata.org/wiki/Q477690","display_name":"CUDA","level":2,"score":0.4452654719352722},{"id":"https://openalex.org/C83283714","wikidata":"https://www.wikidata.org/wiki/Q121117","display_name":"Supercomputer","level":2,"score":0.43888312578201294},{"id":"https://openalex.org/C2778915421","wikidata":"https://www.wikidata.org/wiki/Q3643177","display_name":"Performance improvement","level":2,"score":0.4253345727920532},{"id":"https://openalex.org/C118993495","wikidata":"https://www.wikidata.org/wiki/Q5042828","display_name":"Electrical efficiency","level":3,"score":0.41337376832962036},{"id":"https://openalex.org/C163258240","wikidata":"https://www.wikidata.org/wiki/Q25342","display_name":"Power (physics)","level":2,"score":0.3691689372062683},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.3679112195968628},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.3663296103477478},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.19033250212669373},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.0},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C18903297","wikidata":"https://www.wikidata.org/wiki/Q7150","display_name":"Ecology","level":1,"score":0.0},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/sies.2017.7993387","is_oa":false,"landing_page_url":"https://doi.org/10.1109/sies.2017.7993387","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2017 12th IEEE International Symposium on Industrial Embedded Systems (SIES)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Affordable and clean energy","id":"https://metadata.un.org/sdg/7","score":0.9100000262260437}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":18,"referenced_works":["https://openalex.org/W1504291959","https://openalex.org/W1770249075","https://openalex.org/W1893178273","https://openalex.org/W1985291160","https://openalex.org/W2022541337","https://openalex.org/W2033597569","https://openalex.org/W2035080386","https://openalex.org/W2050127041","https://openalex.org/W2059966434","https://openalex.org/W2069590084","https://openalex.org/W2069897533","https://openalex.org/W2103838950","https://openalex.org/W2107173440","https://openalex.org/W2109473404","https://openalex.org/W2126739950","https://openalex.org/W2502734103","https://openalex.org/W2555931724","https://openalex.org/W4246219036"],"related_works":["https://openalex.org/W3149034384","https://openalex.org/W2097410296","https://openalex.org/W2497398711","https://openalex.org/W2133675875","https://openalex.org/W2057234250","https://openalex.org/W2465756754","https://openalex.org/W2094754158","https://openalex.org/W32488798","https://openalex.org/W2164868807","https://openalex.org/W2030281207"],"abstract_inverted_index":{"Load":[0],"balancing":[1],"is":[2,19],"a":[3,75,91],"key":[4],"aspect":[5],"to":[6,49,106,127,149,206],"face":[7],"when":[8,84],"implementing":[9],"any":[10],"parallel":[11],"application":[12],"for":[13,98,165,169],"Graphic":[14],"Processing":[15],"Units":[16],"(GPUs).":[17],"It":[18],"particularly":[20],"crucial":[21],"if":[22],"one":[23,71],"considers":[24],"that":[25,69],"it":[26,65,115],"strongly":[27],"impacts":[28],"on":[29,157,162],"performance,":[30,181],"power":[31,158],"and":[32,101,111,137,159,167,176,183,197,209],"energy":[33,160,184],"efficiency":[34],"of":[35,72,82,123,153,180,203],"the":[36,47,79,118,128,132,141,151,186,193,201,204],"whole":[37],"application.":[38],"Many":[39],"different":[40,194,207,211],"partitioning":[41,100],"techniques":[42,156,199],"have":[43],"been":[44,67,96,147],"proposed":[45,97],"in":[46,86,121,131,178],"past":[48],"deal":[50],"with":[51,59,125,134],"either":[52],"very":[53,108],"regular":[54,136],"workloads":[55,61],"(static":[56],"techniques)":[57],"or":[58],"irregular":[60,138],"(dynamic":[62],"techniques).":[63],"Nevertheless,":[64],"has":[66,95,146],"proven":[68],"no":[70,144],"them":[73],"provides":[74],"sound":[76],"trade-off,":[77],"from":[78],"performance":[80,124,142],"point":[81],"view,":[83],"applied":[85],"both":[87,135,163],"cases.":[88],"More":[89],"recently,":[90],"dynamic":[92],"multi-phase":[93],"approach":[94],"workload":[99],"work":[102],"item-to-thread":[103],"allocation.":[104],"Thanks":[105],"its":[107],"low":[109],"complexity":[110],"several":[112],"architecture-oriented":[113],"optimizations,":[114],"can":[116],"provide":[117],"best":[119],"results":[120,188],"terms":[122,179],"respect":[126],"other":[129],"approaches":[130],"literature":[133],"datasets.":[139],"Besides":[140],"comparison,":[143],"analysis":[145],"conducted":[148],"show":[150],"effect":[152],"all":[154,192],"these":[155],"consumption":[161],"GPUs":[164,168],"desktop":[166],"low-power":[170,224],"embedded":[171,225],"systems.":[172],"This":[173],"paper":[174],"shows":[175],"compares,":[177],"power,":[182],"efficiency,":[185],"experimental":[187],"obtained":[189],"by":[190],"applying":[191],"static,":[195],"dynamic,":[196],"semi-dynamic":[198],"at":[200],"state":[202],"art":[205],"datasets":[208],"over":[210],"GPU":[212],"technologies":[213],"(i.e.,":[214],"NVIDIA":[215,220],"Maxwell":[216],"GTX":[217],"980":[218],"device,":[219],"Jetson":[221],"Kepler":[222],"TK1":[223],"system).":[226]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
