{"id":"https://openalex.org/W3000837919","doi":"https://doi.org/10.1145/3431731","title":"A Simple Model for Portable and Fast Prediction of Execution Time and Power Consumption of GPU Kernels","display_name":"A Simple Model for Portable and Fast Prediction of Execution Time and Power Consumption of GPU Kernels","publication_year":2020,"publication_date":"2020-12-30","ids":{"openalex":"https://openalex.org/W3000837919","doi":"https://doi.org/10.1145/3431731","mag":"3000837919"},"language":"en","primary_location":{"id":"doi:10.1145/3431731","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3431731","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3431731","source":{"id":"https://openalex.org/S26056741","display_name":"ACM Transactions on Architecture and Code Optimization","issn_l":"1544-3566","issn":["1544-3566","1544-3973"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Architecture and Code Optimization","raw_type":"journal-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3431731","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5039504688","display_name":"Lorenz Braun","orcid":"https://orcid.org/0000-0002-4051-8950"},"institutions":[{"id":"https://openalex.org/I223822909","display_name":"Heidelberg University","ror":"https://ror.org/038t36y30","country_code":"DE","type":"education","lineage":["https://openalex.org/I223822909"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Lorenz Braun","raw_affiliation_strings":["Institute of Computer Engineering, Heidelberg University, Germany","Heidelberg University"],"raw_orcid":"https://orcid.org/0000-0002-4051-8950","affiliations":[{"raw_affiliation_string":"Institute of Computer Engineering, Heidelberg University, Germany","institution_ids":["https://openalex.org/I223822909"]},{"raw_affiliation_string":"Heidelberg University","institution_ids":["https://openalex.org/I223822909"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5053045508","display_name":"Sotirios Nikas","orcid":null},"institutions":[{"id":"https://openalex.org/I223822909","display_name":"Heidelberg University","ror":"https://ror.org/038t36y30","country_code":"DE","type":"education","lineage":["https://openalex.org/I223822909"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Sotirios Nikas","raw_affiliation_strings":["Engineering Mathematics and Computing Lab, Heidelberg University, Germany","Heidelberg University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Engineering Mathematics and Computing Lab, Heidelberg University, Germany","institution_ids":["https://openalex.org/I223822909"]},{"raw_affiliation_string":"Heidelberg University","institution_ids":["https://openalex.org/I223822909"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100709122","display_name":"Chen Song","orcid":"https://orcid.org/0000-0003-3775-8184"},"institutions":[{"id":"https://openalex.org/I223822909","display_name":"Heidelberg University","ror":"https://ror.org/038t36y30","country_code":"DE","type":"education","lineage":["https://openalex.org/I223822909"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Chen Song","raw_affiliation_strings":["Engineering Mathematics and Computing Lab, Heidelberg University, Germany"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Engineering Mathematics and Computing Lab, Heidelberg University, Germany","institution_ids":["https://openalex.org/I223822909"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5058920681","display_name":"Vincent Heuveline","orcid":"https://orcid.org/0000-0002-2217-7558"},"institutions":[{"id":"https://openalex.org/I223822909","display_name":"Heidelberg University","ror":"https://ror.org/038t36y30","country_code":"DE","type":"education","lineage":["https://openalex.org/I223822909"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Vincent Heuveline","raw_affiliation_strings":["Engineering Mathematics and Computing Lab, Heidelberg University, Germany","Heidelberg University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Engineering Mathematics and Computing Lab, Heidelberg University, Germany","institution_ids":["https://openalex.org/I223822909"]},{"raw_affiliation_string":"Heidelberg University","institution_ids":["https://openalex.org/I223822909"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5014251134","display_name":"Holger Fr\u00f6ning","orcid":"https://orcid.org/0000-0001-9562-0680"},"institutions":[{"id":"https://openalex.org/I223822909","display_name":"Heidelberg University","ror":"https://ror.org/038t36y30","country_code":"DE","type":"education","lineage":["https://openalex.org/I223822909"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Holger Fr\u00f6ning","raw_affiliation_strings":["Institute of Computer Engineering, Heidelberg University, Germany","Heidelberg University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Institute of Computer Engineering, Heidelberg University, Germany","institution_ids":["https://openalex.org/I223822909"]},{"raw_affiliation_string":"Heidelberg University","institution_ids":["https://openalex.org/I223822909"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5039504688"],"corresponding_institution_ids":["https://openalex.org/I223822909"],"apc_list":null,"apc_paid":null,"fwci":1.4143,"has_fulltext":true,"cited_by_count":10,"citation_normalized_percentile":{"value":0.796419,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":"18","issue":"1","first_page":"1","last_page":"25"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.9980000257492065,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7793877720832825},{"id":"https://openalex.org/keywords/latency","display_name":"Latency (audio)","score":0.6467916369438171},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.6319612264633179},{"id":"https://openalex.org/keywords/power-consumption","display_name":"Power consumption","score":0.6184006333351135},{"id":"https://openalex.org/keywords/scheduling","display_name":"Scheduling (production processes)","score":0.5979406237602234},{"id":"https://openalex.org/keywords/execution-time","display_name":"Execution time","score":0.5448980927467346},{"id":"https://openalex.org/keywords/kernel","display_name":"Kernel (algebra)","score":0.5389092564582825},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5042589902877808},{"id":"https://openalex.org/keywords/simple","display_name":"Simple (philosophy)","score":0.48937028646469116},{"id":"https://openalex.org/keywords/coprocessor","display_name":"Coprocessor","score":0.42692112922668457},{"id":"https://openalex.org/keywords/cuda","display_name":"CUDA","score":0.4216662645339966},{"id":"https://openalex.org/keywords/power","display_name":"Power (physics)","score":0.29996761679649353},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.11471277475357056},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.09116438031196594}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7793877720832825},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.6467916369438171},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.6319612264633179},{"id":"https://openalex.org/C2984118289","wikidata":"https://www.wikidata.org/wiki/Q29954","display_name":"Power consumption","level":3,"score":0.6184006333351135},{"id":"https://openalex.org/C206729178","wikidata":"https://www.wikidata.org/wiki/Q2271896","display_name":"Scheduling (production processes)","level":2,"score":0.5979406237602234},{"id":"https://openalex.org/C2989134064","wikidata":"https://www.wikidata.org/wiki/Q288510","display_name":"Execution time","level":2,"score":0.5448980927467346},{"id":"https://openalex.org/C74193536","wikidata":"https://www.wikidata.org/wiki/Q574844","display_name":"Kernel (algebra)","level":2,"score":0.5389092564582825},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5042589902877808},{"id":"https://openalex.org/C2780586882","wikidata":"https://www.wikidata.org/wiki/Q7520643","display_name":"Simple (philosophy)","level":2,"score":0.48937028646469116},{"id":"https://openalex.org/C86111242","wikidata":"https://www.wikidata.org/wiki/Q859595","display_name":"Coprocessor","level":2,"score":0.42692112922668457},{"id":"https://openalex.org/C2778119891","wikidata":"https://www.wikidata.org/wiki/Q477690","display_name":"CUDA","level":2,"score":0.4216662645339966},{"id":"https://openalex.org/C163258240","wikidata":"https://www.wikidata.org/wiki/Q25342","display_name":"Power (physics)","level":2,"score":0.29996761679649353},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.11471277475357056},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.09116438031196594},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1145/3431731","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3431731","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3431731","source":{"id":"https://openalex.org/S26056741","display_name":"ACM Transactions on Architecture and Code Optimization","issn_l":"1544-3566","issn":["1544-3566","1544-3973"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Architecture and Code Optimization","raw_type":"journal-article"},{"id":"pmh:oai:arXiv.org:2001.07104","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2001.07104","pdf_url":"https://arxiv.org/pdf/2001.07104","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"doi:10.48550/arxiv.2001.07104","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2001.07104","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"},{"id":"mag:3000837919","is_oa":false,"landing_page_url":null,"pdf_url":null,"source":null,"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":null}],"best_oa_location":{"id":"doi:10.1145/3431731","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3431731","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3431731","source":{"id":"https://openalex.org/S26056741","display_name":"ACM Transactions on Architecture and Code Optimization","issn_l":"1544-3566","issn":["1544-3566","1544-3973"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Architecture and Code Optimization","raw_type":"journal-article"},"sustainable_development_goals":[{"score":0.6499999761581421,"display_name":"Affordable and clean energy","id":"https://metadata.un.org/sdg/7"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320321114","display_name":"Bundesministerium f\u00fcr Bildung und Forschung","ror":"https://ror.org/04pz7b180"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3000837919.pdf","grobid_xml":"https://content.openalex.org/works/W3000837919.grobid-xml"},"referenced_works_count":54,"referenced_works":["https://openalex.org/W1509843845","https://openalex.org/W1567646530","https://openalex.org/W1599871777","https://openalex.org/W1968244180","https://openalex.org/W1978969737","https://openalex.org/W1987121730","https://openalex.org/W1988797755","https://openalex.org/W2030553163","https://openalex.org/W2033597569","https://openalex.org/W2038666141","https://openalex.org/W2043083835","https://openalex.org/W2045271686","https://openalex.org/W2054224993","https://openalex.org/W2080592089","https://openalex.org/W2080951248","https://openalex.org/W2098290747","https://openalex.org/W2101234009","https://openalex.org/W2102849319","https://openalex.org/W2113282196","https://openalex.org/W2124833832","https://openalex.org/W2128120785","https://openalex.org/W2130336316","https://openalex.org/W2131218797","https://openalex.org/W2135120170","https://openalex.org/W2144264070","https://openalex.org/W2149234156","https://openalex.org/W2154776925","https://openalex.org/W2167334577","https://openalex.org/W2170059648","https://openalex.org/W2273440736","https://openalex.org/W2314321304","https://openalex.org/W2336547356","https://openalex.org/W2526627475","https://openalex.org/W2559932280","https://openalex.org/W2611990469","https://openalex.org/W2735065300","https://openalex.org/W2740376603","https://openalex.org/W2751871758","https://openalex.org/W2794513473","https://openalex.org/W2798724095","https://openalex.org/W2891772212","https://openalex.org/W2899387410","https://openalex.org/W2911964244","https://openalex.org/W2921788688","https://openalex.org/W2930604630","https://openalex.org/W2948929415","https://openalex.org/W2962843187","https://openalex.org/W2963926583","https://openalex.org/W2984189761","https://openalex.org/W2997591727","https://openalex.org/W3015338905","https://openalex.org/W3100970744","https://openalex.org/W4240341008","https://openalex.org/W4250047567"],"related_works":["https://openalex.org/W3115175310","https://openalex.org/W2762077810","https://openalex.org/W2910063209","https://openalex.org/W3037110041","https://openalex.org/W3002400545","https://openalex.org/W2741888411","https://openalex.org/W2923029190","https://openalex.org/W2754990314","https://openalex.org/W2794513473","https://openalex.org/W3018696818","https://openalex.org/W156843270","https://openalex.org/W2998901775","https://openalex.org/W2945969444","https://openalex.org/W2916922955","https://openalex.org/W2803142888","https://openalex.org/W2051744938","https://openalex.org/W2532279925","https://openalex.org/W3206423390","https://openalex.org/W2315649786","https://openalex.org/W2983655274"],"abstract_inverted_index":{"Characterizing":[0],"compute":[1,45],"kernel":[2],"execution":[3],"behavior":[4],"on":[5,39],"GPUs":[6,29],"for":[7,73,77,86],"efficient":[8],"task":[9],"scheduling":[10],"is":[11,36],"a":[12,19,64,87],"non-trivial":[13],"task.":[14],"We":[15],"address":[16],"this":[17],"with":[18],"simple":[20],"model":[21,35,59],"enabling":[22],"portable":[23],"and":[24,54,75,93],"fast":[25],"predictions":[26],"among":[27],"different":[28,82],"using":[30,42,61],"only":[31],"hardware-independent":[32],"features.":[33],"This":[34],"built":[37],"based":[38],"random":[40],"forests":[41],"189":[43],"individual":[44],"kernels":[46],"from":[47],"benchmarks":[48],"such":[49],"as":[50],"Parboil,":[51],"Rodinia,":[52],"Polybench-GPU,":[53],"SHOC.":[55],"Evaluation":[56],"of":[57,71],"the":[58],"performance":[60],"cross-validation":[62],"yields":[63],"median":[65],"Mean":[66],"Average":[67],"Percentage":[68],"Error":[69],"(MAPE)":[70],"8.86\u201352.0%":[72],"time":[74],"1.84\u20132.94%":[76],"power":[78],"prediction":[79,89],"across":[80],"five":[81],"GPUs,":[83],"while":[84],"latency":[85],"single":[88],"varies":[90],"between":[91],"15":[92],"108":[94],"ms.":[95]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":3},{"year":2021,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
