{"id":"https://openalex.org/W3000651133","doi":"https://doi.org/10.1109/igsc48788.2019.8957171","title":"Performance of Floating-point Intensive Kernels on Low-power Processor \u2013 A Case Study with Geodesic Distance Kernel","display_name":"Performance of Floating-point Intensive Kernels on Low-power Processor \u2013 A Case Study with Geodesic Distance Kernel","publication_year":2019,"publication_date":"2019-10-01","ids":{"openalex":"https://openalex.org/W3000651133","doi":"https://doi.org/10.1109/igsc48788.2019.8957171","mag":"3000651133"},"language":"en","primary_location":{"id":"doi:10.1109/igsc48788.2019.8957171","is_oa":false,"landing_page_url":"http://doi.org/10.1109/igsc48788.2019.8957171","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2019 Tenth International Green and Sustainable Computing Conference (IGSC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101790023","display_name":"Zheming Jin","orcid":"https://orcid.org/0000-0002-7197-780X"},"institutions":[{"id":"https://openalex.org/I1282105669","display_name":"Argonne National Laboratory","ror":"https://ror.org/05gvnxz63","country_code":"US","type":"facility","lineage":["https://openalex.org/I1282105669","https://openalex.org/I1330989302","https://openalex.org/I39565521","https://openalex.org/I40347166"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Zheming Jin","raw_affiliation_strings":["Leadership Computing Facility Argonne National Laboratory,Lemont,IL,USA,9700 Cass Avenue"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Leadership Computing Facility Argonne National Laboratory,Lemont,IL,USA,9700 Cass Avenue","institution_ids":["https://openalex.org/I1282105669"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5089114949","display_name":"Paulius Velesko","orcid":null},"institutions":[{"id":"https://openalex.org/I1282105669","display_name":"Argonne National Laboratory","ror":"https://ror.org/05gvnxz63","country_code":"US","type":"facility","lineage":["https://openalex.org/I1282105669","https://openalex.org/I1330989302","https://openalex.org/I39565521","https://openalex.org/I40347166"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Paulius Velesko","raw_affiliation_strings":["Leadership Computing Facility Argonne National Laboratory,Lemont,IL,USA,9700 Cass Avenue"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Leadership Computing Facility Argonne National Laboratory,Lemont,IL,USA,9700 Cass Avenue","institution_ids":["https://openalex.org/I1282105669"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5050512119","display_name":"Hal Finkel","orcid":"https://orcid.org/0000-0002-7551-7122"},"institutions":[{"id":"https://openalex.org/I1282105669","display_name":"Argonne National Laboratory","ror":"https://ror.org/05gvnxz63","country_code":"US","type":"facility","lineage":["https://openalex.org/I1282105669","https://openalex.org/I1330989302","https://openalex.org/I39565521","https://openalex.org/I40347166"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Hal Finkel","raw_affiliation_strings":["Leadership Computing Facility Argonne National Laboratory,Lemont,IL,USA,9700 Cass Avenue"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Leadership Computing Facility Argonne National Laboratory,Lemont,IL,USA,9700 Cass Avenue","institution_ids":["https://openalex.org/I1282105669"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.19296024,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9918000102043152,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11697","display_name":"Numerical Methods and Algorithms","score":0.9797000288963318,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/kernel","display_name":"Kernel (algebra)","score":0.7338863611221313},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.733786940574646},{"id":"https://openalex.org/keywords/floating-point","display_name":"Floating point","score":0.7044514417648315},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.625495970249176},{"id":"https://openalex.org/keywords/xeon","display_name":"Xeon","score":0.6004011631011963},{"id":"https://openalex.org/keywords/compiler","display_name":"Compiler","score":0.5151140093803406},{"id":"https://openalex.org/keywords/single-precision-floating-point-format","display_name":"Single-precision floating-point format","score":0.48335808515548706},{"id":"https://openalex.org/keywords/double-precision-floating-point-format","display_name":"Double-precision floating-point format","score":0.43413469195365906},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.17884328961372375},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.14549103379249573},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.09860962629318237}],"concepts":[{"id":"https://openalex.org/C74193536","wikidata":"https://www.wikidata.org/wiki/Q574844","display_name":"Kernel (algebra)","level":2,"score":0.7338863611221313},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.733786940574646},{"id":"https://openalex.org/C84211073","wikidata":"https://www.wikidata.org/wiki/Q117879","display_name":"Floating point","level":2,"score":0.7044514417648315},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.625495970249176},{"id":"https://openalex.org/C145108525","wikidata":"https://www.wikidata.org/wiki/Q656154","display_name":"Xeon","level":2,"score":0.6004011631011963},{"id":"https://openalex.org/C169590947","wikidata":"https://www.wikidata.org/wiki/Q47506","display_name":"Compiler","level":2,"score":0.5151140093803406},{"id":"https://openalex.org/C133095886","wikidata":"https://www.wikidata.org/wiki/Q1307173","display_name":"Single-precision floating-point format","level":3,"score":0.48335808515548706},{"id":"https://openalex.org/C35912277","wikidata":"https://www.wikidata.org/wiki/Q1243369","display_name":"Double-precision floating-point format","level":3,"score":0.43413469195365906},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.17884328961372375},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.14549103379249573},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.09860962629318237},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/igsc48788.2019.8957171","is_oa":false,"landing_page_url":"http://doi.org/10.1109/igsc48788.2019.8957171","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2019 Tenth International Green and Sustainable Computing Conference (IGSC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.7699999809265137,"id":"https://metadata.un.org/sdg/7","display_name":"Affordable and clean energy"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":17,"referenced_works":["https://openalex.org/W1902930330","https://openalex.org/W1984222112","https://openalex.org/W2076039939","https://openalex.org/W2291533962","https://openalex.org/W2465123170","https://openalex.org/W2475663704","https://openalex.org/W2581070787","https://openalex.org/W2746871167","https://openalex.org/W2753382199","https://openalex.org/W2786263629","https://openalex.org/W2800159407","https://openalex.org/W2805203449","https://openalex.org/W2886841777","https://openalex.org/W4249203549","https://openalex.org/W4249219201","https://openalex.org/W6748392146","https://openalex.org/W6991883878"],"related_works":["https://openalex.org/W3150370983","https://openalex.org/W2239119680","https://openalex.org/W1564887326","https://openalex.org/W3215589575","https://openalex.org/W2116803521","https://openalex.org/W3150959508","https://openalex.org/W1571090276","https://openalex.org/W2773283032","https://openalex.org/W1973800584","https://openalex.org/W2265064666"],"abstract_inverted_index":{"A":[0],"processor,":[1],"with":[2,79],"a":[3,6,14,32,45,53,66,72,80],"GPU":[4,26,34,150,186,228],"and":[5,40,102,105,109,118,139,153,168,240],"CPU":[7,117],"integrated":[8,25,59],"on":[9,115,144,170,184,194,203],"the":[10,50,77,84,88,91,95,107,112,116,130,134,137,156,160,163,185,195,199,204,214,219,226,237,248],"same":[11],"chip,":[12],"is":[13,27,44,210],"promising":[15],"low-power":[16],"system":[17],"for":[18,236,247],"floating-":[19],"point":[20],"intensive":[21,55,69,201],"applications.":[22],"While":[23],"an":[24,58,127,145,171],"not":[28],"designed":[29],"to":[30,36,47,126,213,233,244],"outperform":[31],"discrete":[33],"due":[35],"its":[37],"power,":[38],"area,":[39],"thermal":[41,215],"constraints,":[42],"there":[43],"need":[46],"better":[48],"understand":[49],"performance":[51,108,135,161],"of":[52,87,111,136,162,218],"floating-point":[54,68,98,131,141,180,200,222],"kernel":[56,70,78,89,113,202],"using":[57],"GPU.":[60,119],"Toward":[61],"this":[62],"end,":[63],"we":[64],"choose":[65],"representative":[67],"as":[71],"case":[73],"study.":[74],"We":[75],"port":[76],"vendor-neutral":[81],"framework,":[82],"analyze":[83],"compiler":[85],"optimizations":[86,132,157],"at":[90],"assembly":[92],"code,":[93],"evaluate":[94],"relationship":[96],"between":[97],"operations":[99,181],"per":[100,182],"second":[101,183],"arithmetic":[103,190],"intensity,":[104],"compare":[106],"power":[110,216,229],"implementations":[114],"Our":[120],"key":[121],"findings":[122],"are:":[123],"1)":[124],"Compared":[125],"un-optimized":[128],"kernel,":[129,239],"improve":[133,159],"single-":[138],"double-precision":[140,238],"kernels":[142,165],"executing":[143],"Intel\u00ae":[146,172],"GEN8":[147],"Iris":[148],"Pro":[149],"by":[151,166],"15.4X":[152],"5.4X,":[154],"respectively;":[155],"also":[158],"two":[164],"5.6X":[167],"3.4X":[169],"Xeon\u00ae":[173],"E3":[174],"CPU,":[175],"respectively.":[176],"2)":[177],"Achieving":[178],"peak":[179],"requires":[187],"much":[188],"higher":[189],"intensity":[191],"than":[192],"that":[193],"CPU.":[196],"3)":[197],"Running":[198],"processor":[205],"consumes":[206],"48":[207],"Watts,":[208],"which":[209],"very":[211],"close":[212],"draw":[217],"processor.":[220],"The":[221],"optimization":[223],"can":[224],"reduce":[225],"average":[227],"from":[230,241],"35.7":[231],"W":[232,235,243,246],"22.7":[234],"33.1":[242],"8.8":[245],"single-precision":[249],"kernel.":[250]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
