{"id":"https://openalex.org/W2028084841","doi":"https://doi.org/10.1109/padsw.2014.7097837","title":"Performance analysis of HPC applications with irregular tree data structures","display_name":"Performance analysis of HPC applications with irregular tree data structures","publication_year":2014,"publication_date":"2014-12-01","ids":{"openalex":"https://openalex.org/W2028084841","doi":"https://doi.org/10.1109/padsw.2014.7097837","mag":"2028084841"},"language":"en","primary_location":{"id":"doi:10.1109/padsw.2014.7097837","is_oa":false,"landing_page_url":"https://doi.org/10.1109/padsw.2014.7097837","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2014 20th IEEE International Conference on Parallel and Distributed Systems (ICPADS)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5084479278","display_name":"Ahmed Khawaja","orcid":null},"institutions":[{"id":"https://openalex.org/I86519309","display_name":"The University of Texas at Austin","ror":"https://ror.org/00hj54h04","country_code":"US","type":"education","lineage":["https://openalex.org/I86519309"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Ahmed Khawaja","raw_affiliation_strings":["Department of Electrical and Computer Engineering, University of Texas at Austin, Austin, Texas","Department of Electrical and Computer Engineering, University of Texas at Austin, , USA"],"affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering, University of Texas at Austin, Austin, Texas","institution_ids":["https://openalex.org/I86519309"]},{"raw_affiliation_string":"Department of Electrical and Computer Engineering, University of Texas at Austin, , USA","institution_ids":["https://openalex.org/I86519309"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101805530","display_name":"Jiajun Wang","orcid":"https://orcid.org/0000-0002-2654-0609"},"institutions":[{"id":"https://openalex.org/I86519309","display_name":"The University of Texas at Austin","ror":"https://ror.org/00hj54h04","country_code":"US","type":"education","lineage":["https://openalex.org/I86519309"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jiajun Wang","raw_affiliation_strings":["Department of Electrical and Computer Engineering, University of Texas at Austin, Austin, Texas","Department of Electrical and Computer Engineering, University of Texas at Austin, , USA"],"affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering, University of Texas at Austin, Austin, Texas","institution_ids":["https://openalex.org/I86519309"]},{"raw_affiliation_string":"Department of Electrical and Computer Engineering, University of Texas at Austin, , USA","institution_ids":["https://openalex.org/I86519309"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5046024226","display_name":"Andreas Gerstlauer","orcid":"https://orcid.org/0000-0002-6748-2054"},"institutions":[{"id":"https://openalex.org/I86519309","display_name":"The University of Texas at Austin","ror":"https://ror.org/00hj54h04","country_code":"US","type":"education","lineage":["https://openalex.org/I86519309"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Andreas Gerstlauer","raw_affiliation_strings":["Department of Electrical and Computer Engineering, University of Texas at Austin, Austin, Texas","Department of Electrical and Computer Engineering, University of Texas at Austin, , USA"],"affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering, University of Texas at Austin, Austin, Texas","institution_ids":["https://openalex.org/I86519309"]},{"raw_affiliation_string":"Department of Electrical and Computer Engineering, University of Texas at Austin, , USA","institution_ids":["https://openalex.org/I86519309"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5068885069","display_name":"Lizy K. John","orcid":"https://orcid.org/0000-0002-8747-5214"},"institutions":[{"id":"https://openalex.org/I86519309","display_name":"The University of Texas at Austin","ror":"https://ror.org/00hj54h04","country_code":"US","type":"education","lineage":["https://openalex.org/I86519309"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Lizy K. John","raw_affiliation_strings":["Department of Electrical and Computer Engineering, University of Texas at Austin, Austin, Texas","Department of Electrical and Computer Engineering, University of Texas at Austin, , USA"],"affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering, University of Texas at Austin, Austin, Texas","institution_ids":["https://openalex.org/I86519309"]},{"raw_affiliation_string":"Department of Electrical and Computer Engineering, University of Texas at Austin, , USA","institution_ids":["https://openalex.org/I86519309"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5091890371","display_name":"Dhairya Malhotra","orcid":"https://orcid.org/0000-0001-9567-1322"},"institutions":[{"id":"https://openalex.org/I86519309","display_name":"The University of Texas at Austin","ror":"https://ror.org/00hj54h04","country_code":"US","type":"education","lineage":["https://openalex.org/I86519309"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Dhairya Malhotra","raw_affiliation_strings":["Institute for Computational Engineering and Science, University of Texas at Austin, Austin, Texas","Institute for Computational Engineering and Science, University of Texas at Austin, USA"],"affiliations":[{"raw_affiliation_string":"Institute for Computational Engineering and Science, University of Texas at Austin, Austin, Texas","institution_ids":["https://openalex.org/I86519309"]},{"raw_affiliation_string":"Institute for Computational Engineering and Science, University of Texas at Austin, USA","institution_ids":["https://openalex.org/I86519309"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5044137409","display_name":"George Biros","orcid":"https://orcid.org/0000-0002-0033-3994"},"institutions":[{"id":"https://openalex.org/I86519309","display_name":"The University of Texas at Austin","ror":"https://ror.org/00hj54h04","country_code":"US","type":"education","lineage":["https://openalex.org/I86519309"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"George Biros","raw_affiliation_strings":["Institute for Computational Engineering and Science, University of Texas at Austin, Austin, Texas","Institute for Computational Engineering and Science, University of Texas at Austin, USA"],"affiliations":[{"raw_affiliation_string":"Institute for Computational Engineering and Science, University of Texas at Austin, Austin, Texas","institution_ids":["https://openalex.org/I86519309"]},{"raw_affiliation_string":"Institute for Computational Engineering and Science, University of Texas at Austin, USA","institution_ids":["https://openalex.org/I86519309"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5084479278"],"corresponding_institution_ids":["https://openalex.org/I86519309"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.0738675,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"17","issue":null,"first_page":"418","last_page":"425"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10739","display_name":"Electromagnetic Scattering and Analysis","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/3107","display_name":"Atomic and Molecular Physics, and Optics"},"field":{"id":"https://openalex.org/fields/31","display_name":"Physics and Astronomy"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10739","display_name":"Electromagnetic Scattering and Analysis","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/3107","display_name":"Atomic and Molecular Physics, and Optics"},"field":{"id":"https://openalex.org/fields/31","display_name":"Physics and Astronomy"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9947999715805054,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11263","display_name":"Electromagnetic Simulation and Numerical Methods","score":0.9916999936103821,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8585492372512817},{"id":"https://openalex.org/keywords/fast-multipole-method","display_name":"Fast multipole method","score":0.6599459648132324},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.6546573638916016},{"id":"https://openalex.org/keywords/x86","display_name":"x86","score":0.615389883518219},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.6131888031959534},{"id":"https://openalex.org/keywords/supercomputer","display_name":"Supercomputer","score":0.5723007917404175},{"id":"https://openalex.org/keywords/bottleneck","display_name":"Bottleneck","score":0.5341538190841675},{"id":"https://openalex.org/keywords/computational-science","display_name":"Computational science","score":0.4946114718914032},{"id":"https://openalex.org/keywords/adaptive-mesh-refinement","display_name":"Adaptive mesh refinement","score":0.47011318802833557},{"id":"https://openalex.org/keywords/compiler","display_name":"Compiler","score":0.433857798576355},{"id":"https://openalex.org/keywords/data-structure","display_name":"Data structure","score":0.4262831211090088},{"id":"https://openalex.org/keywords/coprocessor","display_name":"Coprocessor","score":0.423662930727005},{"id":"https://openalex.org/keywords/vectorization","display_name":"Vectorization (mathematics)","score":0.42180299758911133},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.19433674216270447},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.17804691195487976},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.1447257399559021}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8585492372512817},{"id":"https://openalex.org/C135115559","wikidata":"https://www.wikidata.org/wiki/Q5437040","display_name":"Fast multipole method","level":3,"score":0.6599459648132324},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.6546573638916016},{"id":"https://openalex.org/C170723468","wikidata":"https://www.wikidata.org/wiki/Q182933","display_name":"x86","level":3,"score":0.615389883518219},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.6131888031959534},{"id":"https://openalex.org/C83283714","wikidata":"https://www.wikidata.org/wiki/Q121117","display_name":"Supercomputer","level":2,"score":0.5723007917404175},{"id":"https://openalex.org/C2780513914","wikidata":"https://www.wikidata.org/wiki/Q18210350","display_name":"Bottleneck","level":2,"score":0.5341538190841675},{"id":"https://openalex.org/C459310","wikidata":"https://www.wikidata.org/wiki/Q117801","display_name":"Computational science","level":1,"score":0.4946114718914032},{"id":"https://openalex.org/C131053463","wikidata":"https://www.wikidata.org/wiki/Q4680751","display_name":"Adaptive mesh refinement","level":2,"score":0.47011318802833557},{"id":"https://openalex.org/C169590947","wikidata":"https://www.wikidata.org/wiki/Q47506","display_name":"Compiler","level":2,"score":0.433857798576355},{"id":"https://openalex.org/C162319229","wikidata":"https://www.wikidata.org/wiki/Q175263","display_name":"Data structure","level":2,"score":0.4262831211090088},{"id":"https://openalex.org/C86111242","wikidata":"https://www.wikidata.org/wiki/Q859595","display_name":"Coprocessor","level":2,"score":0.423662930727005},{"id":"https://openalex.org/C41681595","wikidata":"https://www.wikidata.org/wiki/Q7917855","display_name":"Vectorization (mathematics)","level":2,"score":0.42180299758911133},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.19433674216270447},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.17804691195487976},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.1447257399559021},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C52765159","wikidata":"https://www.wikidata.org/wiki/Q1027847","display_name":"Multipole expansion","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/padsw.2014.7097837","is_oa":false,"landing_page_url":"https://doi.org/10.1109/padsw.2014.7097837","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2014 20th IEEE International Conference on Parallel and Distributed Systems (ICPADS)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":20,"referenced_works":["https://openalex.org/W83535271","https://openalex.org/W1780270892","https://openalex.org/W1971281392","https://openalex.org/W1979313559","https://openalex.org/W1979652778","https://openalex.org/W1985685378","https://openalex.org/W1991369214","https://openalex.org/W2005267849","https://openalex.org/W2051395078","https://openalex.org/W2099541379","https://openalex.org/W2109229561","https://openalex.org/W2117926105","https://openalex.org/W2132820941","https://openalex.org/W2144433126","https://openalex.org/W2146901347","https://openalex.org/W2154790323","https://openalex.org/W3028733942","https://openalex.org/W4229665264","https://openalex.org/W4254309070","https://openalex.org/W4256386389"],"related_works":["https://openalex.org/W3215381467","https://openalex.org/W2762467749","https://openalex.org/W2915956107","https://openalex.org/W4301207796","https://openalex.org/W4846490","https://openalex.org/W2099986681","https://openalex.org/W4240878335","https://openalex.org/W2189125857","https://openalex.org/W2283652387","https://openalex.org/W2249606962"],"abstract_inverted_index":{"Adaptive":[0],"mesh":[1,115],"refinement":[2,116],"(AMR)":[3],"numerical":[4],"methods":[5],"utilizing":[6,49],"octree":[7],"data":[8],"structures":[9],"are":[10],"an":[11],"important":[12],"class":[13],"of":[14,21,31,34,37,72,109,175],"HPC":[15],"applications,":[16],"in":[17,161,179],"particular":[18],"the":[19,29,40,105,121,130,172,180],"solution":[20],"partial":[22],"differential":[23],"equations.":[24],"Much":[25],"effort":[26],"goes":[27],"into":[28],"implementation":[30],"efficient":[32],"versions":[33],"these":[35,60],"types":[36],"programs,":[38],"where":[39],"emphasis":[41],"is":[42,75,171],"often":[43],"on":[44,62,96,120,129,137],"increasing":[45],"multi-node":[46],"performance":[47,71,82,108,150,170],"when":[48,135],"GPUs":[50],"and":[51,126,145],"coprocessors.":[52],"By":[53],"contrast,":[54],"our":[55],"analysis":[56],"aims":[57],"to":[58,148,168],"characterize":[59],"workloads":[61,87],"traditional":[63],"CPUs,":[64],"as":[65,89],"we":[66,103],"believe":[67],"that":[68,154],"single-threaded":[69],"intra-node":[70],"critical":[73],"kernels":[74],"still":[76],"a":[77,138,158],"key":[78],"factor":[79],"for":[80],"achieving":[81,162],"at":[83],"scale.":[84],"Especially":[85],"irregular":[86],"such":[88],"AMR":[90],"methods,":[91],"however,":[92],"exhibit":[93],"severe":[94],"underutilization":[95],"general":[97],"purpose":[98],"processors.":[99],"In":[100],"this":[101],"paper,":[102],"analyze":[104],"single":[106],"core":[107],"two":[110],"state-of-the-art,":[111],"highly":[112],"scalable":[113],"adaptive":[114],"codes,":[117],"one":[118,127],"based":[119,128],"Fast":[122],"Multipole":[123],"Method":[124,133],"(FMM)":[125],"Finite":[131],"Element":[132],"(FEM),":[134],"running":[136],"x86":[139],"CPU.":[140],"We":[141,152],"examined":[142],"both":[143],"scalar":[144],"vectorized":[146],"implementations":[147],"identify":[149],"bottlenecks.":[151],"demonstrate":[153],"vectorization":[155],"can":[156],"provide":[157],"significant":[159],"benefit":[160],"high":[163,173],"performance.":[164],"The":[165],"greatest":[166],"bottleneck":[167],"peak":[169],"fraction":[174],"non-floating":[176],"point":[177],"instructions":[178],"kernels.":[181]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
