{"id":"https://openalex.org/W1616824231","doi":"https://doi.org/10.1145/2687415","title":"Cross-Loop Optimization of Arithmetic Intensity for Finite Element Local Assembly","display_name":"Cross-Loop Optimization of Arithmetic Intensity for Finite Element Local Assembly","publication_year":2015,"publication_date":"2015-01-09","ids":{"openalex":"https://openalex.org/W1616824231","doi":"https://doi.org/10.1145/2687415","mag":"1616824231"},"language":"en","primary_location":{"id":"doi:10.1145/2687415","is_oa":true,"landing_page_url":"https://doi.org/10.1145/2687415","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/2687415","source":{"id":"https://openalex.org/S26056741","display_name":"ACM Transactions on Architecture and Code Optimization","issn_l":"1544-3566","issn":["1544-3566","1544-3973"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Architecture and Code Optimization","raw_type":"journal-article"},"type":"article","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"bronze","oa_url":"https://dl.acm.org/doi/pdf/10.1145/2687415","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Fabio Luporini","orcid":null},"institutions":[{"id":"https://openalex.org/I47508984","display_name":"Imperial College London","ror":"https://ror.org/041kmwe10","country_code":"GB","type":"education","lineage":["https://openalex.org/I47508984"]}],"countries":["GB"],"is_corresponding":true,"raw_author_name":"Fabio Luporini","raw_affiliation_strings":["Imperial College London"],"affiliations":[{"raw_affiliation_string":"Imperial College London","institution_ids":["https://openalex.org/I47508984"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Ana Lucia Varbanescu","orcid":null},"institutions":[{"id":"https://openalex.org/I887064364","display_name":"University of Amsterdam","ror":"https://ror.org/04dkp9463","country_code":"NL","type":"education","lineage":["https://openalex.org/I887064364"]}],"countries":["NL"],"is_corresponding":false,"raw_author_name":"Ana Lucia Varbanescu","raw_affiliation_strings":["University of Amsterdam"],"affiliations":[{"raw_affiliation_string":"University of Amsterdam","institution_ids":["https://openalex.org/I887064364"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Florian Rathgeber","orcid":null},"institutions":[{"id":"https://openalex.org/I47508984","display_name":"Imperial College London","ror":"https://ror.org/041kmwe10","country_code":"GB","type":"education","lineage":["https://openalex.org/I47508984"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Florian Rathgeber","raw_affiliation_strings":["Imperial College London"],"affiliations":[{"raw_affiliation_string":"Imperial College London","institution_ids":["https://openalex.org/I47508984"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Gheorghe-Teodor Bercea","orcid":null},"institutions":[{"id":"https://openalex.org/I47508984","display_name":"Imperial College London","ror":"https://ror.org/041kmwe10","country_code":"GB","type":"education","lineage":["https://openalex.org/I47508984"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Gheorghe-Teodor Bercea","raw_affiliation_strings":["Imperial College London"],"affiliations":[{"raw_affiliation_string":"Imperial College London","institution_ids":["https://openalex.org/I47508984"]}]},{"author_position":"middle","author":{"id":null,"display_name":"J. Ramanujam","orcid":null},"institutions":[{"id":"https://openalex.org/I121820613","display_name":"Louisiana State University","ror":"https://ror.org/05ect4e57","country_code":"US","type":"education","lineage":["https://openalex.org/I121820613"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"J. Ramanujam","raw_affiliation_strings":["Louisiana State University"],"affiliations":[{"raw_affiliation_string":"Louisiana State University","institution_ids":["https://openalex.org/I121820613"]}]},{"author_position":"middle","author":{"id":null,"display_name":"David A. Ham","orcid":null},"institutions":[{"id":"https://openalex.org/I47508984","display_name":"Imperial College London","ror":"https://ror.org/041kmwe10","country_code":"GB","type":"education","lineage":["https://openalex.org/I47508984"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"David A. Ham","raw_affiliation_strings":["Imperial College London"],"affiliations":[{"raw_affiliation_string":"Imperial College London","institution_ids":["https://openalex.org/I47508984"]}]},{"author_position":"last","author":{"id":null,"display_name":"Paul H. J. Kelly","orcid":null},"institutions":[{"id":"https://openalex.org/I47508984","display_name":"Imperial College London","ror":"https://ror.org/041kmwe10","country_code":"GB","type":"education","lineage":["https://openalex.org/I47508984"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Paul H. J. Kelly","raw_affiliation_strings":["Imperial College London"],"affiliations":[{"raw_affiliation_string":"Imperial College London","institution_ids":["https://openalex.org/I47508984"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":7,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I47508984"],"apc_list":null,"apc_paid":null,"fwci":6.9247,"has_fulltext":true,"cited_by_count":34,"citation_normalized_percentile":{"value":0.97219344,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":"11","issue":"4","first_page":"1","last_page":"25"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.6926000118255615,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.6926000118255615,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10126","display_name":"Logic, programming, and type systems","score":0.19050000607967377,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10904","display_name":"Embedded Systems Design Techniques","score":0.05429999902844429,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/compiler","display_name":"Compiler","score":0.6916000247001648},{"id":"https://openalex.org/keywords/program-optimization","display_name":"Program optimization","score":0.5149000287055969},{"id":"https://openalex.org/keywords/simd","display_name":"SIMD","score":0.4945000112056732},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.4799000024795532},{"id":"https://openalex.org/keywords/range","display_name":"Range (aeronautics)","score":0.47839999198913574},{"id":"https://openalex.org/keywords/optimizing-compiler","display_name":"Optimizing compiler","score":0.4528000056743622},{"id":"https://openalex.org/keywords/finite-element-method","display_name":"Finite element method","score":0.4487999975681305},{"id":"https://openalex.org/keywords/syntax","display_name":"Syntax","score":0.4453999996185303},{"id":"https://openalex.org/keywords/affine-transformation","display_name":"Affine transformation","score":0.4309000074863434},{"id":"https://openalex.org/keywords/element","display_name":"Element (criminal law)","score":0.4239000082015991}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8019000291824341},{"id":"https://openalex.org/C169590947","wikidata":"https://www.wikidata.org/wiki/Q47506","display_name":"Compiler","level":2,"score":0.6916000247001648},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.6115999817848206},{"id":"https://openalex.org/C139571649","wikidata":"https://www.wikidata.org/wiki/Q1156793","display_name":"Program optimization","level":3,"score":0.5149000287055969},{"id":"https://openalex.org/C150552126","wikidata":"https://www.wikidata.org/wiki/Q339387","display_name":"SIMD","level":2,"score":0.4945000112056732},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.4799000024795532},{"id":"https://openalex.org/C204323151","wikidata":"https://www.wikidata.org/wiki/Q905424","display_name":"Range (aeronautics)","level":2,"score":0.47839999198913574},{"id":"https://openalex.org/C190902152","wikidata":"https://www.wikidata.org/wiki/Q1325106","display_name":"Optimizing compiler","level":3,"score":0.4528000056743622},{"id":"https://openalex.org/C135628077","wikidata":"https://www.wikidata.org/wiki/Q220184","display_name":"Finite element method","level":2,"score":0.4487999975681305},{"id":"https://openalex.org/C60048249","wikidata":"https://www.wikidata.org/wiki/Q37437","display_name":"Syntax","level":2,"score":0.4453999996185303},{"id":"https://openalex.org/C92757383","wikidata":"https://www.wikidata.org/wiki/Q382497","display_name":"Affine transformation","level":2,"score":0.4309000074863434},{"id":"https://openalex.org/C200288055","wikidata":"https://www.wikidata.org/wiki/Q2621792","display_name":"Element (criminal law)","level":2,"score":0.4239000082015991},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3986000120639801},{"id":"https://openalex.org/C50831359","wikidata":"https://www.wikidata.org/wiki/Q165436","display_name":"Assembly language","level":3,"score":0.3968000113964081},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.3862999975681305},{"id":"https://openalex.org/C2781172179","wikidata":"https://www.wikidata.org/wiki/Q853109","display_name":"Parallelism (grammar)","level":2,"score":0.3684999942779541},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.34380000829696655},{"id":"https://openalex.org/C2777212361","wikidata":"https://www.wikidata.org/wiki/Q5127848","display_name":"Class (philosophy)","level":2,"score":0.33739998936653137},{"id":"https://openalex.org/C133162039","wikidata":"https://www.wikidata.org/wiki/Q1061077","display_name":"Code generation","level":3,"score":0.3172000050544739},{"id":"https://openalex.org/C94375191","wikidata":"https://www.wikidata.org/wiki/Q11205","display_name":"Arithmetic","level":1,"score":0.30979999899864197},{"id":"https://openalex.org/C128916667","wikidata":"https://www.wikidata.org/wiki/Q1343660","display_name":"Register allocation","level":3,"score":0.3082999885082245},{"id":"https://openalex.org/C29331672","wikidata":"https://www.wikidata.org/wiki/Q3354468","display_name":"Loop optimization","level":4,"score":0.3077000081539154},{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.301800012588501},{"id":"https://openalex.org/C200833197","wikidata":"https://www.wikidata.org/wiki/Q333707","display_name":"Compile time","level":3,"score":0.3003000020980835},{"id":"https://openalex.org/C2778241615","wikidata":"https://www.wikidata.org/wiki/Q83303","display_name":"Fortran","level":2,"score":0.299699991941452},{"id":"https://openalex.org/C58646249","wikidata":"https://www.wikidata.org/wiki/Q127380","display_name":"Abstract syntax tree","level":3,"score":0.29179999232292175},{"id":"https://openalex.org/C459310","wikidata":"https://www.wikidata.org/wiki/Q117801","display_name":"Computational science","level":1,"score":0.2827000021934509},{"id":"https://openalex.org/C111564260","wikidata":"https://www.wikidata.org/wiki/Q4288856","display_name":"Interprocedural optimization","level":5,"score":0.2768000066280365},{"id":"https://openalex.org/C115168132","wikidata":"https://www.wikidata.org/wiki/Q55813","display_name":"Machine code","level":3,"score":0.27070000767707825},{"id":"https://openalex.org/C74193536","wikidata":"https://www.wikidata.org/wiki/Q574844","display_name":"Kernel (algebra)","level":2,"score":0.26660001277923584},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.26489999890327454},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.26100000739097595},{"id":"https://openalex.org/C149629883","wikidata":"https://www.wikidata.org/wiki/Q660926","display_name":"Fraction (chemistry)","level":2,"score":0.2565000057220459},{"id":"https://openalex.org/C42058472","wikidata":"https://www.wikidata.org/wiki/Q810214","display_name":"Base (topology)","level":2,"score":0.2547000050544739},{"id":"https://openalex.org/C8767382","wikidata":"https://www.wikidata.org/wiki/Q1058454","display_name":"Dynamic compilation","level":3,"score":0.25130000710487366}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1145/2687415","is_oa":true,"landing_page_url":"https://doi.org/10.1145/2687415","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/2687415","source":{"id":"https://openalex.org/S26056741","display_name":"ACM Transactions on Architecture and Code Optimization","issn_l":"1544-3566","issn":["1544-3566","1544-3973"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Architecture and Code Optimization","raw_type":"journal-article"},{"id":"pmh:oai:arXiv.org:1407.0904","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1407.0904","pdf_url":"https://arxiv.org/pdf/1407.0904","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"pmh:oai:repository.lsu.edu:eecs_pubs-2578","is_oa":false,"landing_page_url":"https://repository.lsu.edu/eecs_pubs/1576","pdf_url":null,"source":{"id":"https://openalex.org/S4210169993","display_name":"Civil War Book Review","issn_l":"1528-6592","issn":["1528-6592"],"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310315936","host_organization_name":"Louisiana State University","host_organization_lineage":["https://openalex.org/P4310315936"],"host_organization_lineage_names":["Louisiana State University"],"type":"journal"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Faculty Publications","raw_type":"text"}],"best_oa_location":{"id":"doi:10.1145/2687415","is_oa":true,"landing_page_url":"https://doi.org/10.1145/2687415","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/2687415","source":{"id":"https://openalex.org/S26056741","display_name":"ACM Transactions on Architecture and Code Optimization","issn_l":"1544-3566","issn":["1544-3566","1544-3973"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Architecture and Code Optimization","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1278550217","display_name":"Platform: Underpinning Technologies for Finite Element Simulation","funder_award_id":"EP/L000407/1","funder_id":"https://openalex.org/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council"},{"id":"https://openalex.org/G1532236644","display_name":null,"funder_award_id":"926687","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G2249403966","display_name":null,"funder_award_id":"EP/I00677X/1","funder_id":"https://openalex.org/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council"},{"id":"https://openalex.org/G254850637","display_name":"Collaborative Research: An Environment for Portable High Productivity High Performance Computing on GPUs/Accelerators","funder_award_id":"0926687","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G2868230561","display_name":null,"funder_award_id":"00678","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G3014361174","display_name":null,"funder_award_id":"NE/K006789/1","funder_id":"https://openalex.org/F4320320022","funder_display_name":"Sight Research UK"},{"id":"https://openalex.org/G323769019","display_name":"II-NEW: Research Software Infrastructure for Tensor Computations","funder_award_id":"1059417","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G3330119427","display_name":null,"funder_award_id":"EPSRC","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G3515392198","display_name":null,"funder_award_id":"811457","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G4844738447","display_name":"Sustainable domain-specific software generation tools for extremely parallel particle-based simulations","funder_award_id":"EP/I006761/1","funder_id":"https://openalex.org/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council"},{"id":"https://openalex.org/G5861081951","display_name":"Multi-layered abstractions for PDEs","funder_award_id":"EP/I00677X/1","funder_id":"https://openalex.org/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council"},{"id":"https://openalex.org/G6286740375","display_name":"Collaborative Research:  CPA-CPL-T:   An Effective Automatic Parallelization Framework for Multi-Core Architectures","funder_award_id":"0811457","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G7215042158","display_name":null,"funder_award_id":"NE/K008951/1","funder_id":"https://openalex.org/F4320334631","funder_display_name":"Natural Environment Research Council"},{"id":"https://openalex.org/G7246797574","display_name":"Gung Ho Phase 2","funder_award_id":"NE/K006789/1","funder_id":"https://openalex.org/F4320334631","funder_display_name":"Natural Environment Research Council"},{"id":"https://openalex.org/G731814487","display_name":null,"funder_award_id":"EP/I006761/1","funder_id":"https://openalex.org/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council"},{"id":"https://openalex.org/G7554227835","display_name":null,"funder_award_id":"EP/L000407/1","funder_id":"https://openalex.org/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council"},{"id":"https://openalex.org/G8057849231","display_name":null,"funder_award_id":"NE/K008951/1","funder_id":"https://openalex.org/F4320320022","funder_display_name":"Sight Research UK"},{"id":"https://openalex.org/G8273334653","display_name":null,"funder_award_id":"NE/K006789/1","funder_id":"https://openalex.org/F4320334631","funder_display_name":"Natural Environment Research Council"},{"id":"https://openalex.org/G8537917186","display_name":"Abstracting the environment: automating geoscientific simulation","funder_award_id":"NE/K008951/1","funder_id":"https://openalex.org/F4320334631","funder_display_name":"Natural Environment Research Council"},{"id":"https://openalex.org/G913687718","display_name":null,"funder_award_id":"NE/K006789/1","funder_id":"https://openalex.org/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320310395","display_name":"Louisiana State University","ror":"https://ror.org/05ect4e57"},{"id":"https://openalex.org/F4320320022","display_name":"Sight Research UK","ror":"https://ror.org/03z2py885"},{"id":"https://openalex.org/F4320320283","display_name":"Imperial College London","ror":"https://ror.org/041kmwe10"},{"id":"https://openalex.org/F4320334627","display_name":"Engineering and Physical Sciences Research Council","ror":"https://ror.org/0439y7842"},{"id":"https://openalex.org/F4320334631","display_name":"Natural Environment Research Council","ror":"https://ror.org/02b5d8509"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W1616824231.pdf","grobid_xml":"https://content.openalex.org/works/W1616824231.grobid-xml"},"referenced_works_count":23,"referenced_works":["https://openalex.org/W168487160","https://openalex.org/W1966827540","https://openalex.org/W1976110206","https://openalex.org/W1978064247","https://openalex.org/W1979457157","https://openalex.org/W1979836673","https://openalex.org/W1993079359","https://openalex.org/W2008060144","https://openalex.org/W2030859425","https://openalex.org/W2034761517","https://openalex.org/W2038646072","https://openalex.org/W2071430609","https://openalex.org/W2100418324","https://openalex.org/W2102076631","https://openalex.org/W2102182691","https://openalex.org/W2107979247","https://openalex.org/W2136952590","https://openalex.org/W2139710482","https://openalex.org/W2151764765","https://openalex.org/W2154968583","https://openalex.org/W2159846564","https://openalex.org/W4233358870","https://openalex.org/W4245654886"],"related_works":[],"abstract_inverted_index":{"We":[0],"study":[1],"and":[2,52,118,154],"systematically":[3],"evaluate":[4],"a":[5,21,37,82,93,108,132],"class":[6],"of":[7,24,55,71,81,134,139,151,157],"composable":[8],"code":[9,125,160],"transformations":[10,161],"that":[11,143],"improve":[12],"arithmetic":[13],"intensity":[14],"in":[15,28,78],"local":[16,87],"assembly":[17,88],"operations,":[18],"which":[19,58],"represent":[20],"significant":[22,144],"fraction":[23],"the":[25,48,53,79,152,155,158],"execution":[26],"time":[27],"finite":[29,97,136],"element":[30,98,137],"methods.":[31,99],"Their":[32],"performance":[33,145],"optimization":[34],"is":[35,147,165],"indeed":[36],"challenging":[38],"issue.":[39],"Even":[40],"though":[41],"affine":[42],"loop":[43],"nests":[44],"are":[45],"generally":[46],"present,":[47],"short":[49],"trip":[50],"counts":[51],"complexity":[54,141],"mathematical":[56],"expressions,":[57],"vary":[59],"among":[60],"different":[61],"problems,":[62],"make":[63],"it":[64,122],"hard":[65],"to":[66,162],"determine":[67],"an":[68],"optimal":[69],"sequence":[70],"successful":[72],"transformations.":[73],"Our":[74],"investigation":[75],"has":[76],"resulted":[77],"implementation":[80],"compiler":[83,101],"(called":[84],"COFFEE)":[85],"for":[86,95,115],"kernels,":[89],"fully":[90],"integrated":[91],"with":[92],"framework":[94],"developing":[96],"The":[100,149],"manipulates":[102],"abstract":[103],"syntax":[104],"trees":[105],"generated":[106],"from":[107],"domain-specific":[109],"language":[110],"by":[111],"introducing":[112],"domain-aware":[113],"optimizations":[114],"instruction-level":[116],"parallelism":[117],"register":[119],"locality.":[120],"Eventually,":[121],"produces":[123],"C":[124],"including":[126],"vector":[127],"SIMD":[128],"intrinsics.":[129],"Experiments":[130],"using":[131],"range":[133],"real-world":[135],"problems":[138],"increasing":[140],"show":[142],"improvement":[146],"achieved.":[148],"generality":[150],"approach":[153],"applicability":[156],"proposed":[159],"other":[163],"domains":[164],"also":[166],"discussed.":[167]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":3},{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":2},{"year":2019,"cited_by_count":4},{"year":2018,"cited_by_count":7},{"year":2017,"cited_by_count":6},{"year":2016,"cited_by_count":6},{"year":2015,"cited_by_count":2}],"updated_date":"2026-04-21T08:09:41.155169","created_date":"2016-06-24T00:00:00"}
