{"id":"https://openalex.org/W2887312645","doi":"https://doi.org/10.1109/ipdpsw.2018.00172","title":"AutoTuneTMP: Auto-Tuning in C++ With Runtime Template Metaprogramming","display_name":"AutoTuneTMP: Auto-Tuning in C++ With Runtime Template Metaprogramming","publication_year":2018,"publication_date":"2018-05-01","ids":{"openalex":"https://openalex.org/W2887312645","doi":"https://doi.org/10.1109/ipdpsw.2018.00172","mag":"2887312645"},"language":"en","primary_location":{"id":"doi:10.1109/ipdpsw.2018.00172","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ipdpsw.2018.00172","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2018 IEEE International Parallel and Distributed Processing Symposium Workshops (IPDPSW)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5022231761","display_name":"David Pfander","orcid":null},"institutions":[{"id":"https://openalex.org/I100066346","display_name":"University of Stuttgart","ror":"https://ror.org/04vnq7t77","country_code":"DE","type":"education","lineage":["https://openalex.org/I100066346"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"David Pfander","raw_affiliation_strings":["Institute for Parallel and Distributed Systems, University of Stuttgart, Germany"],"affiliations":[{"raw_affiliation_string":"Institute for Parallel and Distributed Systems, University of Stuttgart, Germany","institution_ids":["https://openalex.org/I100066346"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5050780494","display_name":"Malte Brunn","orcid":null},"institutions":[{"id":"https://openalex.org/I100066346","display_name":"University of Stuttgart","ror":"https://ror.org/04vnq7t77","country_code":"DE","type":"education","lineage":["https://openalex.org/I100066346"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Malte Brunn","raw_affiliation_strings":["Institute for Parallel and Distributed Systems, University of Stuttgart, Germany"],"affiliations":[{"raw_affiliation_string":"Institute for Parallel and Distributed Systems, University of Stuttgart, Germany","institution_ids":["https://openalex.org/I100066346"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5041326099","display_name":"Dirk Pfl\u00fcger","orcid":"https://orcid.org/0000-0002-4360-0212"},"institutions":[{"id":"https://openalex.org/I100066346","display_name":"University of Stuttgart","ror":"https://ror.org/04vnq7t77","country_code":"DE","type":"education","lineage":["https://openalex.org/I100066346"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Dirk Pfluger","raw_affiliation_strings":["Institute for Parallel and Distributed Systems, University of Stuttgart, Germany"],"affiliations":[{"raw_affiliation_string":"Institute for Parallel and Distributed Systems, University of Stuttgart, Germany","institution_ids":["https://openalex.org/I100066346"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5022231761"],"corresponding_institution_ids":["https://openalex.org/I100066346"],"apc_list":null,"apc_paid":null,"fwci":0.2632,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.52138043,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"1123","last_page":"1132"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10904","display_name":"Embedded Systems Design Techniques","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.9955000281333923,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/metaprogramming","display_name":"Metaprogramming","score":0.9486416578292847},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7663160562515259},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.686742901802063},{"id":"https://openalex.org/keywords/software-engineering","display_name":"Software engineering","score":0.3649234473705292}],"concepts":[{"id":"https://openalex.org/C35390924","wikidata":"https://www.wikidata.org/wiki/Q661075","display_name":"Metaprogramming","level":2,"score":0.9486416578292847},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7663160562515259},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.686742901802063},{"id":"https://openalex.org/C115903868","wikidata":"https://www.wikidata.org/wiki/Q80993","display_name":"Software engineering","level":1,"score":0.3649234473705292}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/ipdpsw.2018.00172","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ipdpsw.2018.00172","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2018 IEEE International Parallel and Distributed Processing Symposium Workshops (IPDPSW)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.47999998927116394,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":34,"referenced_works":["https://openalex.org/W1480958225","https://openalex.org/W1598340254","https://openalex.org/W1964031104","https://openalex.org/W1979875412","https://openalex.org/W2016618963","https://openalex.org/W2025437939","https://openalex.org/W2067913710","https://openalex.org/W2078794610","https://openalex.org/W2096070062","https://openalex.org/W2099625934","https://openalex.org/W2100218206","https://openalex.org/W2135653967","https://openalex.org/W2135682468","https://openalex.org/W2136628731","https://openalex.org/W2142673658","https://openalex.org/W2151788546","https://openalex.org/W2154786353","https://openalex.org/W2158626113","https://openalex.org/W2163491234","https://openalex.org/W2260734724","https://openalex.org/W2314944927","https://openalex.org/W2412589610","https://openalex.org/W2484342675","https://openalex.org/W2506485539","https://openalex.org/W2727949054","https://openalex.org/W3136479147","https://openalex.org/W3146039865","https://openalex.org/W4237817413","https://openalex.org/W4247050054","https://openalex.org/W4251164127","https://openalex.org/W6680007323","https://openalex.org/W6682805176","https://openalex.org/W6714852264","https://openalex.org/W6832800064"],"related_works":["https://openalex.org/W123754615","https://openalex.org/W2293709606","https://openalex.org/W16365832","https://openalex.org/W2127759428","https://openalex.org/W1535099794","https://openalex.org/W201808208","https://openalex.org/W4320807203","https://openalex.org/W2769814451","https://openalex.org/W2073905442","https://openalex.org/W2968873290"],"abstract_inverted_index":{"Careful":[0],"tuning":[1,37],"of":[2,22,26,106,145,150,170,190,221],"code":[3],"is":[4,78,154,182],"crucial":[5],"to":[6,44,47,51,56,60,79,101,130,174,184,188,202,223,232],"obtain":[7],"near-optimal":[8,134],"runtime":[9,98],"performance":[10,58,62,144,193],"on":[11,194,204,225,234],"the":[12,52,140,148,191,226,235],"hardware":[13,18,54],"at":[14,93,147],"hand.":[15],"How-ever,":[16],"current":[17],"platforms":[19],"pose":[20],"plenty":[21],"challenges:":[23],"multiple":[24],"levels":[25],"cache,":[27],"many":[28],"cores":[29],"and":[30,39,59,83,110,135,143,164,200,214,230],"increasingly":[31],"wider":[32],"vector":[33],"units":[34],"render":[35],"manual":[36],"time-consuming":[38],"cumbersome.":[40],"Thus,":[41],"algorithms":[42,132],"have":[43],"be":[45,91,128,175],"able":[46,183],"automatically":[48,185],"tune":[49],"themselves":[50],"underlying":[53],"platform":[55,229],"maximize":[57],"ensure":[61],"portability.":[63],"In":[64],"this":[65,97,124],"work,":[66],"we":[67,217],"present":[68],"AutoTuneTMP,":[69],"a":[70,157,160,166,195,205,219],"novel":[71],"C++-based":[72],"auto-tuning":[73],"library.":[74],"Its":[75],"unique":[76],"strength":[77],"combine":[80],"template":[81],"metaprogramming":[82,99],"just-in-time":[84],"(JIT)":[85],"compilation":[86],"so":[87],"that":[88],"templates":[89],"can":[90,127],"instantiated":[92],"runtime.":[94],"We":[95,138],"use":[96],"approach":[100,126],"provide":[102],"an":[103,211],"extensible":[104],"set":[105,169],"parameterized":[107],"template-based":[108],"optimizations":[109],"data":[111],"structures":[112],"for":[113,121],"writing":[114],"tunable":[115,172],"kernels.":[116],"Together":[117],"with":[118,133],"convenience":[119],"functionality":[120],"parameter":[122],"tuning,":[123],"lightweight":[125],"used":[129],"implement":[131],"portable":[136],"performance.":[137],"demonstrate":[139],"applicability,":[141],"usefulness":[142],"AutoTuneTMP":[146,181],"example":[149],"matrix":[151],"multiplication.":[152],"It":[153],"well-suited":[155],"as":[156],"demonstrator,":[158],"being":[159],"well-known":[161],"compute":[162],"kernel":[163],"exhibiting":[165],"rather":[167],"large":[168],"(nine)":[171],"parameters":[173],"optimized.":[176],"With":[177],"simple":[178],"search":[179],"algorithms,":[180],"achieve":[186],"up":[187,201,222,231],"90%":[189],"peak":[192],"Xeon":[196,206,227,236],"Silver":[197,228],"4116":[198],"processor":[199],"34%":[203],"Phi":[207,237],"7210.":[208],"Starting":[209],"from":[210],"already":[212],"parallelized":[213],"vectorized":[215],"baseline,":[216],"obtained":[218],"speedup":[220],"3.1x":[224],"6.7x":[233],"platform.":[238]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2021,"cited_by_count":1}],"updated_date":"2026-03-25T13:04:00.132906","created_date":"2025-10-10T00:00:00"}
