{"id":"https://openalex.org/W2132147136","doi":"https://doi.org/10.2312/pgv.20141085","title":"Auto-Tuning Complex Array Layouts for GPUs","display_name":"Auto-Tuning Complex Array Layouts for GPUs","publication_year":2014,"publication_date":"2014-01-01","ids":{"openalex":"https://openalex.org/W2132147136","doi":"https://doi.org/10.2312/pgv.20141085","mag":"2132147136"},"language":"en","primary_location":{"id":"pmh:oai:tubiblio.ulb.tu-darmstadt.de:78066","is_oa":false,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4377196390","display_name":"TUbilio (Technical University of Darmstadt)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I31512782","host_organization_name":"Technische Universit\u00e4t Darmstadt","host_organization_lineage":["https://openalex.org/I31512782"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"acceptedVersion","is_accepted":true,"is_published":false,"raw_source_name":"","raw_type":"Konferenzver\u00f6ffentlichung"},"type":"article","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.2312/pgv.20141085","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5110023001","display_name":"Nicolas Weber","orcid":"https://orcid.org/0000-0003-0032-9511"},"institutions":[{"id":"https://openalex.org/I31512782","display_name":"Technical University of Darmstadt","ror":"https://ror.org/05n911h24","country_code":"DE","type":"education","lineage":["https://openalex.org/I31512782"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Weber, Nicolas","raw_affiliation_strings":["[TU, Darmstadt, Germany]"],"affiliations":[{"raw_affiliation_string":"[TU, Darmstadt, Germany]","institution_ids":["https://openalex.org/I31512782"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5066521879","display_name":"Michael Goesele","orcid":"https://orcid.org/0000-0002-0944-0980"},"institutions":[{"id":"https://openalex.org/I31512782","display_name":"Technical University of Darmstadt","ror":"https://ror.org/05n911h24","country_code":"DE","type":"education","lineage":["https://openalex.org/I31512782"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Goesele, Michael","raw_affiliation_strings":["[TU, Darmstadt, Germany]"],"affiliations":[{"raw_affiliation_string":"[TU, Darmstadt, Germany]","institution_ids":["https://openalex.org/I31512782"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5110023001"],"corresponding_institution_ids":["https://openalex.org/I31512782"],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":11,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"57","last_page":"64"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9973000288009644,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8768222332000732},{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.7169942855834961},{"id":"https://openalex.org/keywords/graphics","display_name":"Graphics","score":0.6364263296127319},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.6270829439163208},{"id":"https://openalex.org/keywords/kernel","display_name":"Kernel (algebra)","score":0.5738346576690674},{"id":"https://openalex.org/keywords/general-purpose-computing-on-graphics-processing-units","display_name":"General-purpose computing on graphics processing units","score":0.5372071266174316},{"id":"https://openalex.org/keywords/implementation","display_name":"Implementation","score":0.521247923374176},{"id":"https://openalex.org/keywords/cuda","display_name":"CUDA","score":0.5069725513458252},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.4691556692123413},{"id":"https://openalex.org/keywords/profiling","display_name":"Profiling (computer programming)","score":0.4274599850177765},{"id":"https://openalex.org/keywords/computer-architecture","display_name":"Computer architecture","score":0.3419504463672638},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.1709333062171936},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.11548402905464172}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8768222332000732},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.7169942855834961},{"id":"https://openalex.org/C21442007","wikidata":"https://www.wikidata.org/wiki/Q1027879","display_name":"Graphics","level":2,"score":0.6364263296127319},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.6270829439163208},{"id":"https://openalex.org/C74193536","wikidata":"https://www.wikidata.org/wiki/Q574844","display_name":"Kernel (algebra)","level":2,"score":0.5738346576690674},{"id":"https://openalex.org/C50630238","wikidata":"https://www.wikidata.org/wiki/Q971505","display_name":"General-purpose computing on graphics processing units","level":3,"score":0.5372071266174316},{"id":"https://openalex.org/C26713055","wikidata":"https://www.wikidata.org/wiki/Q245962","display_name":"Implementation","level":2,"score":0.521247923374176},{"id":"https://openalex.org/C2778119891","wikidata":"https://www.wikidata.org/wiki/Q477690","display_name":"CUDA","level":2,"score":0.5069725513458252},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.4691556692123413},{"id":"https://openalex.org/C187191949","wikidata":"https://www.wikidata.org/wiki/Q1138496","display_name":"Profiling (computer programming)","level":2,"score":0.4274599850177765},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.3419504463672638},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.1709333062171936},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.11548402905464172},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.0},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"pmh:oai:tubiblio.ulb.tu-darmstadt.de:78066","is_oa":false,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4377196390","display_name":"TUbilio (Technical University of Darmstadt)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I31512782","host_organization_name":"Technische Universit\u00e4t Darmstadt","host_organization_lineage":["https://openalex.org/I31512782"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"acceptedVersion","is_accepted":true,"is_published":false,"raw_source_name":"","raw_type":"Konferenzver\u00f6ffentlichung"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.641.2747","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.641.2747","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://www.gris.informatik.tu-darmstadt.de/~mgoesele/download/Weber_2014_MAT.pdf","raw_type":"text"},{"id":"doi:10.2312/pgv.20141085","is_oa":true,"landing_page_url":"https://doi.org/10.2312/pgv.20141085","pdf_url":null,"source":{"id":"https://openalex.org/S7407052899","display_name":"Eurographics","issn_l":null,"issn":[],"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"},{"id":"mag:2132147136","is_oa":false,"landing_page_url":"https://dblp.uni-trier.de/db/conf/egpgv/egpgv2014.html#WeberG14","pdf_url":null,"source":{"id":"https://openalex.org/S4306418300","display_name":"Eurographics Workshop on Parallel Graphics and Visualization","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":"Eurographics Workshop on Parallel Graphics and Visualization","raw_type":null}],"best_oa_location":{"id":"doi:10.2312/pgv.20141085","is_oa":true,"landing_page_url":"https://doi.org/10.2312/pgv.20141085","pdf_url":null,"source":{"id":"https://openalex.org/S7407052899","display_name":"Eurographics","issn_l":null,"issn":[],"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":24,"referenced_works":["https://openalex.org/W172770979","https://openalex.org/W1547909858","https://openalex.org/W1994869533","https://openalex.org/W2011159963","https://openalex.org/W2033088400","https://openalex.org/W2055312318","https://openalex.org/W2085118703","https://openalex.org/W2104329103","https://openalex.org/W2104637563","https://openalex.org/W2113282196","https://openalex.org/W2113643095","https://openalex.org/W2120934608","https://openalex.org/W2125833438","https://openalex.org/W2128539477","https://openalex.org/W2132598718","https://openalex.org/W2138163628","https://openalex.org/W2139935536","https://openalex.org/W2141389982","https://openalex.org/W2144033344","https://openalex.org/W2144511445","https://openalex.org/W2161376430","https://openalex.org/W2268696613","https://openalex.org/W2504987011","https://openalex.org/W2532040979"],"related_works":["https://openalex.org/W2272866099","https://openalex.org/W2206155066","https://openalex.org/W2110202147","https://openalex.org/W95017943","https://openalex.org/W2026048928","https://openalex.org/W1677907378","https://openalex.org/W2155942864","https://openalex.org/W3028001473","https://openalex.org/W2027534015","https://openalex.org/W2099160152","https://openalex.org/W60847499","https://openalex.org/W2140375692","https://openalex.org/W1580632765","https://openalex.org/W3114481667","https://openalex.org/W2165022798","https://openalex.org/W2729851701","https://openalex.org/W2061433738","https://openalex.org/W2751874067","https://openalex.org/W2555018742","https://openalex.org/W2011813543"],"abstract_inverted_index":{"The":[0],"continuing":[1],"evolution":[2],"of":[3,157],"Graphics":[4],"Processing":[5],"Units":[6],"(GPU)":[7],"has":[8],"shown":[9],"rapid":[10],"performance":[11],"increases":[12],"over":[13],"the":[14,22,41,85,109,116,125,129,134,143],"years.":[15],"But":[16],"with":[17],"each":[18],"new":[19,56,91],"hardware":[20,38],"generation,":[21],"constraints":[23],"for":[24,112],"programming":[25],"them":[26],"efficiently":[27],"have":[28,31],"changed.":[29],"Programs":[30],"to":[32,39,53,65,69,79,83,93,123,149,154],"be":[33],"tuned":[34],"towards":[35],"one":[36],"specific":[37,105],"unleash":[40],"full":[42],"potential.":[43],"This":[44,121],"is":[45,62,147],"time":[46],"consuming":[47],"and":[48,118,159],"costly":[49],"as":[50,136],"vendors":[51],"tend":[52],"release":[54],"a":[55,90],"generation":[57],"every":[58],"18":[59],"months.":[60],"It":[61],"therefore":[63],"important":[64],"auto-tune":[66],"GPU":[67,119],"code":[68,126],"achieve":[70],"GPU-specific":[71],"improvements.":[72],"Using":[73],"either":[74],"static":[75],"or":[76,82],"empirical":[77],"profiling":[78],"adjust":[80],"parameters":[81],"change":[84],"kernel":[86],"implementation.":[87],"We":[88],"introduce":[89],"approach":[92],"automatically":[94],"improve":[95],"memory":[96,110,130],"access":[97,111],"on":[98,115],"GPUs.":[99],"Our":[100,145],"system":[101],"generates":[102],"an":[103,155],"application":[104],"library":[106],"which":[107],"abstracts":[108],"complex":[113],"arrays":[114],"host":[117],"side.":[120],"allows":[122],"optimize":[124],"by":[127],"exchanging":[128],"layout":[131],"without":[132],"recompiling":[133],"application,":[135],"all":[137],"necessary":[138],"layouts":[139],"are":[140],"pre-compiled":[141],"into":[142],"library.":[144],"implementation":[146],"able":[148],"speedup":[150],"real-world":[151],"applications":[152],"up":[153],"order":[156],"magnitude":[158],"even":[160],"outperforms":[161],"hand-tuned":[162],"implementations.":[163]},"counts_by_year":[{"year":2021,"cited_by_count":2},{"year":2020,"cited_by_count":1},{"year":2018,"cited_by_count":4},{"year":2017,"cited_by_count":1},{"year":2016,"cited_by_count":1},{"year":2015,"cited_by_count":2}],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2025-10-10T00:00:00"}
