{"id":"https://openalex.org/W4230614322","doi":"https://doi.org/10.1109/micro.2016.7783715","title":"Efficient kernel synthesis for performance portable programming","display_name":"Efficient kernel synthesis for performance portable programming","publication_year":2016,"publication_date":"2016-10-01","ids":{"openalex":"https://openalex.org/W4230614322","doi":"https://doi.org/10.1109/micro.2016.7783715"},"language":"en","primary_location":{"id":"doi:10.1109/micro.2016.7783715","is_oa":false,"landing_page_url":"https://doi.org/10.1109/micro.2016.7783715","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2016 49th Annual IEEE/ACM International Symposium on Microarchitecture (MICRO)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5043931807","display_name":"Li\u2010Wen Chang","orcid":"https://orcid.org/0000-0001-6515-6733"},"institutions":[{"id":"https://openalex.org/I157725225","display_name":"University of Illinois Urbana-Champaign","ror":"https://ror.org/047426m28","country_code":"US","type":"education","lineage":["https://openalex.org/I157725225"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Li-Wen Chang","raw_affiliation_strings":["University of Illinois Urbana-Champaign"],"affiliations":[{"raw_affiliation_string":"University of Illinois Urbana-Champaign","institution_ids":["https://openalex.org/I157725225"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5076404852","display_name":"Izzat El Hajj","orcid":"https://orcid.org/0000-0003-3356-6898"},"institutions":[{"id":"https://openalex.org/I157725225","display_name":"University of Illinois Urbana-Champaign","ror":"https://ror.org/047426m28","country_code":"US","type":"education","lineage":["https://openalex.org/I157725225"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Izzat El Hajj","raw_affiliation_strings":["University of Illinois Urbana-Champaign"],"affiliations":[{"raw_affiliation_string":"University of Illinois Urbana-Champaign","institution_ids":["https://openalex.org/I157725225"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5087702701","display_name":"Christopher Rodrigues","orcid":null},"institutions":[{"id":"https://openalex.org/I2250955327","display_name":"Huawei Technologies (China)","ror":"https://ror.org/00cmhce21","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250955327"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Christopher Rodrigues","raw_affiliation_strings":["Huawei America Research Lab"],"affiliations":[{"raw_affiliation_string":"Huawei America Research Lab","institution_ids":["https://openalex.org/I2250955327"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5044416322","display_name":"Juan G\u00f3mez-Luna","orcid":"https://orcid.org/0000-0002-6514-1571"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Juan Gomez-Luna","raw_affiliation_strings":["Universidad de C\u00f3rdoba"],"affiliations":[{"raw_affiliation_string":"Universidad de C\u00f3rdoba","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5040404999","display_name":"Wen\u2010mei Hwu","orcid":"https://orcid.org/0000-0003-2532-5349"},"institutions":[{"id":"https://openalex.org/I157725225","display_name":"University of Illinois Urbana-Champaign","ror":"https://ror.org/047426m28","country_code":"US","type":"education","lineage":["https://openalex.org/I157725225"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Wen-mei Hwu","raw_affiliation_strings":["University of Illinois Urbana-Champaign"],"affiliations":[{"raw_affiliation_string":"University of Illinois Urbana-Champaign","institution_ids":["https://openalex.org/I157725225"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5043931807"],"corresponding_institution_ids":["https://openalex.org/I157725225"],"apc_list":null,"apc_paid":null,"fwci":2.2073,"has_fulltext":false,"cited_by_count":8,"citation_normalized_percentile":{"value":0.88047771,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"13"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.9984999895095825,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8269769549369812},{"id":"https://openalex.org/keywords/software-portability","display_name":"Software portability","score":0.7607332468032837},{"id":"https://openalex.org/keywords/kernel","display_name":"Kernel (algebra)","score":0.6314671039581299},{"id":"https://openalex.org/keywords/microarchitecture","display_name":"Microarchitecture","score":0.6013912558555603},{"id":"https://openalex.org/keywords/computer-architecture","display_name":"Computer architecture","score":0.52366703748703},{"id":"https://openalex.org/keywords/code-generation","display_name":"Code generation","score":0.4576079547405243},{"id":"https://openalex.org/keywords/architecture","display_name":"Architecture","score":0.44670379161834717},{"id":"https://openalex.org/keywords/programming-paradigm","display_name":"Programming paradigm","score":0.4454805254936218},{"id":"https://openalex.org/keywords/extensibility","display_name":"Extensibility","score":0.4326014816761017},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.431222528219223},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.41405269503593445},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.3713988661766052},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.28968265652656555},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.25568437576293945},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.15215426683425903}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8269769549369812},{"id":"https://openalex.org/C63000827","wikidata":"https://www.wikidata.org/wiki/Q3080428","display_name":"Software portability","level":2,"score":0.7607332468032837},{"id":"https://openalex.org/C74193536","wikidata":"https://www.wikidata.org/wiki/Q574844","display_name":"Kernel (algebra)","level":2,"score":0.6314671039581299},{"id":"https://openalex.org/C107598950","wikidata":"https://www.wikidata.org/wiki/Q259864","display_name":"Microarchitecture","level":2,"score":0.6013912558555603},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.52366703748703},{"id":"https://openalex.org/C133162039","wikidata":"https://www.wikidata.org/wiki/Q1061077","display_name":"Code generation","level":3,"score":0.4576079547405243},{"id":"https://openalex.org/C123657996","wikidata":"https://www.wikidata.org/wiki/Q12271","display_name":"Architecture","level":2,"score":0.44670379161834717},{"id":"https://openalex.org/C34165917","wikidata":"https://www.wikidata.org/wiki/Q188267","display_name":"Programming paradigm","level":2,"score":0.4454805254936218},{"id":"https://openalex.org/C32833848","wikidata":"https://www.wikidata.org/wiki/Q4115054","display_name":"Extensibility","level":2,"score":0.4326014816761017},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.431222528219223},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.41405269503593445},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.3713988661766052},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.28968265652656555},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.25568437576293945},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.15215426683425903},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.0},{"id":"https://openalex.org/C142362112","wikidata":"https://www.wikidata.org/wiki/Q735","display_name":"Art","level":0,"score":0.0},{"id":"https://openalex.org/C153349607","wikidata":"https://www.wikidata.org/wiki/Q36649","display_name":"Visual arts","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/micro.2016.7783715","is_oa":false,"landing_page_url":"https://doi.org/10.1109/micro.2016.7783715","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2016 49th Annual IEEE/ACM International Symposium on Microarchitecture (MICRO)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.5699999928474426,"display_name":"Affordable and clean energy","id":"https://metadata.un.org/sdg/7"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320309480","display_name":"Nvidia","ror":"https://ror.org/03jdj4y14"},{"id":"https://openalex.org/F4320332222","display_name":"University of Illinois at Urbana-Champaign","ror":"https://ror.org/047426m28"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":37,"referenced_works":["https://openalex.org/W38257615","https://openalex.org/W1528437110","https://openalex.org/W1964031104","https://openalex.org/W1964704819","https://openalex.org/W1991592471","https://openalex.org/W1997978901","https://openalex.org/W2000873501","https://openalex.org/W2003183734","https://openalex.org/W2019708326","https://openalex.org/W2027235232","https://openalex.org/W2028914809","https://openalex.org/W2061313045","https://openalex.org/W2073061372","https://openalex.org/W2080592089","https://openalex.org/W2088866486","https://openalex.org/W2091553595","https://openalex.org/W2092703362","https://openalex.org/W2093843662","https://openalex.org/W2109473404","https://openalex.org/W2115148068","https://openalex.org/W2143609451","https://openalex.org/W2147193503","https://openalex.org/W2149234156","https://openalex.org/W2154697693","https://openalex.org/W2163491234","https://openalex.org/W2294241027","https://openalex.org/W2296864832","https://openalex.org/W3021820648","https://openalex.org/W3145506805","https://openalex.org/W4231595696","https://openalex.org/W4246166885","https://openalex.org/W4251164127","https://openalex.org/W4251637954","https://openalex.org/W4256213682","https://openalex.org/W6631884302","https://openalex.org/W6673829052","https://openalex.org/W6832800064"],"related_works":["https://openalex.org/W2944886731","https://openalex.org/W2051015362","https://openalex.org/W64075999","https://openalex.org/W2090801845","https://openalex.org/W2100800440","https://openalex.org/W4240345607","https://openalex.org/W2654359861","https://openalex.org/W2111416043","https://openalex.org/W4243399827","https://openalex.org/W2088827150"],"abstract_inverted_index":{"The":[0],"diversity":[1],"of":[2,30,57,103,162,169,172],"microarchitecture":[3],"designs":[4],"in":[5,20],"heterogeneous":[6],"computing":[7,59],"systems":[8],"allows":[9],"programs":[10],"to":[11,99],"achieve":[12],"high":[13],"performance":[14,37,50,171],"and":[15,76,115,148,160,181],"energy":[16],"efficiency,":[17],"but":[18],"results":[19],"substantial":[21],"software":[22],"re-development":[23],"cost":[24],"for":[25,83,124,136,157],"each":[26],"type":[27],"or":[28],"generation":[29],"hardware.":[31],"To":[32,61],"mitigate":[33],"this":[34,63],"cost,":[35],"a":[36,68,101,110],"portable":[38],"programming":[39],"system":[40],"is":[41,53,88,107],"required.":[42],"One":[43],"fundamental":[44],"difference":[45],"between":[46],"architectures":[47],"that":[48,72,95,118,152],"makes":[49],"portability":[51],"challenging":[52],"the":[54,137,170],"hierarchical":[55],"organization":[56],"their":[58],"elements.":[60],"address":[62],"challenge,":[64],"we":[65],"introduce":[66],"TANGRAM,":[67],"kernel":[69],"synthesis":[70],"framework":[71],"composes":[73],"architecture-neutral":[74],"computations":[75],"composition":[77,116,122],"rules":[78],"into":[79],"high-performance":[80],"kernels":[81],"customized":[82],"different":[84,158],"architectural":[85,93],"hierarchies.":[86],"TANGRAM":[87,156],"based":[89],"on":[90],"an":[91],"extensible":[92],"model":[94,106],"can":[96,119],"be":[97],"used":[98],"specify":[100],"variety":[102],"architectures.":[104],"This":[105],"coupled":[108],"with":[109],"generic":[111],"design":[112],"space":[113],"exploration":[114],"algorithm":[117],"generate":[120],"multiple":[121],"plans":[123,135],"any":[125],"specified":[126],"architecture.":[127],"A":[128],"custom":[129],"code":[130,153],"generator":[131],"then":[132],"compiles":[133],"these":[134],"target":[138],"architecture":[139],"while":[140],"performing":[141],"various":[142],"optimizations":[143],"such":[144,177],"as":[145,178],"data":[146],"placement":[147],"tuning.":[149],"We":[150],"show":[151],"synthesized":[154],"by":[155],"types":[159],"generations":[161],"devices":[163],"achieves":[164],"no":[165],"less":[166],"than":[167],"70%":[168],"highly":[173],"optimized":[174],"vendor":[175],"libraries":[176],"Intel":[179],"MKL":[180],"NVIDIA":[182],"CUBLAS/CUSPARSE.":[183]},"counts_by_year":[{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":1},{"year":2017,"cited_by_count":5},{"year":2016,"cited_by_count":1}],"updated_date":"2025-11-25T21:42:39.735039","created_date":"2025-10-10T00:00:00"}
