{"id":"https://openalex.org/W3090742253","doi":"https://doi.org/10.1145/3410463.3414641","title":"TAFE","display_name":"TAFE","publication_year":2020,"publication_date":"2020-09-30","ids":{"openalex":"https://openalex.org/W3090742253","doi":"https://doi.org/10.1145/3410463.3414641","mag":"3090742253"},"language":"en","primary_location":{"id":"doi:10.1145/3410463.3414641","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3410463.3414641","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM International Conference on Parallel Architectures and Compilation Techniques","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5049343138","display_name":"Kishore Punniyamurthy","orcid":"https://orcid.org/0009-0006-2311-8417"},"institutions":[{"id":"https://openalex.org/I86519309","display_name":"The University of Texas at Austin","ror":"https://ror.org/00hj54h04","country_code":"US","type":"education","lineage":["https://openalex.org/I86519309"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Kishore Punniyamurthy","raw_affiliation_strings":["The University of Texas at Austin, Austin, TX, USA"],"affiliations":[{"raw_affiliation_string":"The University of Texas at Austin, Austin, TX, USA","institution_ids":["https://openalex.org/I86519309"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5046024226","display_name":"Andreas Gerstlauer","orcid":"https://orcid.org/0000-0002-6748-2054"},"institutions":[{"id":"https://openalex.org/I86519309","display_name":"The University of Texas at Austin","ror":"https://ror.org/00hj54h04","country_code":"US","type":"education","lineage":["https://openalex.org/I86519309"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Andreas Gerstlauer","raw_affiliation_strings":["The University of Texas at Austin, Austin, TX, USA"],"affiliations":[{"raw_affiliation_string":"The University of Texas at Austin, Austin, TX, USA","institution_ids":["https://openalex.org/I86519309"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5049343138"],"corresponding_institution_ids":["https://openalex.org/I86519309"],"apc_list":null,"apc_paid":null,"fwci":0.9241,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.72938444,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"17","last_page":"29"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10829","display_name":"Interconnection Networks and Systems","score":0.9984999895095825,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.9097551107406616},{"id":"https://openalex.org/keywords/thread","display_name":"Thread (computing)","score":0.7035799026489258},{"id":"https://openalex.org/keywords/cache","display_name":"Cache","score":0.5251402854919434},{"id":"https://openalex.org/keywords/memory-footprint","display_name":"Memory footprint","score":0.48991554975509644},{"id":"https://openalex.org/keywords/kernel","display_name":"Kernel (algebra)","score":0.4528523087501526},{"id":"https://openalex.org/keywords/dynamic-data","display_name":"Dynamic data","score":0.4142512381076813},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.39599424600601196},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.38365620374679565},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.3271472752094269},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.21752268075942993}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.9097551107406616},{"id":"https://openalex.org/C138101251","wikidata":"https://www.wikidata.org/wiki/Q213092","display_name":"Thread (computing)","level":2,"score":0.7035799026489258},{"id":"https://openalex.org/C115537543","wikidata":"https://www.wikidata.org/wiki/Q165596","display_name":"Cache","level":2,"score":0.5251402854919434},{"id":"https://openalex.org/C74912251","wikidata":"https://www.wikidata.org/wiki/Q6815727","display_name":"Memory footprint","level":2,"score":0.48991554975509644},{"id":"https://openalex.org/C74193536","wikidata":"https://www.wikidata.org/wiki/Q574844","display_name":"Kernel (algebra)","level":2,"score":0.4528523087501526},{"id":"https://openalex.org/C197298091","wikidata":"https://www.wikidata.org/wiki/Q5318963","display_name":"Dynamic data","level":2,"score":0.4142512381076813},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.39599424600601196},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.38365620374679565},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.3271472752094269},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.21752268075942993},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3410463.3414641","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3410463.3414641","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM International Conference on Parallel Architectures and Compilation Techniques","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":20,"referenced_works":["https://openalex.org/W1980390491","https://openalex.org/W1984689820","https://openalex.org/W1988307172","https://openalex.org/W2025567609","https://openalex.org/W2054818614","https://openalex.org/W2070232376","https://openalex.org/W2090409324","https://openalex.org/W2125980577","https://openalex.org/W2156114981","https://openalex.org/W2157802978","https://openalex.org/W2418984255","https://openalex.org/W2555826082","https://openalex.org/W2572767837","https://openalex.org/W2625200202","https://openalex.org/W2761710529","https://openalex.org/W2766505687","https://openalex.org/W3009233884","https://openalex.org/W4233093232","https://openalex.org/W4251384337","https://openalex.org/W4300461182"],"related_works":["https://openalex.org/W2033778626","https://openalex.org/W2026780467","https://openalex.org/W2187110187","https://openalex.org/W2018697868","https://openalex.org/W2792315845","https://openalex.org/W2002371119","https://openalex.org/W2152099439","https://openalex.org/W4390492895","https://openalex.org/W2886225742","https://openalex.org/W3168833002"],"abstract_inverted_index":{"In":[0,62],"multi-GPU":[1],"and":[2,24,32,111],"multi-chiplet":[3],"GPU":[4,78],"systems":[5],"exhibiting":[6],"NUMA":[7],"behavior,":[8],"information":[9],"about":[10,37],"addresses":[11],"accessed":[12],"by":[13],"threads":[14],"is":[15],"crucial":[16],"for":[17,70],"various":[18],"optimizations":[19],"such":[20],"as":[21],"data/thread":[22],"co-location":[23],"cache/scratchpad":[25],"memory":[26],"management.":[27],"To":[28],"make":[29],"optimal":[30],"decisions":[31],"avoid":[33],"runtime":[34],"overhead,":[35],"knowledge":[36],"dynamic,":[38],"potentially":[39],"data-dependent":[40],"access":[41],"patterns":[42],"should":[43],"be":[44],"available":[45],"before":[46],"kernel":[47,99],"execution.":[48],"Existing":[49],"approaches":[50],"require":[51],"rewriting":[52],"of":[53,77],"applications":[54],"or":[55],"can":[56],"only":[57],"capture":[58],"static,":[59],"data-independent":[60],"patterns.":[61],"this":[63],"paper,":[64],"we":[65],"propose":[66,102],"TAFE,":[67],"a":[68,103],"framework":[69],"accurate":[71],"dynamic":[72,88,109],"thread":[73],"address":[74,84,95],"footprint":[75],"estimation":[76],"applications.":[79],"TAFE":[80],"combines":[81],"minimal":[82],"static":[83],"pattern":[85],"annotations":[86],"with":[87],"data":[89],"dependency":[90],"tracking":[91],"to":[92,98,107,118],"compute":[93],"threadblock-specific":[94],"footprints":[96],"prior":[97],"launch.":[100],"We":[101],"low-overhead":[104],"software":[105],"mechanism":[106],"track":[108],"data-dependencies":[110],"provide":[112],"an":[113],"optional":[114],"lightweight":[115],"hardware":[116],"extension":[117],"support":[119],"transparent":[120],"tracking.":[121]},"counts_by_year":[{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2020-10-08T00:00:00"}
