{"id":"https://openalex.org/W3090487264","doi":"https://doi.org/10.1145/3410463.3414632","title":"Fireiron","display_name":"Fireiron","publication_year":2020,"publication_date":"2020-09-30","ids":{"openalex":"https://openalex.org/W3090487264","doi":"https://doi.org/10.1145/3410463.3414632","mag":"3090487264"},"language":"en","primary_location":{"id":"doi:10.1145/3410463.3414632","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3410463.3414632","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3410463.3414632","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM International Conference on Parallel Architectures and Compilation Techniques","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3410463.3414632","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5075117421","display_name":"Bastian Hagedorn","orcid":"https://orcid.org/0000-0003-1833-5514"},"institutions":[{"id":"https://openalex.org/I22465464","display_name":"University of M\u00fcnster","ror":"https://ror.org/00pd74e08","country_code":"DE","type":"education","lineage":["https://openalex.org/I22465464"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Bastian Hagedorn","raw_affiliation_strings":["University of M\u00fcnster, M\u00fcnster, Germany"],"affiliations":[{"raw_affiliation_string":"University of M\u00fcnster, M\u00fcnster, Germany","institution_ids":["https://openalex.org/I22465464"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5012221490","display_name":"Archibald Samuel Elliott","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Archibald Samuel Elliott","raw_affiliation_strings":["lowRISC, Cambridge, United Kingdom"],"affiliations":[{"raw_affiliation_string":"lowRISC, Cambridge, United Kingdom","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5047788967","display_name":"Henrik Barthels","orcid":"https://orcid.org/0000-0001-6744-3605"},"institutions":[{"id":"https://openalex.org/I887968799","display_name":"RWTH Aachen University","ror":"https://ror.org/04xfq0f34","country_code":"DE","type":"education","lineage":["https://openalex.org/I887968799"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Henrik Barthels","raw_affiliation_strings":["RWTH Aachen University, Aachen, Germany"],"affiliations":[{"raw_affiliation_string":"RWTH Aachen University, Aachen, Germany","institution_ids":["https://openalex.org/I887968799"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5085160432","display_name":"Rastislav Bod\u00edk","orcid":"https://orcid.org/0000-0001-6639-1647"},"institutions":[{"id":"https://openalex.org/I201448701","display_name":"University of Washington","ror":"https://ror.org/00cvxb145","country_code":"US","type":"education","lineage":["https://openalex.org/I201448701"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Rastislav Bodik","raw_affiliation_strings":["University of Washington, Seattle, WA, USA"],"affiliations":[{"raw_affiliation_string":"University of Washington, Seattle, WA, USA","institution_ids":["https://openalex.org/I201448701"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5077691647","display_name":"Vinod Grover","orcid":null},"institutions":[{"id":"https://openalex.org/I4210127875","display_name":"Nvidia (United States)","ror":"https://ror.org/03jdj4y14","country_code":"US","type":"company","lineage":["https://openalex.org/I4210127875"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Vinod Grover","raw_affiliation_strings":["NVIDIA Corporation, Redmond, WA, USA"],"affiliations":[{"raw_affiliation_string":"NVIDIA Corporation, Redmond, WA, USA","institution_ids":["https://openalex.org/I4210127875"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5075117421"],"corresponding_institution_ids":["https://openalex.org/I22465464"],"apc_list":null,"apc_paid":null,"fwci":2.593,"has_fulltext":true,"cited_by_count":26,"citation_normalized_percentile":{"value":0.9000423,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":95,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"71","last_page":"82"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.9951000213623047,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10904","display_name":"Embedded Systems Design Techniques","score":0.9937000274658203,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8959966897964478},{"id":"https://openalex.org/keywords/compiler","display_name":"Compiler","score":0.8135197162628174},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.6659345626831055},{"id":"https://openalex.org/keywords/implementation","display_name":"Implementation","score":0.6317269802093506},{"id":"https://openalex.org/keywords/kernel","display_name":"Kernel (algebra)","score":0.5587368607521057},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.47819069027900696},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.4481026530265808},{"id":"https://openalex.org/keywords/scheduling","display_name":"Scheduling (production processes)","score":0.4472912549972534},{"id":"https://openalex.org/keywords/computer-architecture","display_name":"Computer architecture","score":0.4381089508533478},{"id":"https://openalex.org/keywords/coding","display_name":"Coding (social sciences)","score":0.4322066307067871},{"id":"https://openalex.org/keywords/code-generation","display_name":"Code generation","score":0.4320198893547058},{"id":"https://openalex.org/keywords/data-structure","display_name":"Data structure","score":0.4210563600063324},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.21010076999664307},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.1007811427116394}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8959966897964478},{"id":"https://openalex.org/C169590947","wikidata":"https://www.wikidata.org/wiki/Q47506","display_name":"Compiler","level":2,"score":0.8135197162628174},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.6659345626831055},{"id":"https://openalex.org/C26713055","wikidata":"https://www.wikidata.org/wiki/Q245962","display_name":"Implementation","level":2,"score":0.6317269802093506},{"id":"https://openalex.org/C74193536","wikidata":"https://www.wikidata.org/wiki/Q574844","display_name":"Kernel (algebra)","level":2,"score":0.5587368607521057},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.47819069027900696},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.4481026530265808},{"id":"https://openalex.org/C206729178","wikidata":"https://www.wikidata.org/wiki/Q2271896","display_name":"Scheduling (production processes)","level":2,"score":0.4472912549972534},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.4381089508533478},{"id":"https://openalex.org/C179518139","wikidata":"https://www.wikidata.org/wiki/Q5140297","display_name":"Coding (social sciences)","level":2,"score":0.4322066307067871},{"id":"https://openalex.org/C133162039","wikidata":"https://www.wikidata.org/wiki/Q1061077","display_name":"Code generation","level":3,"score":0.4320198893547058},{"id":"https://openalex.org/C162319229","wikidata":"https://www.wikidata.org/wiki/Q175263","display_name":"Data structure","level":2,"score":0.4210563600063324},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.21010076999664307},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.1007811427116394},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3410463.3414632","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3410463.3414632","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3410463.3414632","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM International Conference on Parallel Architectures and Compilation Techniques","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3410463.3414632","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3410463.3414632","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3410463.3414632","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM International Conference on Parallel Architectures and Compilation Techniques","raw_type":"proceedings-article"},"sustainable_development_goals":[{"score":0.5600000023841858,"display_name":"Industry, innovation and infrastructure","id":"https://metadata.un.org/sdg/9"}],"awards":[{"id":"https://openalex.org/G1006881031","display_name":null,"funder_award_id":"CONIX","funder_id":"https://openalex.org/F4320306087","funder_display_name":"Semiconductor Research Corporation"},{"id":"https://openalex.org/G1103137783","display_name":"SI2-SSE: Algorithms and Tools for Data-Driven Executable Biology","funder_award_id":"1535191","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G2233570209","display_name":null,"funder_award_id":"1936731","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G3785592558","display_name":null,"funder_award_id":"FA8750-16-2-0032","funder_id":"https://openalex.org/F4320332180","funder_display_name":"Defense Advanced Research Projects Agency"},{"id":"https://openalex.org/G4651870101","display_name":null,"funder_award_id":"OIA-1936731","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G4713059963","display_name":null,"funder_award_id":"FA8750","funder_id":"https://openalex.org/F4320332180","funder_display_name":"Defense Advanced Research Projects Agency"},{"id":"https://openalex.org/G4767762227","display_name":null,"funder_award_id":"CCF-1918027","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G4829893526","display_name":null,"funder_award_id":"FA8750--14--C--0011, FA8750--16--2--0032","funder_id":"https://openalex.org/F4320332180","funder_display_name":"Defense Advanced Research Projects Agency"},{"id":"https://openalex.org/G490593687","display_name":null,"funder_award_id":"Intel","funder_id":"https://openalex.org/F4320306087","funder_display_name":"Semiconductor Research Corporation"},{"id":"https://openalex.org/G5292072049","display_name":"FMitF: Track II: Programming by Demonstration for the Browser with Applications in Data Science","funder_award_id":"1918027","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G6724302884","display_name":null,"funder_award_id":"FA8750-14-C-0011","funder_id":"https://openalex.org/F4320332180","funder_display_name":"Defense Advanced Research Projects Agency"},{"id":"https://openalex.org/G6894402473","display_name":null,"funder_award_id":"Fellowship","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G6947076996","display_name":null,"funder_award_id":"FA8750-16-2-003","funder_id":"https://openalex.org/F4320332180","funder_display_name":"Defense Advanced Research Projects Agency"},{"id":"https://openalex.org/G731481035","display_name":null,"funder_award_id":"1723352","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320306087","display_name":"Semiconductor Research Corporation","ror":"https://ror.org/047z4n946"},{"id":"https://openalex.org/F4320307102","display_name":"Intel Corporation","ror":"https://ror.org/01ek73717"},{"id":"https://openalex.org/F4320307786","display_name":"Adobe Systems","ror":"https://ror.org/059tvcg64"},{"id":"https://openalex.org/F4320308258","display_name":"Qualcomm","ror":"https://ror.org/002zrf773"},{"id":"https://openalex.org/F4320308737","display_name":"Facebook","ror":"https://ror.org/01zbnvs85"},{"id":"https://openalex.org/F4320309327","display_name":"Google","ror":"https://ror.org/00njsd438"},{"id":"https://openalex.org/F4320309480","display_name":"Nvidia","ror":"https://ror.org/03jdj4y14"},{"id":"https://openalex.org/F4320332180","display_name":"Defense Advanced Research Projects Agency","ror":"https://ror.org/02caytj08"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3090487264.pdf","grobid_xml":"https://content.openalex.org/works/W3090487264.grobid-xml"},"referenced_works_count":23,"referenced_works":["https://openalex.org/W1989988531","https://openalex.org/W2055312318","https://openalex.org/W2077143534","https://openalex.org/W2090409324","https://openalex.org/W2100218206","https://openalex.org/W2135106356","https://openalex.org/W2136952590","https://openalex.org/W2160183719","https://openalex.org/W2594730095","https://openalex.org/W2644985445","https://openalex.org/W2772612468","https://openalex.org/W2786320458","https://openalex.org/W2791258966","https://openalex.org/W2806891462","https://openalex.org/W2888024215","https://openalex.org/W2898231337","https://openalex.org/W2914500262","https://openalex.org/W2949251082","https://openalex.org/W2949967139","https://openalex.org/W2961619211","https://openalex.org/W2987973366","https://openalex.org/W3007772124","https://openalex.org/W3008964021"],"related_works":["https://openalex.org/W4240253816","https://openalex.org/W3096456556","https://openalex.org/W2120447654","https://openalex.org/W650988184","https://openalex.org/W2162410319","https://openalex.org/W2137356287","https://openalex.org/W4321184925","https://openalex.org/W60915090","https://openalex.org/W2025840053","https://openalex.org/W2161692994"],"abstract_inverted_index":{"High":[0],"GPU":[1,78,106,160],"performance":[2,51,98],"can":[3,137],"only":[4],"be":[5,36,119,138],"achieved":[6],"if":[7],"a":[8,93],"kernel":[9],"efficiently":[10],"uses":[11],"the":[12,50,76,146,177,193,200],"multi-layered":[13],"compute":[14],"and":[15,42,72,114,129],"memory":[16],"hierarchies.":[17],"For":[18],"example,":[19],"accelerators":[20],"such":[21],"as":[22,143],"NVIDIA":[23],"?s":[24],"Tensor":[25,153],"Cores":[26],"require":[27],"specific":[28],"mappings":[29],"of":[30,52,149,179,187],"threads":[31],"to":[32,41,48,74,82,111,141,175],"data":[33,39,130],"that":[34,108,126,136,170,192],"must":[35,118],"considered":[37],"in":[38,64,121],"movements":[40,131],"from":[43],"registers.":[44],"Current":[45],"compilers":[46,112],"struggle":[47],"match":[49],"vendor":[53],"libraries":[54],"like":[55,152],"cu":[56,205],"BLAS":[57,206],",":[58],"which":[59,115],"are":[60,109,132,173],"developed":[61],"by":[62,196,204,208],"experts":[63],"assembly.":[65,122],"This":[66],"manual":[67],"low-level":[68],"coding":[69],"is":[70,125],"time-consuming":[71],"complicates":[73],"unlock":[75],"full":[77],"potential,":[79],"preventing":[80],"experimentation":[81],"achieve":[83],"even":[84],"higher":[85],"performance.":[86],"In":[87],"this":[88],"paper":[89],"we":[90,168,190],"introduce":[91],"Fireiron,":[92],"scheduling":[94],"language":[95],"aimed":[96],"at":[97],"experts.":[99],"Fireiron":[100,157,171,197],"provides":[101],"high-level":[102],"abstractions":[103],"for":[104,145],"expressing":[105],"optimizations":[107],"unavailable":[110],"today":[113],"so":[116],"far":[117],"written":[120],"Our":[123],"innovation":[124],"both":[127],"computations":[128],"first":[133],"class":[134],"concepts":[135],"separately":[139],"mapped":[140],"threads,":[142],"required":[144],"efficient":[147],"use":[148],"specialized":[150],"hardware":[151],"Cores.":[154],"We":[155],"evaluate":[156],"on":[158],"three":[159],"architectures":[161],"against":[162],"expert-written":[163],"advanced":[164],"matrix":[165],"multiplications.":[166],"First,":[167],"show":[169,191],"schedules":[172,198],"able":[174],"express":[176],"strategies":[178],"these":[180],"implementations":[181,202],"requiring":[182],"about":[183],"6\u00d7":[184],"less":[185],"lines":[186],"code.":[188],"Second,":[189],"code":[194],"generated":[195],"outperforms":[199],"fastest":[201],"(provided":[203],")":[207],"more":[209],"than":[210],"2\u00d7.":[211]},"counts_by_year":[{"year":2026,"cited_by_count":3},{"year":2025,"cited_by_count":7},{"year":2024,"cited_by_count":5},{"year":2023,"cited_by_count":5},{"year":2022,"cited_by_count":3},{"year":2021,"cited_by_count":3}],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2020-10-08T00:00:00"}
