{"id":"https://openalex.org/W4402571373","doi":"https://doi.org/10.1109/tcasai.2024.3461716","title":"LoopTree: Exploring the Fused-Layer Dataflow Accelerator Design Space","display_name":"LoopTree: Exploring the Fused-Layer Dataflow Accelerator Design Space","publication_year":2024,"publication_date":"2024-09-01","ids":{"openalex":"https://openalex.org/W4402571373","doi":"https://doi.org/10.1109/tcasai.2024.3461716"},"language":"en","primary_location":{"id":"doi:10.1109/tcasai.2024.3461716","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcasai.2024.3461716","pdf_url":null,"source":{"id":"https://openalex.org/S4404675360","display_name":"IEEE transactions on circuits and systems for artificial intelligence.","issn_l":"2996-6647","issn":["2996-6647"],"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Circuits and Systems for Artificial Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2409.13625","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5047577036","display_name":"Michael A. Gilbert","orcid":"https://orcid.org/0000-0002-2503-1626"},"institutions":[{"id":"https://openalex.org/I63966007","display_name":"Massachusetts Institute of Technology","ror":"https://ror.org/042nb2s44","country_code":"US","type":"education","lineage":["https://openalex.org/I63966007"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Michael Gilbert","raw_affiliation_strings":["Department of Electrical Engineering and Computer Science, School of Engineering, Massachusetts Institute of Technology, Cambridge, MA, USA"],"raw_orcid":"https://orcid.org/0000-0002-2503-1626","affiliations":[{"raw_affiliation_string":"Department of Electrical Engineering and Computer Science, School of Engineering, Massachusetts Institute of Technology, Cambridge, MA, USA","institution_ids":["https://openalex.org/I63966007"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5050803970","display_name":"Yannan Nellie Wu","orcid":"https://orcid.org/0009-0001-0933-4600"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yannan Nellie Wu","raw_affiliation_strings":["Google, Mountain View, CA, USA"],"raw_orcid":"https://orcid.org/0009-0001-0933-4600","affiliations":[{"raw_affiliation_string":"Google, Mountain View, CA, USA","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5024384625","display_name":"Joel Emer","orcid":null},"institutions":[{"id":"https://openalex.org/I63966007","display_name":"Massachusetts Institute of Technology","ror":"https://ror.org/042nb2s44","country_code":"US","type":"education","lineage":["https://openalex.org/I63966007"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Joel S. Emer","raw_affiliation_strings":["Department of Electrical Engineering and Computer Science, School of Engineering, Massachusetts Institute of Technology, Cambridge, MA, USA"],"raw_orcid":"https://orcid.org/0000-0002-3459-5466","affiliations":[{"raw_affiliation_string":"Department of Electrical Engineering and Computer Science, School of Engineering, Massachusetts Institute of Technology, Cambridge, MA, USA","institution_ids":["https://openalex.org/I63966007"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5066351389","display_name":"Vivienne Sze","orcid":null},"institutions":[{"id":"https://openalex.org/I63966007","display_name":"Massachusetts Institute of Technology","ror":"https://ror.org/042nb2s44","country_code":"US","type":"education","lineage":["https://openalex.org/I63966007"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Vivienne Sze","raw_affiliation_strings":["Department of Electrical Engineering and Computer Science, School of Engineering, Massachusetts Institute of Technology, Cambridge, MA, USA"],"raw_orcid":"https://orcid.org/0000-0003-4841-3990","affiliations":[{"raw_affiliation_string":"Department of Electrical Engineering and Computer Science, School of Engineering, Massachusetts Institute of Technology, Cambridge, MA, USA","institution_ids":["https://openalex.org/I63966007"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5047577036"],"corresponding_institution_ids":["https://openalex.org/I63966007"],"apc_list":null,"apc_paid":null,"fwci":1.0092,"has_fulltext":true,"cited_by_count":3,"citation_normalized_percentile":{"value":0.78015319,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":96},"biblio":{"volume":"1","issue":"1","first_page":"97","last_page":"111"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.9896000027656555,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.9896000027656555,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9848999977111816,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10904","display_name":"Embedded Systems Design Techniques","score":0.9829000234603882,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/dataflow","display_name":"Dataflow","score":0.9308621883392334},{"id":"https://openalex.org/keywords/space","display_name":"Space (punctuation)","score":0.5958559513092041},{"id":"https://openalex.org/keywords/layer","display_name":"Layer (electronics)","score":0.5887506008148193},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5221676826477051},{"id":"https://openalex.org/keywords/computer-architecture","display_name":"Computer architecture","score":0.47737619280815125},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.2919533848762512},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.17126724123954773},{"id":"https://openalex.org/keywords/nanotechnology","display_name":"Nanotechnology","score":0.15917488932609558},{"id":"https://openalex.org/keywords/materials-science","display_name":"Materials science","score":0.15206128358840942}],"concepts":[{"id":"https://openalex.org/C96324660","wikidata":"https://www.wikidata.org/wiki/Q205446","display_name":"Dataflow","level":2,"score":0.9308621883392334},{"id":"https://openalex.org/C2778572836","wikidata":"https://www.wikidata.org/wiki/Q380933","display_name":"Space (punctuation)","level":2,"score":0.5958559513092041},{"id":"https://openalex.org/C2779227376","wikidata":"https://www.wikidata.org/wiki/Q6505497","display_name":"Layer (electronics)","level":2,"score":0.5887506008148193},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5221676826477051},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.47737619280815125},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.2919533848762512},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.17126724123954773},{"id":"https://openalex.org/C171250308","wikidata":"https://www.wikidata.org/wiki/Q11468","display_name":"Nanotechnology","level":1,"score":0.15917488932609558},{"id":"https://openalex.org/C192562407","wikidata":"https://www.wikidata.org/wiki/Q228736","display_name":"Materials science","level":0,"score":0.15206128358840942}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/tcasai.2024.3461716","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcasai.2024.3461716","pdf_url":null,"source":{"id":"https://openalex.org/S4404675360","display_name":"IEEE transactions on circuits and systems for artificial intelligence.","issn_l":"2996-6647","issn":["2996-6647"],"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Circuits and Systems for Artificial Intelligence","raw_type":"journal-article"},{"id":"pmh:oai:arXiv.org:2409.13625","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2409.13625","pdf_url":"https://arxiv.org/pdf/2409.13625","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2409.13625","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2409.13625","pdf_url":"https://arxiv.org/pdf/2409.13625","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4402571373.pdf","grobid_xml":"https://content.openalex.org/works/W4402571373.grobid-xml"},"referenced_works_count":46,"referenced_works":["https://openalex.org/W1558370006","https://openalex.org/W1772650917","https://openalex.org/W2068326054","https://openalex.org/W2613989746","https://openalex.org/W2905173465","https://openalex.org/W2906043559","https://openalex.org/W2925491732","https://openalex.org/W2935331687","https://openalex.org/W2940862705","https://openalex.org/W2963163009","https://openalex.org/W2979310060","https://openalex.org/W2982083293","https://openalex.org/W2998732502","https://openalex.org/W3017521908","https://openalex.org/W3024621361","https://openalex.org/W3132942233","https://openalex.org/W3135807226","https://openalex.org/W3159869264","https://openalex.org/W3190062760","https://openalex.org/W3192336523","https://openalex.org/W4205146836","https://openalex.org/W4232315234","https://openalex.org/W4243519499","https://openalex.org/W4244024631","https://openalex.org/W4249932213","https://openalex.org/W4253012315","https://openalex.org/W4280562683","https://openalex.org/W4283643682","https://openalex.org/W4293102247","https://openalex.org/W4312646647","https://openalex.org/W4360831791","https://openalex.org/W4380881063","https://openalex.org/W4389491877","https://openalex.org/W4389500477","https://openalex.org/W4401212157","https://openalex.org/W6637373629","https://openalex.org/W6640090968","https://openalex.org/W6683738474","https://openalex.org/W6684191040","https://openalex.org/W6687483927","https://openalex.org/W6739901393","https://openalex.org/W6755207826","https://openalex.org/W6759455113","https://openalex.org/W6840899694","https://openalex.org/W6864491216","https://openalex.org/W6869801542"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2293118914","https://openalex.org/W2998381397","https://openalex.org/W4236419692","https://openalex.org/W2171015181","https://openalex.org/W3167919718","https://openalex.org/W4251718783","https://openalex.org/W1998888015"],"abstract_inverted_index":{"Latency":[0],"and":[1,22,70,135,157,204,218],"energy":[2,23,68,213],"consumption":[3,69],"are":[4,161],"key":[5],"metrics":[6],"in":[7,52,59,123,179,223,256],"the":[8,41,73,87,94,116,141,154,211,275],"performance":[9],"of":[10,115,120,140,153,181,221,235],"deep":[11],"neural":[12],"network":[13],"(DNN)":[14],"accelerators.":[15],"A":[16],"significant":[17],"factor":[18],"contributing":[19],"to":[20,29,66,85,92,190,200,209,264,273],"latency":[21],"is":[24,34,76,100],"data":[25,33,36,49,58,75,118,183],"transfers.":[26],"One":[27],"method":[28],"reduce":[30,67,86],"transfers":[31,220],"or":[32],"reusing":[35],"when":[37],"multiple":[38],"operations":[39,51],"use":[40],"same":[42,276],"data.":[43,95],"<italic":[44,193],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[45,80,194,261,267],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">Fused-layer":[46],"accelerators</i>":[47],"reuse":[48,93],"across":[50],"different":[53],"layers":[54],"by":[55],"retaining":[56,121],"intermediate":[57,74,117],"on-chip":[60,88,97,130],"buffers,":[61],"which":[62],"has":[63,176],"been":[64],"shown":[65],"latency.":[71],"Moreover,":[72],"often":[77],"tiled":[78],"(<italic":[79,260],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">i.e.</i>,":[81],"broken":[82],"into":[83],"chunks)":[84],"buffer":[89,98,131,215,270],"capacity":[90,99,216,271],"required":[91],"Because":[96],"frequently":[101],"more":[102,158,171,177,257],"limited":[103],"than":[104],"computation":[105],"units,":[106],"fused-layer":[107,142],"dataflow":[108,143],"accelerators":[109],"may":[110],"also":[111],"recompute":[112],"certain":[113],"parts":[114],"instead":[119],"them":[122,192],"a":[124,151,170,198,206,232,239,265],"buffer.":[125],"Achieving":[126],"efficient":[127,159,258],"trade-offs":[128],"between":[129],"capacity,":[132],"off-chip":[133,219,277],"transfers,":[134],"recomputation":[136,185],"requires":[137],"systematic":[138],"exploration":[139],"design":[144,155,173,225],"space.":[145,226],"However,":[146],"prior":[147,236],"work":[148],"only":[149],"explored":[150],"subset":[152],"space,":[156],"designs":[160,222,259],"left":[162],"unexplored.":[163],"In":[164],"this":[165,224,252],"work,":[166],"we":[167,244],"propose":[168],"(1)":[169],"extensive":[172],"space":[174,254],"that":[175,248],"choices":[178],"terms":[180],"tiling,":[182],"retention,":[184],"and,":[186],"importantly,":[187],"allows":[188],"us":[189],"explore":[191],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">in":[195],"combination</i>,":[196],"(2)":[197],"taxonomy":[199],"systematically":[201],"specify":[202],"designs,":[203],"(3)":[205],"model,":[207],"LoopTree,":[208],"evaluate":[210],"latency,":[212],"consumption,":[214],"requirements,":[217],"We":[227],"validate":[228],"our":[229],"model":[230],"against":[231],"representative":[233],"set":[234],"architectures,":[237],"achieving":[238],"worst-case":[240],"4%":[241],"error.":[242],"Finally,":[243],"present":[245],"case":[246],"studies":[247],"show":[249],"how":[250],"exploring":[251],"larger":[253],"results":[255],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">e.g.</i>,":[262],"up":[263],"10<inline-formula":[266],"xmlns:xlink=\"http://www.w3.org/1999/xlink\"><tex-math":[268],"notation=\"LaTeX\">$\\times$</tex-math></inline-formula>":[269],"reduction":[272],"achieve":[274],"transfers).":[278]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":1}],"updated_date":"2025-12-27T23:08:20.325037","created_date":"2025-10-10T00:00:00"}
