{"id":"https://openalex.org/W4395106467","doi":"https://doi.org/10.1145/3620666.3651328","title":"TinyForge: A Design Space Exploration to Advance Energy and Silicon Area Trade-offs in tinyML Compute Architectures with Custom Latch Arrays","display_name":"TinyForge: A Design Space Exploration to Advance Energy and Silicon Area Trade-offs in tinyML Compute Architectures with Custom Latch Arrays","publication_year":2024,"publication_date":"2024-04-24","ids":{"openalex":"https://openalex.org/W4395106467","doi":"https://doi.org/10.1145/3620666.3651328"},"language":"en","primary_location":{"id":"doi:10.1145/3620666.3651328","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3620666.3651328","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 29th ACM International Conference on Architectural Support for Programming Languages and Operating Systems, Volume 3","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1145/3620666.3651328","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5065617345","display_name":"Massimo Giordano","orcid":"https://orcid.org/0000-0002-7012-4135"},"institutions":[{"id":"https://openalex.org/I97018004","display_name":"Stanford University","ror":"https://ror.org/00f54p054","country_code":"US","type":"education","lineage":["https://openalex.org/I97018004"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Massimo Giordano","raw_affiliation_strings":["Stanford University, Stanford, California, United States of America"],"affiliations":[{"raw_affiliation_string":"Stanford University, Stanford, California, United States of America","institution_ids":["https://openalex.org/I97018004"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5062360652","display_name":"Rohan Doshi","orcid":"https://orcid.org/0009-0005-4923-825X"},"institutions":[{"id":"https://openalex.org/I97018004","display_name":"Stanford University","ror":"https://ror.org/00f54p054","country_code":"US","type":"education","lineage":["https://openalex.org/I97018004"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Rohan Doshi","raw_affiliation_strings":["Stanford University, Stanford, USA"],"affiliations":[{"raw_affiliation_string":"Stanford University, Stanford, USA","institution_ids":["https://openalex.org/I97018004"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5019570332","display_name":"Qianyun Lu","orcid":"https://orcid.org/0000-0002-0466-5072"},"institutions":[{"id":"https://openalex.org/I97018004","display_name":"Stanford University","ror":"https://ror.org/00f54p054","country_code":"US","type":"education","lineage":["https://openalex.org/I97018004"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Qianyun Lu","raw_affiliation_strings":["Stanford University, Stanford, USA"],"affiliations":[{"raw_affiliation_string":"Stanford University, Stanford, USA","institution_ids":["https://openalex.org/I97018004"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5029806914","display_name":"Boris Murmann","orcid":"https://orcid.org/0000-0003-3417-8782"},"institutions":[{"id":"https://openalex.org/I117965899","display_name":"University of Hawai\u02bbi at M\u0101noa","ror":"https://ror.org/01wspgy28","country_code":"US","type":"education","lineage":["https://openalex.org/I117965899"]},{"id":"https://openalex.org/I1331384533","display_name":"University of Hawaii System","ror":"https://ror.org/03tzaeb71","country_code":"US","type":"education","lineage":["https://openalex.org/I1331384533"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Boris Murmann","raw_affiliation_strings":["University of Hawaii, Honolulu, Hawaii, USA"],"affiliations":[{"raw_affiliation_string":"University of Hawaii, Honolulu, Hawaii, USA","institution_ids":["https://openalex.org/I117965899","https://openalex.org/I1331384533"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5065617345"],"corresponding_institution_ids":["https://openalex.org/I97018004"],"apc_list":null,"apc_paid":null,"fwci":0.7479,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.69888449,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"1033","last_page":"1047"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10502","display_name":"Advanced Memory and Neural Computing","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9976000189781189,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7463118433952332},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.5682735443115234},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.5329656600952148},{"id":"https://openalex.org/keywords/efficient-energy-use","display_name":"Efficient energy use","score":0.5247887969017029},{"id":"https://openalex.org/keywords/design-space-exploration","display_name":"Design space exploration","score":0.49779820442199707},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.4365186095237732},{"id":"https://openalex.org/keywords/computer-architecture","display_name":"Computer architecture","score":0.4358881711959839},{"id":"https://openalex.org/keywords/high-level-synthesis","display_name":"High-level synthesis","score":0.42797911167144775},{"id":"https://openalex.org/keywords/scheduling","display_name":"Scheduling (production processes)","score":0.4186110496520996},{"id":"https://openalex.org/keywords/computer-engineering","display_name":"Computer engineering","score":0.392236590385437},{"id":"https://openalex.org/keywords/field-programmable-gate-array","display_name":"Field-programmable gate array","score":0.35417303442955017},{"id":"https://openalex.org/keywords/electrical-engineering","display_name":"Electrical engineering","score":0.11304426193237305}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7463118433952332},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.5682735443115234},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.5329656600952148},{"id":"https://openalex.org/C2742236","wikidata":"https://www.wikidata.org/wiki/Q924713","display_name":"Efficient energy use","level":2,"score":0.5247887969017029},{"id":"https://openalex.org/C2776221188","wikidata":"https://www.wikidata.org/wiki/Q21072556","display_name":"Design space exploration","level":2,"score":0.49779820442199707},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.4365186095237732},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.4358881711959839},{"id":"https://openalex.org/C58013763","wikidata":"https://www.wikidata.org/wiki/Q5754574","display_name":"High-level synthesis","level":3,"score":0.42797911167144775},{"id":"https://openalex.org/C206729178","wikidata":"https://www.wikidata.org/wiki/Q2271896","display_name":"Scheduling (production processes)","level":2,"score":0.4186110496520996},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.392236590385437},{"id":"https://openalex.org/C42935608","wikidata":"https://www.wikidata.org/wiki/Q190411","display_name":"Field-programmable gate array","level":2,"score":0.35417303442955017},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.11304426193237305},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3620666.3651328","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3620666.3651328","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 29th ACM International Conference on Architectural Support for Programming Languages and Operating Systems, Volume 3","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3620666.3651328","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3620666.3651328","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 29th ACM International Conference on Architectural Support for Programming Languages and Operating Systems, Volume 3","raw_type":"proceedings-article"},"sustainable_development_goals":[{"display_name":"Affordable and clean energy","score":0.7900000214576721,"id":"https://metadata.un.org/sdg/7"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":46,"referenced_works":["https://openalex.org/W1963882043","https://openalex.org/W2016053056","https://openalex.org/W2025548625","https://openalex.org/W2026293753","https://openalex.org/W2123541507","https://openalex.org/W2126105956","https://openalex.org/W2130325614","https://openalex.org/W2164586147","https://openalex.org/W2289252105","https://openalex.org/W2331783522","https://openalex.org/W2402693132","https://openalex.org/W2593564159","https://openalex.org/W2594789754","https://openalex.org/W2804234074","https://openalex.org/W2911788310","https://openalex.org/W2931743911","https://openalex.org/W2940862705","https://openalex.org/W2960696436","https://openalex.org/W2963163009","https://openalex.org/W2974875810","https://openalex.org/W2975429091","https://openalex.org/W2980200167","https://openalex.org/W2987956272","https://openalex.org/W2991649285","https://openalex.org/W2992647434","https://openalex.org/W2994931014","https://openalex.org/W2997929983","https://openalex.org/W2998732502","https://openalex.org/W3008591352","https://openalex.org/W3038347349","https://openalex.org/W3094136258","https://openalex.org/W3105966348","https://openalex.org/W3134526034","https://openalex.org/W3158233068","https://openalex.org/W3168905915","https://openalex.org/W3185576717","https://openalex.org/W3198544746","https://openalex.org/W3199533834","https://openalex.org/W3200280098","https://openalex.org/W3202905974","https://openalex.org/W4221038786","https://openalex.org/W4281691794","https://openalex.org/W4288083528","https://openalex.org/W4360606472","https://openalex.org/W4360606474","https://openalex.org/W4360606495"],"related_works":["https://openalex.org/W4281926497","https://openalex.org/W2274562545","https://openalex.org/W2612099726","https://openalex.org/W2269990635","https://openalex.org/W3146054601","https://openalex.org/W2042762783","https://openalex.org/W4283730710","https://openalex.org/W2921149022","https://openalex.org/W4313484792","https://openalex.org/W4282568311"],"abstract_inverted_index":{"The":[0],"proliferation":[1],"of":[2,109,116,129,174,205,211,242,264],"smart":[3],"IoT":[4],"devices":[5],"has":[6],"given":[7],"rise":[8],"to":[9,147,157,197,274,278],"tinyML,":[10],"which":[11],"deploys":[12],"deep":[13],"neural":[14],"networks":[15],"on":[16,209],"resource-constrained":[17],"systems,":[18],"benefitting":[19],"from":[20],"custom":[21,51,266],"hardware":[22,180],"that":[23,85],"optimizes":[24],"for":[25,166,179,186],"low":[26,42],"silicon":[27,292],"area":[28,119,236],"and":[29,41,64,76,105,120,133,151,169,177,218,269,289],"high":[30],"energy":[31,229,250,281],"efficiency":[32],"amidst":[33],"tinyML's":[34],"characteristic":[35],"small":[36],"model":[37],"sizes":[38],"(50-500":[39],"KB)":[40],"target":[43],"frequencies":[44],"(1-100":[45],"MHz).":[46],"We":[47,201,238],"introduce":[48],"a":[49,56,101,114,143,163,183,191],"novel":[50],"latch":[52,267],"array":[53,104],"integrated":[54],"with":[55,155],"compute":[57,103,170,272],"memory":[58,92,167],"fabric,":[59,171],"achieving":[60],"8":[61],"\u03bcm2/B":[62],"density":[63,75],"11":[65],"fJ/B":[66],"read":[67,79],"energy,":[68],"surpassing":[69],"synthesized":[70],"implementations":[71],"by":[72,253,276],"7x":[73],"in":[74,78,100,118,122,226],"5x":[77],"energy.":[80,124],"This":[81],"advancement":[82],"enables":[83],"dataflows":[84],"do":[86],"not":[87],"require":[88],"activation":[89],"buffers,":[90],"reducing":[91,248],"overheads.":[93,237],"By":[94],"optimizing":[95],"systolic":[96],"vs.":[97,255],"combinational":[98],"scaling":[99],"2D":[102],"using":[106],"bit-serial":[107,270],"instead":[108],"bit-parallel":[110],"compute,":[111],"we":[112,140,260],"achieve":[113],"reduction":[115],"4.8x":[117],"2.3x":[121],"multiply-accumulate":[123],"To":[125],"study":[126],"the":[127,130,137,153,203,212,219,228,234,240,245,249,262,265,280,285,291],"advantages":[128],"proposed":[131,207],"architecture":[132,208],"its":[134,224],"performance":[135,204],"at":[136],"system":[138],"level,":[139],"architect":[141],"tinyForge,":[142],"design":[144],"space":[145],"exploration":[146],"obtain":[148],"Pareto-optimal":[149,199],"architectures":[150],"compare":[152],"trade-offs":[154],"respect":[156],"traditional":[158],"approaches.":[159],"tinyForge":[160],"comprises":[161],"(1)":[162],"parameterized":[164],"template":[165],"hierarchies":[168],"(2)":[172],"estimations":[173],"power,":[175],"area,":[176],"latency":[178,286],"components,":[181],"(3)":[182],"dataflow":[184],"optimizer":[185],"efficient":[187],"workload":[188],"scheduling,":[189],"(4)":[190],"genetic":[192],"algorithm":[193],"performing":[194],"multi-objective":[195],"optimization":[196],"find":[198],"architectures.":[200],"evaluate":[202],"our":[206],"all":[210,244],"MLPerf":[213],"Tiny":[214],"Inference":[215],"Benchmark":[216],"workloads,":[217],"BERT-Tiny":[220],"transformer":[221],"model,":[222],"demonstrating":[223],"effectiveness":[225],"lowering":[227],"per":[230,251,282,287],"inference":[231,252],"while":[232],"addressing":[233],"introduced":[235],"show":[239],"importance":[241],"storing":[243],"weights":[246],"on-chip,":[247],"7.5x":[254],"utilizing":[256],"off-chip":[257],"memories.":[258],"Finally,":[259],"demonstrate":[261],"potential":[263],"arrays":[268,273],"digital":[271],"reduce":[275],"up":[277],"1.8x":[279],"inference,":[283,288],"2.2x":[284],"3.7x":[290],"area.":[293]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":2}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
