{"id":"https://openalex.org/W2980104813","doi":"https://doi.org/10.1145/3352460.3358302","title":"Simba","display_name":"Simba","publication_year":2019,"publication_date":"2019-10-11","ids":{"openalex":"https://openalex.org/W2980104813","doi":"https://doi.org/10.1145/3352460.3358302","mag":"2980104813"},"language":"en","primary_location":{"id":"doi:10.1145/3352460.3358302","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3352460.3358302","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 52nd Annual IEEE/ACM International Symposium on Microarchitecture","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5008683881","display_name":"Yakun Sophia Shao","orcid":"https://orcid.org/0000-0003-1811-5407"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Yakun Sophia Shao","raw_affiliation_strings":["NVIDIA"],"affiliations":[{"raw_affiliation_string":"NVIDIA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5056153489","display_name":"Jason Clemons","orcid":"https://orcid.org/0000-0001-5533-417X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jason Clemons","raw_affiliation_strings":["NVIDIA"],"affiliations":[{"raw_affiliation_string":"NVIDIA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5045219356","display_name":"Rangharajan Venkatesan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Rangharajan Venkatesan","raw_affiliation_strings":["NVIDIA"],"affiliations":[{"raw_affiliation_string":"NVIDIA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5042467215","display_name":"Brian Zimmer","orcid":"https://orcid.org/0000-0001-9997-3141"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Brian Zimmer","raw_affiliation_strings":["NVIDIA"],"affiliations":[{"raw_affiliation_string":"NVIDIA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5087082539","display_name":"Matthew Fojtik","orcid":"https://orcid.org/0000-0003-3138-9293"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Matthew Fojtik","raw_affiliation_strings":["NVIDIA"],"affiliations":[{"raw_affiliation_string":"NVIDIA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101452819","display_name":"Nan Jiang","orcid":"https://orcid.org/0000-0001-6014-299X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Nan Jiang","raw_affiliation_strings":["NVIDIA"],"affiliations":[{"raw_affiliation_string":"NVIDIA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5001800455","display_name":"Ben Keller","orcid":"https://orcid.org/0000-0002-8117-1412"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ben Keller","raw_affiliation_strings":["NVIDIA"],"affiliations":[{"raw_affiliation_string":"NVIDIA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5009735174","display_name":"Alicia Klinefelter","orcid":"https://orcid.org/0000-0002-0149-0393"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Alicia Klinefelter","raw_affiliation_strings":["NVIDIA"],"affiliations":[{"raw_affiliation_string":"NVIDIA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5050270997","display_name":"Nathaniel Pinckney","orcid":"https://orcid.org/0000-0001-6159-8964"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Nathaniel Pinckney","raw_affiliation_strings":["NVIDIA"],"affiliations":[{"raw_affiliation_string":"NVIDIA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5029554261","display_name":"Priyanka Raina","orcid":"https://orcid.org/0000-0002-8834-8663"},"institutions":[{"id":"https://openalex.org/I4210137306","display_name":"Stanford Medicine","ror":"https://ror.org/03mtd9a03","country_code":"US","type":"healthcare","lineage":["https://openalex.org/I4210137306","https://openalex.org/I97018004"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Priyanka Raina","raw_affiliation_strings":["Stanford"],"affiliations":[{"raw_affiliation_string":"Stanford","institution_ids":["https://openalex.org/I4210137306"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102966476","display_name":"Stephen G. Tell","orcid":"https://orcid.org/0000-0002-2166-1452"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Stephen G. Tell","raw_affiliation_strings":["NVIDIA"],"affiliations":[{"raw_affiliation_string":"NVIDIA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100612105","display_name":"Yanqing Zhang","orcid":"https://orcid.org/0000-0003-2349-1925"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yanqing Zhang","raw_affiliation_strings":["NVIDIA"],"affiliations":[{"raw_affiliation_string":"NVIDIA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5084342236","display_name":"William J. Dally","orcid":"https://orcid.org/0000-0003-4632-2876"},"institutions":[{"id":"https://openalex.org/I1304085615","display_name":"Nvidia (United Kingdom)","ror":"https://ror.org/02kr42612","country_code":"GB","type":"company","lineage":["https://openalex.org/I1304085615","https://openalex.org/I4210127875"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"William J. Dally","raw_affiliation_strings":["NVIDIA/Stanford"],"affiliations":[{"raw_affiliation_string":"NVIDIA/Stanford","institution_ids":["https://openalex.org/I1304085615"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5024384625","display_name":"Joel Emer","orcid":null},"institutions":[{"id":"https://openalex.org/I1304085615","display_name":"Nvidia (United Kingdom)","ror":"https://ror.org/02kr42612","country_code":"GB","type":"company","lineage":["https://openalex.org/I1304085615","https://openalex.org/I4210127875"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Joel Emer","raw_affiliation_strings":["NVIDIA/MIT"],"affiliations":[{"raw_affiliation_string":"NVIDIA/MIT","institution_ids":["https://openalex.org/I1304085615"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5069026297","display_name":"C. Thomas Gray","orcid":"https://orcid.org/0000-0002-5137-5617"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"C. Thomas Gray","raw_affiliation_strings":["NVIDIA"],"affiliations":[{"raw_affiliation_string":"NVIDIA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5010156116","display_name":"Brucek Khailany","orcid":"https://orcid.org/0000-0002-7584-3489"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Brucek Khailany","raw_affiliation_strings":["NVIDIA"],"affiliations":[{"raw_affiliation_string":"NVIDIA","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5063354509","display_name":"Stephen W. Keckler","orcid":"https://orcid.org/0000-0001-6701-6099"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Stephen W. Keckler","raw_affiliation_strings":["NVIDIA"],"affiliations":[{"raw_affiliation_string":"NVIDIA","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":17,"corresponding_author_ids":["https://openalex.org/A5008683881"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":14.7116,"has_fulltext":false,"cited_by_count":382,"citation_normalized_percentile":{"value":0.9914088,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"14","last_page":"27"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10502","display_name":"Advanced Memory and Neural Computing","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11992","display_name":"CCD and CMOS Imaging Sensors","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8065423965454102},{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.6846151947975159},{"id":"https://openalex.org/keywords/latency","display_name":"Latency (audio)","score":0.582088828086853},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.47650209069252014},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.41915589570999146},{"id":"https://openalex.org/keywords/chip","display_name":"Chip","score":0.41594311594963074},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.36306801438331604},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.3598952293395996},{"id":"https://openalex.org/keywords/computer-architecture","display_name":"Computer architecture","score":0.3505750894546509},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.34040769934654236},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.18164411187171936},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.10038179159164429},{"id":"https://openalex.org/keywords/telecommunications","display_name":"Telecommunications","score":0.07823404669761658}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8065423965454102},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.6846151947975159},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.582088828086853},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.47650209069252014},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.41915589570999146},{"id":"https://openalex.org/C165005293","wikidata":"https://www.wikidata.org/wiki/Q1074500","display_name":"Chip","level":2,"score":0.41594311594963074},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.36306801438331604},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.3598952293395996},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.3505750894546509},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.34040769934654236},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.18164411187171936},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.10038179159164429},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.07823404669761658}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3352460.3358302","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3352460.3358302","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 52nd Annual IEEE/ACM International Symposium on Microarchitecture","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Affordable and clean energy","score":0.5600000023841858,"id":"https://metadata.un.org/sdg/7"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":79,"referenced_works":["https://openalex.org/W1686810756","https://openalex.org/W1903029394","https://openalex.org/W1910657905","https://openalex.org/W1978187377","https://openalex.org/W2034822545","https://openalex.org/W2048266589","https://openalex.org/W2065653320","https://openalex.org/W2067523571","https://openalex.org/W2090776550","https://openalex.org/W2097117768","https://openalex.org/W2100720925","https://openalex.org/W2102543317","https://openalex.org/W2102605133","https://openalex.org/W2117324528","https://openalex.org/W2125203716","https://openalex.org/W2139239342","https://openalex.org/W2140590655","https://openalex.org/W2140834004","https://openalex.org/W2143518405","https://openalex.org/W2151233837","https://openalex.org/W2152839228","https://openalex.org/W2155893237","https://openalex.org/W2163605009","https://openalex.org/W2183341477","https://openalex.org/W2194775991","https://openalex.org/W2234584938","https://openalex.org/W2320552075","https://openalex.org/W2442974303","https://openalex.org/W2466675884","https://openalex.org/W2513554817","https://openalex.org/W2516141709","https://openalex.org/W2518281301","https://openalex.org/W2565305208","https://openalex.org/W2565851976","https://openalex.org/W2592428089","https://openalex.org/W2605347906","https://openalex.org/W2606722458","https://openalex.org/W2611430843","https://openalex.org/W2612445135","https://openalex.org/W2613989746","https://openalex.org/W2625200202","https://openalex.org/W2625457103","https://openalex.org/W2626778328","https://openalex.org/W2626991402","https://openalex.org/W2733054703","https://openalex.org/W2766789999","https://openalex.org/W2772144122","https://openalex.org/W2790925711","https://openalex.org/W2791175999","https://openalex.org/W2791952321","https://openalex.org/W2794384407","https://openalex.org/W2794670651","https://openalex.org/W2883929540","https://openalex.org/W2884166449","https://openalex.org/W2890947558","https://openalex.org/W2898991608","https://openalex.org/W2903868561","https://openalex.org/W2919115771","https://openalex.org/W2922220370","https://openalex.org/W2935331687","https://openalex.org/W2940862705","https://openalex.org/W2949382160","https://openalex.org/W2949888546","https://openalex.org/W2950656546","https://openalex.org/W2952865063","https://openalex.org/W2952926545","https://openalex.org/W2963037989","https://openalex.org/W2963367920","https://openalex.org/W2964391226","https://openalex.org/W2977415399","https://openalex.org/W2997828269","https://openalex.org/W4240168186","https://openalex.org/W4245152553","https://openalex.org/W4249932213","https://openalex.org/W4252742548","https://openalex.org/W4285719527","https://openalex.org/W4323927861","https://openalex.org/W6600007113","https://openalex.org/W6601013545"],"related_works":["https://openalex.org/W2058965144","https://openalex.org/W2164382479","https://openalex.org/W98480971","https://openalex.org/W2150291671","https://openalex.org/W2027972911","https://openalex.org/W2097707447","https://openalex.org/W3008625068","https://openalex.org/W3128807919","https://openalex.org/W3176411177","https://openalex.org/W3035501883"],"abstract_inverted_index":{"Package-level":[0],"integration":[1],"using":[2,69],"multi-chip-modules":[3],"(MCMs)":[4],"is":[5,131],"a":[6,15,26,40,99,135],"promising":[7],"approach":[8],"for":[9,74,104],"building":[10],"large-scale":[11],"systems.":[12],"Compared":[13],"to":[14,47,121,126,133,141,166,170],"large":[16,44,82],"monolithic":[17],"die,":[18],"an":[19,78],"MCM":[20,102,117,130],"combines":[21],"many":[22],"smaller":[23],"chiplets":[24,45,73],"into":[25],"larger":[27],"system,":[28],"substantially":[29],"reducing":[30],"fabrication":[31],"and":[32,52,62,66,84,96,114,124,145],"design":[33],"costs.":[34],"Current":[35],"MCMs":[36,70],"typically":[37],"only":[38],"contain":[39],"handful":[41],"of":[42,68,138,189,194],"coarse-grained":[43],"due":[46],"the":[48,64,90,115,142,171],"high":[49],"area,":[50],"performance,":[51,113],"energy":[53],"overheads":[54],"associated":[55],"with":[56,71,81,186],"inter-chiplet":[57,150],"communication.":[58],"This":[59],"work":[60],"investigates":[61],"quantifies":[63],"costs":[65],"benefits":[67],"fine-grained":[72],"deep":[75],"learning":[76],"inference,":[77],"application":[79],"area":[80],"compute":[83,144],"on-chip":[85],"storage":[86,146],"requirements.":[87],"To":[88,148],"evaluate":[89],"approach,":[91],"we":[92,153],"architected,":[93],"implemented,":[94],"fabricated,":[95],"tested":[97],"Simba,":[98],"36-chiplet":[100,116],"prototype":[101],"system":[103],"deep-learning":[105],"inference.":[106],"Each":[107],"chiplet":[108],"achieves":[109,119],"4":[110],"TOPS":[111,123],"peak":[112],"package":[118],"up":[120,125,165],"128":[122],"6.1":[127],"TOPS/W.":[128],"The":[129],"configurable":[132],"support":[134],"flexible":[136],"mapping":[137],"DNN":[139],"layers":[140],"distributed":[143],"units.":[147],"mitigate":[149],"communication":[151],"overheads,":[152],"introduce":[154],"three":[155],"tiling":[156],"optimizations":[157,163],"that":[158,178],"improve":[159],"data":[160],"locality.":[161],"These":[162],"achieve":[164],"16%":[167],"speedup":[168],"compared":[169],"baseline":[172],"layer":[173],"mapping.":[174],"Our":[175],"evaluation":[176],"shows":[177],"Simba":[179],"can":[180],"process":[181],"1988":[182],"images/s":[183],"running":[184],"ResNet-50":[185],"batch":[187],"size":[188],"one,":[190],"delivering":[191],"inference":[192],"latency":[193],"0.50":[195],"ms.":[196]},"counts_by_year":[{"year":2026,"cited_by_count":21},{"year":2025,"cited_by_count":82},{"year":2024,"cited_by_count":65},{"year":2023,"cited_by_count":70},{"year":2022,"cited_by_count":44},{"year":2021,"cited_by_count":73},{"year":2020,"cited_by_count":26},{"year":2019,"cited_by_count":1}],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2019-10-18T00:00:00"}
