{"id":"https://openalex.org/W4213249026","doi":"https://doi.org/10.1145/3503222.3507767","title":"A full-stack search technique for domain optimized deep learning accelerators","display_name":"A full-stack search technique for domain optimized deep learning accelerators","publication_year":2022,"publication_date":"2022-02-22","ids":{"openalex":"https://openalex.org/W4213249026","doi":"https://doi.org/10.1145/3503222.3507767"},"language":"en","primary_location":{"id":"doi:10.1145/3503222.3507767","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3503222.3507767","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3503222.3507767","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 27th ACM International Conference on Architectural Support for Programming Languages and Operating Systems","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3503222.3507767","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5082010971","display_name":"Dan Zhang","orcid":"https://orcid.org/0000-0001-5112-1839"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Dan Zhang","raw_affiliation_strings":["Google Brain, USA"],"raw_orcid":"https://orcid.org/0000-0001-5112-1839","affiliations":[{"raw_affiliation_string":"Google Brain, USA","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5075152320","display_name":"Safeen Huda","orcid":"https://orcid.org/0000-0001-8391-0509"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Safeen Huda","raw_affiliation_strings":["Google, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Google, USA","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5059274858","display_name":"Ebrahim M. Songhori","orcid":"https://orcid.org/0000-0001-7450-1594"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ebrahim Songhori","raw_affiliation_strings":["Google Brain, USA"],"raw_orcid":"https://orcid.org/0000-0001-7450-1594","affiliations":[{"raw_affiliation_string":"Google Brain, USA","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5062937114","display_name":"Kartik Prabhu","orcid":"https://orcid.org/0000-0002-4179-1692"},"institutions":[{"id":"https://openalex.org/I97018004","display_name":"Stanford University","ror":"https://ror.org/00f54p054","country_code":"US","type":"education","lineage":["https://openalex.org/I97018004"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Kartik Prabhu","raw_affiliation_strings":["Stanford University, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Stanford University, USA","institution_ids":["https://openalex.org/I97018004"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5088551093","display_name":"Quoc V. Le","orcid":"https://orcid.org/0000-0002-1087-2844"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Quoc Le","raw_affiliation_strings":["Google Brain, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Google Brain, USA","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5088885573","display_name":"Anna Goldie","orcid":"https://orcid.org/0000-0002-4887-6293"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Anna Goldie","raw_affiliation_strings":["Google Brain, USA"],"raw_orcid":"https://orcid.org/0000-0002-4887-6293","affiliations":[{"raw_affiliation_string":"Google Brain, USA","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5070731184","display_name":"Azalia Mirhoseini","orcid":"https://orcid.org/0000-0002-2440-0944"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Azalia Mirhoseini","raw_affiliation_strings":["Google Brain, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Google Brain, USA","institution_ids":["https://openalex.org/I1291425158"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":4.4676,"has_fulltext":true,"cited_by_count":50,"citation_normalized_percentile":{"value":0.95735841,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"27","last_page":"42"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12808","display_name":"Ferroelectric and Negative Capacitance Devices","score":0.9955000281333923,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10502","display_name":"Advanced Memory and Neural Computing","score":0.9954000115394592,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8267229795455933},{"id":"https://openalex.org/keywords/suite","display_name":"Suite","score":0.6610875129699707},{"id":"https://openalex.org/keywords/compiler","display_name":"Compiler","score":0.6562453508377075},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.5363764762878418},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.5116150975227356},{"id":"https://openalex.org/keywords/scheduling","display_name":"Scheduling (production processes)","score":0.4996371269226074},{"id":"https://openalex.org/keywords/dataflow","display_name":"Dataflow","score":0.4518612325191498},{"id":"https://openalex.org/keywords/datapath","display_name":"Datapath","score":0.4242284297943115},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.4240152835845947},{"id":"https://openalex.org/keywords/hardware-acceleration","display_name":"Hardware acceleration","score":0.417140930891037},{"id":"https://openalex.org/keywords/stack","display_name":"Stack (abstract data type)","score":0.4164283275604248},{"id":"https://openalex.org/keywords/computer-architecture","display_name":"Computer architecture","score":0.4138185679912567},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.33054548501968384},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.3067263662815094},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.2968411445617676},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.24659675359725952}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8267229795455933},{"id":"https://openalex.org/C79581498","wikidata":"https://www.wikidata.org/wiki/Q1367530","display_name":"Suite","level":2,"score":0.6610875129699707},{"id":"https://openalex.org/C169590947","wikidata":"https://www.wikidata.org/wiki/Q47506","display_name":"Compiler","level":2,"score":0.6562453508377075},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.5363764762878418},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.5116150975227356},{"id":"https://openalex.org/C206729178","wikidata":"https://www.wikidata.org/wiki/Q2271896","display_name":"Scheduling (production processes)","level":2,"score":0.4996371269226074},{"id":"https://openalex.org/C96324660","wikidata":"https://www.wikidata.org/wiki/Q205446","display_name":"Dataflow","level":2,"score":0.4518612325191498},{"id":"https://openalex.org/C2781198647","wikidata":"https://www.wikidata.org/wiki/Q1633673","display_name":"Datapath","level":2,"score":0.4242284297943115},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.4240152835845947},{"id":"https://openalex.org/C13164978","wikidata":"https://www.wikidata.org/wiki/Q600158","display_name":"Hardware acceleration","level":3,"score":0.417140930891037},{"id":"https://openalex.org/C9395851","wikidata":"https://www.wikidata.org/wiki/Q177929","display_name":"Stack (abstract data type)","level":2,"score":0.4164283275604248},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.4138185679912567},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.33054548501968384},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.3067263662815094},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.2968411445617676},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.24659675359725952},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0},{"id":"https://openalex.org/C95457728","wikidata":"https://www.wikidata.org/wiki/Q309","display_name":"History","level":0,"score":0.0},{"id":"https://openalex.org/C166957645","wikidata":"https://www.wikidata.org/wiki/Q23498","display_name":"Archaeology","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/3503222.3507767","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3503222.3507767","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3503222.3507767","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 27th ACM International Conference on Architectural Support for Programming Languages and Operating Systems","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2105.12842","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2105.12842","pdf_url":"https://arxiv.org/pdf/2105.12842","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"doi:10.1145/3503222.3507767","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3503222.3507767","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3503222.3507767","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 27th ACM International Conference on Architectural Support for Programming Languages and Operating Systems","raw_type":"proceedings-article"},"sustainable_development_goals":[{"display_name":"Industry, innovation and infrastructure","id":"https://metadata.un.org/sdg/9","score":0.5899999737739563}],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4213249026.pdf","grobid_xml":"https://content.openalex.org/works/W4213249026.grobid-xml"},"referenced_works_count":105,"referenced_works":["https://openalex.org/W164384110","https://openalex.org/W1552145309","https://openalex.org/W1632863093","https://openalex.org/W1658455281","https://openalex.org/W1701825639","https://openalex.org/W1977850862","https://openalex.org/W1983096721","https://openalex.org/W2042519306","https://openalex.org/W2055312318","https://openalex.org/W2067523571","https://openalex.org/W2094756095","https://openalex.org/W2116433835","https://openalex.org/W2129010304","https://openalex.org/W2172654076","https://openalex.org/W2194775991","https://openalex.org/W2276486856","https://openalex.org/W2279098554","https://openalex.org/W2294282016","https://openalex.org/W2318354022","https://openalex.org/W2531409750","https://openalex.org/W2584311934","https://openalex.org/W2606722458","https://openalex.org/W2612076670","https://openalex.org/W2614991284","https://openalex.org/W2625954420","https://openalex.org/W2732547613","https://openalex.org/W2742479298","https://openalex.org/W2747329762","https://openalex.org/W2774328333","https://openalex.org/W2794670651","https://openalex.org/W2798956872","https://openalex.org/W2799537975","https://openalex.org/W2801655600","https://openalex.org/W2889314677","https://openalex.org/W2896457183","https://openalex.org/W2896983500","https://openalex.org/W2899915146","https://openalex.org/W2901681686","https://openalex.org/W2906043559","https://openalex.org/W2937092700","https://openalex.org/W2940862705","https://openalex.org/W2944824859","https://openalex.org/W2953212265","https://openalex.org/W2955425717","https://openalex.org/W2962856739","https://openalex.org/W2963163009","https://openalex.org/W2963918968","https://openalex.org/W2964054286","https://openalex.org/W2964259004","https://openalex.org/W2965373594","https://openalex.org/W2969491671","https://openalex.org/W2969940675","https://openalex.org/W2970597249","https://openalex.org/W2974152075","https://openalex.org/W2980104813","https://openalex.org/W2981969038","https://openalex.org/W2995955137","https://openalex.org/W2996428491","https://openalex.org/W2997929983","https://openalex.org/W3008591352","https://openalex.org/W3010757996","https://openalex.org/W3015441707","https://openalex.org/W3016142271","https://openalex.org/W3017521908","https://openalex.org/W3023950925","https://openalex.org/W3036878841","https://openalex.org/W3040573126","https://openalex.org/W3047049572","https://openalex.org/W3089703283","https://openalex.org/W3092020069","https://openalex.org/W3092216346","https://openalex.org/W3092334294","https://openalex.org/W3101965820","https://openalex.org/W3102790199","https://openalex.org/W3104745751","https://openalex.org/W3109915984","https://openalex.org/W3111579839","https://openalex.org/W3112293503","https://openalex.org/W3112948415","https://openalex.org/W3123054690","https://openalex.org/W3130554079","https://openalex.org/W3131592046","https://openalex.org/W3131920484","https://openalex.org/W3132942233","https://openalex.org/W3135807226","https://openalex.org/W3153174751","https://openalex.org/W3153760339","https://openalex.org/W3156745629","https://openalex.org/W3172301985","https://openalex.org/W3190062760","https://openalex.org/W4238366336","https://openalex.org/W4240365877","https://openalex.org/W4242577057","https://openalex.org/W4249932213","https://openalex.org/W4256629673","https://openalex.org/W4287204036","https://openalex.org/W4287391717","https://openalex.org/W4287900772","https://openalex.org/W4289276774","https://openalex.org/W4292169167","https://openalex.org/W4292779060","https://openalex.org/W4297775537","https://openalex.org/W4297813615","https://openalex.org/W4385245566","https://openalex.org/W6784370339"],"related_works":["https://openalex.org/W2109699519","https://openalex.org/W2293118914","https://openalex.org/W2006568360","https://openalex.org/W2998381397","https://openalex.org/W4236419692","https://openalex.org/W102726818","https://openalex.org/W4233616027","https://openalex.org/W2059591361","https://openalex.org/W970262775","https://openalex.org/W2537803004"],"abstract_inverted_index":{"The":[0],"rapidly-changing":[1],"deep":[2],"learning":[3],"landscape":[4],"presents":[5],"a":[6,25,32,113],"unique":[7],"opportunity":[8],"for":[9,14,92,111,139],"building":[10],"inference":[11],"accelerators":[12,83,90,134],"optimized":[13,91,110],"specific":[15],"datacenter-scale":[16],"workloads.":[17],"We":[18],"propose":[19],"Full-stack":[20],"Accelerator":[21],"Search":[22],"Technique":[23],"(FAST),":[24],"hardware":[26,45],"accelerator":[27,109],"search":[28],"framework":[29],"that":[30,132],"defines":[31],"broad":[33],"optimization":[34],"environment":[35],"covering":[36],"key":[37],"design":[38,82],"decisions":[39],"within":[40],"the":[41],"hardware-software":[42],"stack,":[43],"including":[44,74],"datapath,":[46],"software":[47],"scheduling,":[48],"and":[49,56,68,76,78],"compiler":[50],"passes":[51],"such":[52],"as":[53],"operation":[54],"fusion":[55],"tensor":[57],"padding.":[58],"In":[59],"this":[60],"paper,":[61],"we":[62],"analyze":[63],"bottlenecks":[64],"in":[65],"state-of-the-art":[66],"vision":[67],"natural":[69],"language":[70],"processing":[71],"(NLP)":[72],"models,":[73],"EfficientNet":[75],"BERT,":[77],"use":[79],"FAST":[80],"to":[81,105,124],"capable":[84],"of":[85,115],"addressing":[86],"these":[87],"bottlenecks.":[88],"FAST-generated":[89,108,133],"single":[93],"workloads":[94,116],"improve":[95],"Perf/TDP":[96,118],"by":[97,119],"3.7\u00d7":[98],"on":[99,121,128],"average":[100,122],"across":[101],"all":[102],"benchmarks":[103],"compared":[104,123],"TPU-v3.":[106,125],"A":[107],"serving":[112],"suite":[114],"improves":[117],"2.4\u00d7":[120],"Our":[126],"return":[127],"investment":[129],"analysis":[130],"shows":[131],"can":[135],"potentially":[136],"be":[137],"practical":[138],"moderate-sized":[140],"datacenter":[141],"deployments.":[142]},"counts_by_year":[{"year":2026,"cited_by_count":6},{"year":2025,"cited_by_count":7},{"year":2024,"cited_by_count":14},{"year":2023,"cited_by_count":16},{"year":2022,"cited_by_count":7}],"updated_date":"2026-06-19T17:40:00.097472","created_date":"2025-10-10T00:00:00"}
