{"id":"https://openalex.org/W4360831836","doi":"https://doi.org/10.1109/hpca56546.2023.10070992","title":"INCA: Input-stationary Dataflow at Outside-the-box Thinking about Deep Learning Accelerators","display_name":"INCA: Input-stationary Dataflow at Outside-the-box Thinking about Deep Learning Accelerators","publication_year":2023,"publication_date":"2023-02-01","ids":{"openalex":"https://openalex.org/W4360831836","doi":"https://doi.org/10.1109/hpca56546.2023.10070992"},"language":"en","primary_location":{"id":"doi:10.1109/hpca56546.2023.10070992","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpca56546.2023.10070992","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 IEEE International Symposium on High-Performance Computer Architecture (HPCA)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100638467","display_name":"Bokyung Kim","orcid":"https://orcid.org/0000-0002-6954-1476"},"institutions":[{"id":"https://openalex.org/I170897317","display_name":"Duke University","ror":"https://ror.org/00py81415","country_code":"US","type":"education","lineage":["https://openalex.org/I170897317"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Bokyung Kim","raw_affiliation_strings":["Duke University,Durham,NC,USA","Duke University, Durham, NC, USA"],"affiliations":[{"raw_affiliation_string":"Duke University,Durham,NC,USA","institution_ids":["https://openalex.org/I170897317"]},{"raw_affiliation_string":"Duke University, Durham, NC, USA","institution_ids":["https://openalex.org/I170897317"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100413190","display_name":"Shiyu Li","orcid":"https://orcid.org/0000-0002-1990-7150"},"institutions":[{"id":"https://openalex.org/I170897317","display_name":"Duke University","ror":"https://ror.org/00py81415","country_code":"US","type":"education","lineage":["https://openalex.org/I170897317"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Shiyu Li","raw_affiliation_strings":["Duke University,Durham,NC,USA","Duke University, Durham, NC, USA"],"affiliations":[{"raw_affiliation_string":"Duke University,Durham,NC,USA","institution_ids":["https://openalex.org/I170897317"]},{"raw_affiliation_string":"Duke University, Durham, NC, USA","institution_ids":["https://openalex.org/I170897317"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100429403","display_name":"Hai Li","orcid":"https://orcid.org/0000-0003-3228-6544"},"institutions":[{"id":"https://openalex.org/I170897317","display_name":"Duke University","ror":"https://ror.org/00py81415","country_code":"US","type":"education","lineage":["https://openalex.org/I170897317"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Hai Li","raw_affiliation_strings":["Duke University,Durham,NC,USA","Duke University, Durham, NC, USA"],"affiliations":[{"raw_affiliation_string":"Duke University,Durham,NC,USA","institution_ids":["https://openalex.org/I170897317"]},{"raw_affiliation_string":"Duke University, Durham, NC, USA","institution_ids":["https://openalex.org/I170897317"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5100638467"],"corresponding_institution_ids":["https://openalex.org/I170897317"],"apc_list":null,"apc_paid":null,"fwci":3.4207,"has_fulltext":false,"cited_by_count":26,"citation_normalized_percentile":{"value":0.92970352,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"29","last_page":"41"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10502","display_name":"Advanced Memory and Neural Computing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10502","display_name":"Advanced Memory and Neural Computing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12808","display_name":"Ferroelectric and Negative Capacitance Devices","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9965000152587891,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8019384145736694},{"id":"https://openalex.org/keywords/dataflow","display_name":"Dataflow","score":0.7682187557220459},{"id":"https://openalex.org/keywords/resistive-random-access-memory","display_name":"Resistive random-access memory","score":0.6875169277191162},{"id":"https://openalex.org/keywords/kernel","display_name":"Kernel (algebra)","score":0.5986655950546265},{"id":"https://openalex.org/keywords/crossbar-switch","display_name":"Crossbar switch","score":0.5183409452438354},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.4453413188457489},{"id":"https://openalex.org/keywords/convolution","display_name":"Convolution (computer science)","score":0.4314321279525757},{"id":"https://openalex.org/keywords/computer-engineering","display_name":"Computer engineering","score":0.4037241041660309},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.3747328221797943},{"id":"https://openalex.org/keywords/computer-hardware","display_name":"Computer hardware","score":0.34326261281967163},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.32055604457855225},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.24751520156860352},{"id":"https://openalex.org/keywords/voltage","display_name":"Voltage","score":0.1804911494255066},{"id":"https://openalex.org/keywords/electrical-engineering","display_name":"Electrical engineering","score":0.09586578607559204}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8019384145736694},{"id":"https://openalex.org/C96324660","wikidata":"https://www.wikidata.org/wiki/Q205446","display_name":"Dataflow","level":2,"score":0.7682187557220459},{"id":"https://openalex.org/C182019814","wikidata":"https://www.wikidata.org/wiki/Q1143830","display_name":"Resistive random-access memory","level":3,"score":0.6875169277191162},{"id":"https://openalex.org/C74193536","wikidata":"https://www.wikidata.org/wiki/Q574844","display_name":"Kernel (algebra)","level":2,"score":0.5986655950546265},{"id":"https://openalex.org/C29984679","wikidata":"https://www.wikidata.org/wiki/Q1929149","display_name":"Crossbar switch","level":2,"score":0.5183409452438354},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.4453413188457489},{"id":"https://openalex.org/C45347329","wikidata":"https://www.wikidata.org/wiki/Q5166604","display_name":"Convolution (computer science)","level":3,"score":0.4314321279525757},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.4037241041660309},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.3747328221797943},{"id":"https://openalex.org/C9390403","wikidata":"https://www.wikidata.org/wiki/Q3966","display_name":"Computer hardware","level":1,"score":0.34326261281967163},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.32055604457855225},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.24751520156860352},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.1804911494255066},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.09586578607559204},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/hpca56546.2023.10070992","is_oa":false,"landing_page_url":"https://doi.org/10.1109/hpca56546.2023.10070992","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 IEEE International Symposium on High-Performance Computer Architecture (HPCA)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":74,"referenced_works":["https://openalex.org/W1686810756","https://openalex.org/W1892804072","https://openalex.org/W1978286847","https://openalex.org/W1998177673","https://openalex.org/W2080564856","https://openalex.org/W2085407034","https://openalex.org/W2108598243","https://openalex.org/W2112796928","https://openalex.org/W2113143253","https://openalex.org/W2117422822","https://openalex.org/W2194775991","https://openalex.org/W2285660444","https://openalex.org/W2323048532","https://openalex.org/W2409988735","https://openalex.org/W2415829409","https://openalex.org/W2442974303","https://openalex.org/W2463724064","https://openalex.org/W2469880822","https://openalex.org/W2481799190","https://openalex.org/W2508602506","https://openalex.org/W2518281301","https://openalex.org/W2518511512","https://openalex.org/W2524326635","https://openalex.org/W2592304361","https://openalex.org/W2605347906","https://openalex.org/W2613989746","https://openalex.org/W2713236649","https://openalex.org/W2742566056","https://openalex.org/W2772681688","https://openalex.org/W2785141883","https://openalex.org/W2799229073","https://openalex.org/W2899868573","https://openalex.org/W2899996135","https://openalex.org/W2902109746","https://openalex.org/W2904436509","https://openalex.org/W2908349016","https://openalex.org/W2912012512","https://openalex.org/W2914237701","https://openalex.org/W2930624726","https://openalex.org/W2946334728","https://openalex.org/W2946429553","https://openalex.org/W2949674408","https://openalex.org/W2949989598","https://openalex.org/W2963122961","https://openalex.org/W2963163009","https://openalex.org/W2963358710","https://openalex.org/W2963918968","https://openalex.org/W2967433097","https://openalex.org/W2970601456","https://openalex.org/W2980580339","https://openalex.org/W2984707426","https://openalex.org/W3005619596","https://openalex.org/W3015724253","https://openalex.org/W3016430712","https://openalex.org/W3047455354","https://openalex.org/W3102587717","https://openalex.org/W3105802176","https://openalex.org/W3106792220","https://openalex.org/W3111375540","https://openalex.org/W3112740243","https://openalex.org/W3136475599","https://openalex.org/W3137223153","https://openalex.org/W3155456425","https://openalex.org/W3167628828","https://openalex.org/W3187188899","https://openalex.org/W3187235458","https://openalex.org/W3191560475","https://openalex.org/W3205209748","https://openalex.org/W3206621799","https://openalex.org/W4232753305","https://openalex.org/W4251775051","https://openalex.org/W6637373629","https://openalex.org/W6639314256","https://openalex.org/W6767298317"],"related_works":["https://openalex.org/W3005999147","https://openalex.org/W3176428941","https://openalex.org/W3089883684","https://openalex.org/W4232634182","https://openalex.org/W4301187613","https://openalex.org/W2923038022","https://openalex.org/W3008646524","https://openalex.org/W4385624997","https://openalex.org/W2593506445","https://openalex.org/W2807127337"],"abstract_inverted_index":{"This":[0],"paper":[1],"first":[2],"presents":[3,305],"an":[4,306],"input-stationary":[5],"(IS)":[6],"implemented":[7],"crossbar":[8,212],"accelerator":[9,192],"(INCA),":[10],"supporting":[11],"inference":[12,281],"and":[13,39,42,60,83,99,103,117,131,137,194,222,275,282,287],"training":[14],"for":[15,23,80,101,114,186,251],"deep":[16],"neural":[17],"networks":[18],"(DNNs).":[19],"Processing-in-memory":[20],"(PIM)":[21],"accelerators":[22,54,89],"DNNs":[24],"have":[25,55],"been":[26,74],"actively":[27],"researched,":[28],"specifically,":[29],"with":[30,236],"resistive":[31],"random-access":[32],"memory":[33],"(RRAM),":[34],"due":[35,143],"to":[36,144,147,218,266,273,298],"RRAM\u2019s":[37,153],"computing":[38],"memorizing":[40],"capabilities":[41],"device":[43],"merits.":[44],"To":[45,227],"the":[46,76,156,172,210,224,241,261,267],"best":[47],"of":[48,111,174,177,190,197,263],"our":[49,301],"knowledge,":[50],"all":[51],"previous":[52],"PIM":[53,88],"saved":[56],"weights":[57,116,148],"into":[58,63],"RRAMs":[59,113,165],"inputs":[61,105,167],"(activations)":[62],"conventional":[64],"memories\u2014it":[65],"naturally":[66],"forms":[67],"weight-stationary":[68],"(WS)":[69],"dataflow.":[70],"WS":[71,268,296],"has":[72],"generally":[73],"considered":[75],"most":[77],"optimized":[78],"choice":[79],"high":[81,95,225,252],"parallelism":[82,253],"data":[84],"reuse.":[85],"How-ever,":[86],"WS-based":[87],"show":[90],"fundamental":[91],"limitations:":[92],"first,":[93],"remaining":[94],"dependency":[96,179],"on":[97,201,240],"DRAM":[98],"buffers":[100],"fetching":[102],"saving":[104],"(activations);":[106],"second,":[107],"a":[108,233,246],"remarkable":[109],"number":[110],"extra":[112,187],"transposed":[115],"additional":[118],"computational":[119],"intermediates":[120],"in":[121,135,254,280,295,300],"training;":[122],"third,":[123],"coarse-grained":[124],"arrays":[125],"demanding":[126],"high-bit":[127],"analog-to-digital":[128],"converters":[129],"(ADCs)":[130],"introducing":[132],"poor":[133],"utilization":[134],"depthwise":[136],"pointwise":[138],"convolution;":[139],"last,":[140],"degraded":[141],"accuracy":[142,294],"its":[145],"sensitivity":[146],"which":[149],"are":[150],"affected":[151],"by":[152,180,209],"nonideality.":[154],"On":[155],"other":[157],"hand,":[158],"we":[159,231,244],"observe":[160],"that":[161],"IS":[162,204],"dataflow,":[163],"where":[164],"retain":[166],"(activations),":[168],"can":[169],"effectively":[170],"address":[171],"limitations":[173],"WS,":[175],"because":[176,214],"low":[178],"only":[181],"loading":[182],"weights,":[183],"no":[184],"need":[185],"RRAMs,":[188],"feasibility":[189],"fine-grained":[191],"design,":[193],"less":[195],"impact":[196],"input":[198],"(activation)":[199],"variance":[200],"accuracy.":[202,313],"But":[203],"dataflow":[205],"is":[206,216],"hardly":[207],"achievable":[208],"existing":[211],"structure":[213,235],"it":[215],"difficult":[217],"implement":[219],"kernel":[220,229],"sliding":[221],"preserve":[223],"parallelism.":[226],"support":[228],"movement,":[230],"constitute":[232],"cell":[234],"two-transistor-one-RRAM":[237],"(2T1R).":[238],"Based":[239],"2T1R":[242],"cell,":[243],"design":[245],"novel":[247],"three-dimensional":[248],"(3D)":[249],"architecture":[250],"batch":[255],"training.":[256],"Our":[257],"experiment":[258],"results":[259],"prove":[260],"potential":[262],"INCA.":[264],"Compared":[265],"accelerator,":[269],"INCA":[270,304],"achieves":[271],"up":[272],"20.6\u00d7":[274],"260\u00d7":[276],"energy":[277],"efficiency":[278],"improvement":[279],"training,":[283],"respectively;":[284],"4.8\u00d7":[285],"(inference)":[286],"18.6\u00d7":[288],"(training)":[289],"speedup":[290],"as":[291,311],"well.":[292],"While":[293],"drops":[297],"15%":[299],"high-noise":[302],"simulation,":[303],"even":[307],"more":[308],"robust":[309],"result":[310],"86%":[312]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":12},{"year":2024,"cited_by_count":11},{"year":2023,"cited_by_count":1}],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2025-10-10T00:00:00"}
