{"id":"https://openalex.org/W4221061712","doi":"https://doi.org/10.1145/3524453","title":"A Unified Programmable Edge Matrix Processor for Deep Neural Networks and Matrix Algebra","display_name":"A Unified Programmable Edge Matrix Processor for Deep Neural Networks and Matrix Algebra","publication_year":2022,"publication_date":"2022-04-01","ids":{"openalex":"https://openalex.org/W4221061712","doi":"https://doi.org/10.1145/3524453"},"language":"en","primary_location":{"id":"doi:10.1145/3524453","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3524453","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3524453","source":{"id":"https://openalex.org/S136160450","display_name":"ACM Transactions on Embedded Computing Systems","issn_l":"1539-9087","issn":["1539-9087","1558-3465"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Embedded Computing Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"bronze","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3524453","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5086018840","display_name":"Biji George","orcid":"https://orcid.org/0000-0002-3551-3418"},"institutions":[{"id":"https://openalex.org/I4210146682","display_name":"Intel (India)","ror":"https://ror.org/04f2n1245","country_code":"IN","type":"company","lineage":["https://openalex.org/I1343180700","https://openalex.org/I4210146682"]}],"countries":["IN"],"is_corresponding":true,"raw_author_name":"Biji George","raw_affiliation_strings":["Processor Architecture Research Lab, Intel Labs, Karnataka, India"],"raw_orcid":"https://orcid.org/0000-0002-3551-3418","affiliations":[{"raw_affiliation_string":"Processor Architecture Research Lab, Intel Labs, Karnataka, India","institution_ids":["https://openalex.org/I4210146682"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5031637729","display_name":"Om Ji Omer","orcid":"https://orcid.org/0000-0002-9149-5605"},"institutions":[{"id":"https://openalex.org/I4210146682","display_name":"Intel (India)","ror":"https://ror.org/04f2n1245","country_code":"IN","type":"company","lineage":["https://openalex.org/I1343180700","https://openalex.org/I4210146682"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Om Ji Omer","raw_affiliation_strings":["Processor Architecture Research Lab, Intel Labs, Karnataka, India"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Processor Architecture Research Lab, Intel Labs, Karnataka, India","institution_ids":["https://openalex.org/I4210146682"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5064037179","display_name":"Ziaul Choudhury","orcid":"https://orcid.org/0000-0001-9019-0239"},"institutions":[{"id":"https://openalex.org/I64189192","display_name":"International Institute of Information Technology, Hyderabad","ror":"https://ror.org/05f11g639","country_code":"IN","type":"education","lineage":["https://openalex.org/I64189192"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Ziaul Choudhury","raw_affiliation_strings":["International Institute of Information Technology, Hyderabad, India"],"raw_orcid":"https://orcid.org/0000-0001-9019-0239","affiliations":[{"raw_affiliation_string":"International Institute of Information Technology, Hyderabad, India","institution_ids":["https://openalex.org/I64189192"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5030699601","display_name":"V. S. Anoop","orcid":null},"institutions":[{"id":"https://openalex.org/I4210146682","display_name":"Intel (India)","ror":"https://ror.org/04f2n1245","country_code":"IN","type":"company","lineage":["https://openalex.org/I1343180700","https://openalex.org/I4210146682"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Anoop V","raw_affiliation_strings":["Xeon Server Group, Intel Technology India Pvt Ltd, Karnataka, India"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Xeon Server Group, Intel Technology India Pvt Ltd, Karnataka, India","institution_ids":["https://openalex.org/I4210146682"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5036799188","display_name":"Sreenivas Subramoney","orcid":"https://orcid.org/0000-0001-5372-0173"},"institutions":[{"id":"https://openalex.org/I4210146682","display_name":"Intel (India)","ror":"https://ror.org/04f2n1245","country_code":"IN","type":"company","lineage":["https://openalex.org/I1343180700","https://openalex.org/I4210146682"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Sreenivas Subramoney","raw_affiliation_strings":["Processor Architecture Research Lab, Intel Labs, Karnataka, India"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Processor Architecture Research Lab, Intel Labs, Karnataka, India","institution_ids":["https://openalex.org/I4210146682"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5086018840"],"corresponding_institution_ids":["https://openalex.org/I4210146682"],"apc_list":null,"apc_paid":null,"fwci":2.0987,"has_fulltext":true,"cited_by_count":10,"citation_normalized_percentile":{"value":0.85637439,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":98},"biblio":{"volume":"21","issue":"5","first_page":"1","last_page":"30"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11321","display_name":"Error Correcting Code Techniques","score":0.9952999949455261,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10500","display_name":"Sparse and Compressive Sensing Techniques","score":0.9945999979972839,"subfield":{"id":"https://openalex.org/subfields/2206","display_name":"Computational Mechanics"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8170627355575562},{"id":"https://openalex.org/keywords/hardware-acceleration","display_name":"Hardware acceleration","score":0.5277224779129028},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.5138412117958069},{"id":"https://openalex.org/keywords/matrix-multiplication","display_name":"Matrix multiplication","score":0.46886757016181946},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.41968023777008057},{"id":"https://openalex.org/keywords/computer-engineering","display_name":"Computer engineering","score":0.4151410460472107},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.3929991126060486},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.3752024173736572},{"id":"https://openalex.org/keywords/computational-science","display_name":"Computational science","score":0.3691098690032959},{"id":"https://openalex.org/keywords/computer-architecture","display_name":"Computer architecture","score":0.35511812567710876},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.3518950641155243},{"id":"https://openalex.org/keywords/field-programmable-gate-array","display_name":"Field-programmable gate array","score":0.2963920831680298},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.2782682776451111}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8170627355575562},{"id":"https://openalex.org/C13164978","wikidata":"https://www.wikidata.org/wiki/Q600158","display_name":"Hardware acceleration","level":3,"score":0.5277224779129028},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.5138412117958069},{"id":"https://openalex.org/C17349429","wikidata":"https://www.wikidata.org/wiki/Q1049914","display_name":"Matrix multiplication","level":3,"score":0.46886757016181946},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.41968023777008057},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.4151410460472107},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.3929991126060486},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.3752024173736572},{"id":"https://openalex.org/C459310","wikidata":"https://www.wikidata.org/wiki/Q117801","display_name":"Computational science","level":1,"score":0.3691098690032959},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.35511812567710876},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.3518950641155243},{"id":"https://openalex.org/C42935608","wikidata":"https://www.wikidata.org/wiki/Q190411","display_name":"Field-programmable gate array","level":2,"score":0.2963920831680298},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2782682776451111},{"id":"https://openalex.org/C84114770","wikidata":"https://www.wikidata.org/wiki/Q46344","display_name":"Quantum","level":2,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3524453","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3524453","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3524453","source":{"id":"https://openalex.org/S136160450","display_name":"ACM Transactions on Embedded Computing Systems","issn_l":"1539-9087","issn":["1539-9087","1558-3465"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Embedded Computing Systems","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1145/3524453","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3524453","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3524453","source":{"id":"https://openalex.org/S136160450","display_name":"ACM Transactions on Embedded Computing Systems","issn_l":"1539-9087","issn":["1539-9087","1558-3465"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Embedded Computing Systems","raw_type":"journal-article"},"sustainable_development_goals":[{"score":0.4699999988079071,"id":"https://metadata.un.org/sdg/7","display_name":"Affordable and clean energy"}],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4221061712.pdf","grobid_xml":"https://content.openalex.org/works/W4221061712.grobid-xml"},"referenced_works_count":63,"referenced_works":["https://openalex.org/W114517082","https://openalex.org/W1821462560","https://openalex.org/W1888890775","https://openalex.org/W1901616594","https://openalex.org/W2004147962","https://openalex.org/W2009796613","https://openalex.org/W2015525779","https://openalex.org/W2019759670","https://openalex.org/W2040128616","https://openalex.org/W2105934661","https://openalex.org/W2119144962","https://openalex.org/W2122646361","https://openalex.org/W2153204928","https://openalex.org/W2158344138","https://openalex.org/W2162072731","https://openalex.org/W2523246573","https://openalex.org/W2535547924","https://openalex.org/W2585804629","https://openalex.org/W2588191434","https://openalex.org/W2786217039","https://openalex.org/W2789934304","https://openalex.org/W2886014761","https://openalex.org/W2887654042","https://openalex.org/W2902251695","https://openalex.org/W2906043559","https://openalex.org/W2911892981","https://openalex.org/W2917832368","https://openalex.org/W2919115771","https://openalex.org/W2919372546","https://openalex.org/W2938315635","https://openalex.org/W2946758900","https://openalex.org/W2954228910","https://openalex.org/W2963319203","https://openalex.org/W2963363373","https://openalex.org/W2963470893","https://openalex.org/W2964217527","https://openalex.org/W2964259004","https://openalex.org/W2971544482","https://openalex.org/W2979310060","https://openalex.org/W2980137827","https://openalex.org/W2989131701","https://openalex.org/W2991389670","https://openalex.org/W3016735325","https://openalex.org/W3048439147","https://openalex.org/W3092664243","https://openalex.org/W3093065546","https://openalex.org/W3102327032","https://openalex.org/W3103572230","https://openalex.org/W3111112601","https://openalex.org/W3129093240","https://openalex.org/W3138335809","https://openalex.org/W3140077234","https://openalex.org/W3200278798","https://openalex.org/W3203827521","https://openalex.org/W3209828932","https://openalex.org/W4206386758","https://openalex.org/W4253208046","https://openalex.org/W4253699797","https://openalex.org/W4287363917","https://openalex.org/W4287642712","https://openalex.org/W4296396156","https://openalex.org/W4298078322","https://openalex.org/W6638523607"],"related_works":["https://openalex.org/W1543798151","https://openalex.org/W2965967938","https://openalex.org/W2292897598","https://openalex.org/W52302056","https://openalex.org/W2186439059","https://openalex.org/W3101543398","https://openalex.org/W2791204867","https://openalex.org/W2280540821","https://openalex.org/W2091908315","https://openalex.org/W4220731687"],"abstract_inverted_index":{"Matrix":[0,116,231],"Algebra":[1,117],"and":[2,29,40,57,84,118,134,140,151,156,165,188,192,207,218,239],"Deep":[3,234],"Neural":[4,235],"Networks":[5],"represent":[6],"foundational":[7],"classes":[8],"of":[9,114,124,175,202,214],"computational":[10],"algorithms":[11],"across":[12,121],"multiple":[13,55,115],"emerging":[14],"applications":[15,56,120,162],"like":[16],"Augmented":[17],"Reality":[18],"or":[19],"Virtual":[20],"Reality,":[21],"autonomous":[22],"navigation":[23],"(cars,":[24],"drones,":[25],"robots),":[26],"data":[27,48,88],"science,":[28],"various":[30],"artificial":[31],"intelligence-driven":[32],"solutions.":[33],"An":[34],"accelerator-based":[35],"architecture":[36,78],"can":[37],"provide":[38],"performance":[39,135],"energy":[41,212],"efficiency":[42,213],"supporting":[43],"fixed":[44],"functions":[45],"through":[46,89],"customized":[47],"paths.":[49],"However,":[50],"constrained":[51],"Edge":[52,167],"systems":[53],"requiring":[54],"diverse":[58],"matrix":[59,241],"operations":[60],"to":[61,109,153,222],"be":[62],"efficiently":[63],"supported,":[64],"cannot":[65],"afford":[66],"numerous":[67],"custom":[68],"accelerators.":[69],"In":[70],"this":[71],"article,":[72],"we":[73],"present":[74],"MxCore,":[75],"a":[76,95,122,171,181],"unified":[77],"that":[79],"comprises":[80],"tightly":[81],"coupled":[82],"vector":[83],"programmable":[85],"cores":[86],"sharing":[87],"highly":[90],"optimized":[91],"interconnects":[92],"along":[93,144],"with":[94,145],"configurable":[96],"hardware":[97],"scheduler":[98],"managing":[99],"the":[100,106,111,223],"co-execution.":[101],"We":[102],"submit":[103],"MxCore":[104,169,209],"as":[105,220],"generalized":[107],"approach":[108],"facilitate":[110],"flexible":[112],"acceleration":[113],"Deep-learning":[119],"range":[123],"sparsity":[125,147],"levels.":[126],"Unified":[127],"compute":[128,150],"resources":[129],"improve":[130],"overall":[131],"resource":[132],"utilization":[133],"per":[136],"unit":[137],"area.":[138],"Aggressive":[139],"novel":[141],"microarchitecture":[142],"techniques":[143],"block-level":[146],"support":[148],"optimize":[149],"data-reuse":[152],"minimize":[154],"bandwidth":[155],"power":[157],"requirements":[158],"enabling":[159],"ultra-low":[160],"latency":[161],"for":[163,228],"low-power":[164],"cost-sensitive":[166],"deployments.":[168],"requires":[170],"small":[172],"silicon":[173],"footprint":[174],"0.2068":[176],"mm":[177],"2":[178,196],",":[179,197],"in":[180],"modern":[182],"7-nm":[183],"process":[184],"at":[185],"1":[186],"GHz":[187],"achieves":[189],"(0.15":[190],"FP32":[191],"0.62":[193],"INT8)":[194],"TMAC/mm":[195],"dissipating":[198],"only":[199],"11.66":[200],"\u03bcW":[201],"leakage":[203],"power.":[204],"At":[205],"iso-technology":[206],"iso-frequency,":[208],"provides":[210],"an":[211],"651.4\u00d7,":[215],"159.9\u00d7,":[216],"104.8\u00d7,":[217],"124.2\u00d7":[219],"compared":[221],"128-core":[224],"Nvidia\u2019s":[225],"Maxwell":[226],"GPU":[227],"dense":[229],"General":[230],"Multiply,":[232],"sparse":[233],"Network,":[236],"Cholesky":[237],"decomposition,":[238],"triangular":[240],"solve":[242],"respectively.":[243]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":5},{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":2}],"updated_date":"2026-06-02T09:04:35.204637","created_date":"2025-10-10T00:00:00"}
