{"id":"https://openalex.org/W2921704024","doi":"https://doi.org/10.1109/access.2019.2905302","title":"A Novel DSP Architecture for Scientific Computing and Deep Learning","display_name":"A Novel DSP Architecture for Scientific Computing and Deep Learning","publication_year":2019,"publication_date":"2019-01-01","ids":{"openalex":"https://openalex.org/W2921704024","doi":"https://doi.org/10.1109/access.2019.2905302","mag":"2921704024"},"language":"en","primary_location":{"id":"doi:10.1109/access.2019.2905302","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2019.2905302","pdf_url":"https://ieeexplore.ieee.org/ielx7/6287639/8600701/08668496.pdf","source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://ieeexplore.ieee.org/ielx7/6287639/8600701/08668496.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100749330","display_name":"Chao Yang","orcid":"https://orcid.org/0000-0001-9001-0656"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Chao Yang","raw_affiliation_strings":["College of Computer, National University of Defense Technology, Changsha, China"],"raw_orcid":"https://orcid.org/0000-0001-9001-0656","affiliations":[{"raw_affiliation_string":"College of Computer, National University of Defense Technology, Changsha, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100644317","display_name":"Shuming Chen","orcid":"https://orcid.org/0000-0003-4441-2202"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shuming Chen","raw_affiliation_strings":["State Key Laboratory of High Performance Computing, National University of Defense Technology, Changsha, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"State Key Laboratory of High Performance Computing, National University of Defense Technology, Changsha, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100410010","display_name":"Jian Zhang","orcid":"https://orcid.org/0000-0002-8353-6243"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jian Zhang","raw_affiliation_strings":["College of Computer, National University of Defense Technology, Changsha, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"College of Computer, National University of Defense Technology, Changsha, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5031574429","display_name":"Zhao Lv","orcid":null},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhao Lv","raw_affiliation_strings":["College of Computer, National University of Defense Technology, Changsha, China"],"raw_orcid":"https://orcid.org/0000-0003-4806-8461","affiliations":[{"raw_affiliation_string":"College of Computer, National University of Defense Technology, Changsha, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5076598663","display_name":"Zhi Wang","orcid":"https://orcid.org/0000-0003-0743-5544"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhi Wang","raw_affiliation_strings":["College of Computer, National University of Defense Technology, Changsha, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"College of Computer, National University of Defense Technology, Changsha, China","institution_ids":["https://openalex.org/I170215575"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5100749330"],"corresponding_institution_ids":["https://openalex.org/I170215575"],"apc_list":{"value":1850,"currency":"USD","value_usd":1850},"apc_paid":{"value":1850,"currency":"USD","value_usd":1850},"fwci":2.7152,"has_fulltext":true,"cited_by_count":20,"citation_normalized_percentile":{"value":0.89770857,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":98},"biblio":{"volume":"7","issue":null,"first_page":"36413","last_page":"36425"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10904","display_name":"Embedded Systems Design Techniques","score":0.9976000189781189,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9962000250816345,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8522763252258301},{"id":"https://openalex.org/keywords/flops","display_name":"FLOPS","score":0.804172158241272},{"id":"https://openalex.org/keywords/simd","display_name":"SIMD","score":0.6710290312767029},{"id":"https://openalex.org/keywords/digital-signal-processing","display_name":"Digital signal processing","score":0.6138852834701538},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.6087494492530823},{"id":"https://openalex.org/keywords/supercomputer","display_name":"Supercomputer","score":0.5630847215652466},{"id":"https://openalex.org/keywords/computer-architecture","display_name":"Computer architecture","score":0.5045620203018188},{"id":"https://openalex.org/keywords/electrical-efficiency","display_name":"Electrical efficiency","score":0.486184298992157},{"id":"https://openalex.org/keywords/very-long-instruction-word","display_name":"Very long instruction word","score":0.47757580876350403},{"id":"https://openalex.org/keywords/power","display_name":"Power (physics)","score":0.277574360370636},{"id":"https://openalex.org/keywords/computer-hardware","display_name":"Computer hardware","score":0.26676076650619507}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8522763252258301},{"id":"https://openalex.org/C3826847","wikidata":"https://www.wikidata.org/wiki/Q188768","display_name":"FLOPS","level":2,"score":0.804172158241272},{"id":"https://openalex.org/C150552126","wikidata":"https://www.wikidata.org/wiki/Q339387","display_name":"SIMD","level":2,"score":0.6710290312767029},{"id":"https://openalex.org/C84462506","wikidata":"https://www.wikidata.org/wiki/Q173142","display_name":"Digital signal processing","level":2,"score":0.6138852834701538},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.6087494492530823},{"id":"https://openalex.org/C83283714","wikidata":"https://www.wikidata.org/wiki/Q121117","display_name":"Supercomputer","level":2,"score":0.5630847215652466},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.5045620203018188},{"id":"https://openalex.org/C118993495","wikidata":"https://www.wikidata.org/wiki/Q5042828","display_name":"Electrical efficiency","level":3,"score":0.486184298992157},{"id":"https://openalex.org/C170595534","wikidata":"https://www.wikidata.org/wiki/Q249743","display_name":"Very long instruction word","level":2,"score":0.47757580876350403},{"id":"https://openalex.org/C163258240","wikidata":"https://www.wikidata.org/wiki/Q25342","display_name":"Power (physics)","level":2,"score":0.277574360370636},{"id":"https://openalex.org/C9390403","wikidata":"https://www.wikidata.org/wiki/Q3966","display_name":"Computer hardware","level":1,"score":0.26676076650619507},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/access.2019.2905302","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2019.2905302","pdf_url":"https://ieeexplore.ieee.org/ielx7/6287639/8600701/08668496.pdf","source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:2621ca542c5a49d4a7319ff38d974ed6","is_oa":true,"landing_page_url":"https://doaj.org/article/2621ca542c5a49d4a7319ff38d974ed6","pdf_url":null,"source":{"id":"https://openalex.org/S4306401280","display_name":"DOAJ (DOAJ: Directory of Open Access Journals)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"IEEE Access, Vol 7, Pp 36413-36425 (2019)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.1109/access.2019.2905302","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2019.2905302","pdf_url":"https://ieeexplore.ieee.org/ielx7/6287639/8600701/08668496.pdf","source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/7","score":0.8100000023841858,"display_name":"Affordable and clean energy"}],"awards":[{"id":"https://openalex.org/G3839400729","display_name":null,"funder_award_id":"61602493","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5815876613","display_name":null,"funder_award_id":"61572025","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2921704024.pdf","grobid_xml":"https://content.openalex.org/works/W2921704024.grobid-xml"},"referenced_works_count":34,"referenced_works":["https://openalex.org/W47391476","https://openalex.org/W639708223","https://openalex.org/W1530827177","https://openalex.org/W1777209000","https://openalex.org/W1970013283","https://openalex.org/W1985956780","https://openalex.org/W1999193585","https://openalex.org/W2029644811","https://openalex.org/W2043275593","https://openalex.org/W2049875313","https://openalex.org/W2073061372","https://openalex.org/W2079436606","https://openalex.org/W2079726719","https://openalex.org/W2103290993","https://openalex.org/W2113893001","https://openalex.org/W2115172404","https://openalex.org/W2140906558","https://openalex.org/W2153514540","https://openalex.org/W2157134596","https://openalex.org/W2170996201","https://openalex.org/W2314480985","https://openalex.org/W2408559562","https://openalex.org/W2514035975","https://openalex.org/W2563705555","https://openalex.org/W2620620689","https://openalex.org/W2622826443","https://openalex.org/W2750793847","https://openalex.org/W2776859894","https://openalex.org/W2792542990","https://openalex.org/W2806928165","https://openalex.org/W2811065464","https://openalex.org/W4229666556","https://openalex.org/W4242759096","https://openalex.org/W6631399576"],"related_works":["https://openalex.org/W1855765675","https://openalex.org/W2115688358","https://openalex.org/W3046859795","https://openalex.org/W1503212777","https://openalex.org/W1505902692","https://openalex.org/W2072728786","https://openalex.org/W2146636354","https://openalex.org/W2066454338","https://openalex.org/W111037196","https://openalex.org/W2751061704"],"abstract_inverted_index":{"Exascale":[0],"computing":[1,172],"requires":[2],"accelerators":[3],"with":[4,108,122,136],"ultrahigh":[5],"power":[6,21,120,146],"efficiency.":[7],"Digital":[8],"signal":[9,178],"processors":[10,16],"(DSPs),":[11],"the":[12,27,43,85,97,109,118,144,154,157,165,175,182,221,226],"most":[13],"important":[14],"embedded":[15],"widely":[17],"known":[18],"for":[19,54,78,90,170],"high":[20],"efficiency,":[22],"are":[23,103],"rarely":[24],"explored":[25],"in":[26,156,186],"HPC":[28],"community.":[29],"We":[30,130,202],"propose":[31],"a":[32,73,112,123,133],"64-bit":[33],"general":[34],"purpose":[35],"DSP":[36],"architecture,":[37,75],"FT-Matrix2000,":[38],"which":[39,151],"not":[40],"only":[41],"integrates":[42],"main":[44],"features":[45],"of":[46,101,125,177,184,225],"DSPs":[47],"but":[48],"also":[49,192],"presents":[50],"several":[51],"novel":[52],"enhancements":[53],"scientific":[55,91,171],"computing.":[56,92],"The":[57,69,217],"FT-Matrix2000":[58,102,116,166,185],"architecture":[59,167,228],"comprises":[60],"multiple":[61],"FT-Matrix2":[62,70],"cores":[63],"and":[64,82,87,99,105,111,127,143,188,213,237,243],"optional":[65],"RISC":[66],"CPU":[67],"cores.":[68],"core":[71],"utilizes":[72],"VLIW+SIMD":[74],"provides":[76],"support":[77,197],"double":[79],"precision":[80],"operations,":[81],"optimizes":[83],"both":[84],"data":[86],"control":[88],"path":[89],"Our":[93],"evaluations":[94],"show":[95],"that":[96,164,220],"performance":[98,145],"efficiency":[100,121,140,176,224],"1107GFLOPS":[104],"92.25%.":[106],"Compared":[107],"MIC":[110],"40nm":[113],"process":[114],"GPU,":[115],"improves":[117],"GEMM":[119],"factor":[124],"1.49":[126],"2.68,":[128],"respectively.":[129],"build":[131],"up":[132],"prototype":[134],"supercomputer":[135],"FT-Matrix2000/12.":[137],"Its":[138],"HPL":[139],"achieves":[141],"62.2%,":[142],"ratio":[147],"is":[148,168,232],"5.33":[149],"GFLOPS/W,":[150],"can":[152],"rank":[153],"fourth":[155],"latest":[158],"Green500":[159],"list.":[160],"These":[161],"results":[162],"validate":[163],"suitable":[169],"while":[173],"maintaining":[174],"processing":[179],"well.":[180],"Moreover,":[181],"enhancement":[183],"vector":[187],"matrix":[189],"related":[190,200],"computations":[191],"enable":[193],"it":[194],"to":[195],"efficiently":[196],"deep":[198],"learning":[199],"applications.":[201],"have":[203],"implemented":[204],"some":[205],"typical":[206],"DCNN":[207],"models":[208],"on":[209,230],"FT-Matrx2000,":[210],"NVIDIA":[211],"GPUs,":[212],"Vision":[214,245],"P6":[215,246],"DSP.":[216,247],"experiments":[218],"demonstrate":[219],"average":[222],"computation":[223],"proposed":[227],"based":[229],"Matrix2000":[231],"about":[233],"20":[234],"~":[235],"35%":[236],"8%":[238],"higher":[239],"respectively":[240],"than":[241],"GPUs":[242],"Cadence":[244]},"counts_by_year":[{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":4},{"year":2022,"cited_by_count":3},{"year":2021,"cited_by_count":3},{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":4}],"updated_date":"2026-05-06T08:25:59.206177","created_date":"2025-10-10T00:00:00"}
