{"id":"https://openalex.org/W4388505295","doi":"https://doi.org/10.1109/tvlsi.2023.3327110","title":"HIPU: A Hybrid Intelligent Processing Unit With Fine-Grained ISA for Real-Time Deep Neural Network Inference Applications","display_name":"HIPU: A Hybrid Intelligent Processing Unit With Fine-Grained ISA for Real-Time Deep Neural Network Inference Applications","publication_year":2023,"publication_date":"2023-11-08","ids":{"openalex":"https://openalex.org/W4388505295","doi":"https://doi.org/10.1109/tvlsi.2023.3327110"},"language":"en","primary_location":{"id":"doi:10.1109/tvlsi.2023.3327110","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tvlsi.2023.3327110","pdf_url":null,"source":{"id":"https://openalex.org/S37538908","display_name":"IEEE Transactions on Very Large Scale Integration (VLSI) Systems","issn_l":"1063-8210","issn":["1063-8210","1557-9999"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Very Large Scale Integration (VLSI) Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5006910313","display_name":"Wenzhe Zhao","orcid":"https://orcid.org/0000-0002-7001-2125"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Wenzhe Zhao","raw_affiliation_strings":["National Key Laboratory of Human-Machine Hybrid Augmented Intelligence, the National Engineering Research Center of Visual Information and Applications, and the Institute of Artificial Intelligence and Robotics, Xi&#x2019;an Jiaotong University, Xi&#x2019;an, Shaanxi, China"],"affiliations":[{"raw_affiliation_string":"National Key Laboratory of Human-Machine Hybrid Augmented Intelligence, the National Engineering Research Center of Visual Information and Applications, and the Institute of Artificial Intelligence and Robotics, Xi&#x2019;an Jiaotong University, Xi&#x2019;an, Shaanxi, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102411735","display_name":"Guoming Yang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Guoming Yang","raw_affiliation_strings":["National Key Laboratory of Human-Machine Hybrid Augmented Intelligence, the National Engineering Research Center of Visual Information and Applications, and the Institute of Artificial Intelligence and Robotics, Xi&#x2019;an Jiaotong University, Xi&#x2019;an, Shaanxi, China"],"affiliations":[{"raw_affiliation_string":"National Key Laboratory of Human-Machine Hybrid Augmented Intelligence, the National Engineering Research Center of Visual Information and Applications, and the Institute of Artificial Intelligence and Robotics, Xi&#x2019;an Jiaotong University, Xi&#x2019;an, Shaanxi, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5032104477","display_name":"Tian Xia","orcid":"https://orcid.org/0000-0002-2520-3731"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tian Xia","raw_affiliation_strings":["National Key Laboratory of Human-Machine Hybrid Augmented Intelligence, the National Engineering Research Center of Visual Information and Applications, and the Institute of Artificial Intelligence and Robotics, Xi&#x2019;an Jiaotong University, Xi&#x2019;an, Shaanxi, China"],"affiliations":[{"raw_affiliation_string":"National Key Laboratory of Human-Machine Hybrid Augmented Intelligence, the National Engineering Research Center of Visual Information and Applications, and the Institute of Artificial Intelligence and Robotics, Xi&#x2019;an Jiaotong University, Xi&#x2019;an, Shaanxi, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100405410","display_name":"Fei Chen","orcid":"https://orcid.org/0000-0002-6988-492X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Fei Chen","raw_affiliation_strings":["National Key Laboratory of Human-Machine Hybrid Augmented Intelligence, the National Engineering Research Center of Visual Information and Applications, and the Institute of Artificial Intelligence and Robotics, Xi&#x2019;an Jiaotong University, Xi&#x2019;an, Shaanxi, China"],"affiliations":[{"raw_affiliation_string":"National Key Laboratory of Human-Machine Hybrid Augmented Intelligence, the National Engineering Research Center of Visual Information and Applications, and the Institute of Artificial Intelligence and Robotics, Xi&#x2019;an Jiaotong University, Xi&#x2019;an, Shaanxi, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5047405956","display_name":"Nanning Zheng","orcid":"https://orcid.org/0000-0003-1608-8257"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Nanning Zheng","raw_affiliation_strings":["National Key Laboratory of Human-Machine Hybrid Augmented Intelligence, the National Engineering Research Center of Visual Information and Applications, and the Institute of Artificial Intelligence and Robotics, Xi&#x2019;an Jiaotong University, Xi&#x2019;an, Shaanxi, China"],"affiliations":[{"raw_affiliation_string":"National Key Laboratory of Human-Machine Hybrid Augmented Intelligence, the National Engineering Research Center of Visual Information and Applications, and the Institute of Artificial Intelligence and Robotics, Xi&#x2019;an Jiaotong University, Xi&#x2019;an, Shaanxi, China","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5044243518","display_name":"Pengju Ren","orcid":"https://orcid.org/0000-0003-1163-2014"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Pengju Ren","raw_affiliation_strings":["National Key Laboratory of Human-Machine Hybrid Augmented Intelligence, the National Engineering Research Center of Visual Information and Applications, and the Institute of Artificial Intelligence and Robotics, Xi&#x2019;an Jiaotong University, Xi&#x2019;an, Shaanxi, China"],"affiliations":[{"raw_affiliation_string":"National Key Laboratory of Human-Machine Hybrid Augmented Intelligence, the National Engineering Research Center of Visual Information and Applications, and the Institute of Artificial Intelligence and Robotics, Xi&#x2019;an Jiaotong University, Xi&#x2019;an, Shaanxi, China","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5006910313"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.5952,"has_fulltext":false,"cited_by_count":5,"citation_normalized_percentile":{"value":0.69860243,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":98},"biblio":{"volume":"31","issue":"12","first_page":"1980","last_page":"1993"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10502","display_name":"Advanced Memory and Neural Computing","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12702","display_name":"Brain Tumor Detection and Classification","score":0.9937000274658203,"subfield":{"id":"https://openalex.org/subfields/2808","display_name":"Neurology"},"field":{"id":"https://openalex.org/fields/28","display_name":"Neuroscience"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7962598204612732},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.6685003638267517},{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.5382895469665527},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.5365056991577148},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.5354467034339905},{"id":"https://openalex.org/keywords/computer-architecture","display_name":"Computer architecture","score":0.5050998330116272},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.4688241183757782},{"id":"https://openalex.org/keywords/schedule","display_name":"Schedule","score":0.4636211693286896},{"id":"https://openalex.org/keywords/simd","display_name":"SIMD","score":0.4524170756340027},{"id":"https://openalex.org/keywords/instruction-set","display_name":"Instruction set","score":0.43421903252601624},{"id":"https://openalex.org/keywords/computer-engineering","display_name":"Computer engineering","score":0.41115590929985046},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.3988250494003296},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3515106439590454},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.18615630269050598},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.12877506017684937}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7962598204612732},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.6685003638267517},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.5382895469665527},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.5365056991577148},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.5354467034339905},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.5050998330116272},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.4688241183757782},{"id":"https://openalex.org/C68387754","wikidata":"https://www.wikidata.org/wiki/Q7271585","display_name":"Schedule","level":2,"score":0.4636211693286896},{"id":"https://openalex.org/C150552126","wikidata":"https://www.wikidata.org/wiki/Q339387","display_name":"SIMD","level":2,"score":0.4524170756340027},{"id":"https://openalex.org/C202491316","wikidata":"https://www.wikidata.org/wiki/Q272683","display_name":"Instruction set","level":2,"score":0.43421903252601624},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.41115590929985046},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.3988250494003296},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3515106439590454},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.18615630269050598},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.12877506017684937}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tvlsi.2023.3327110","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tvlsi.2023.3327110","pdf_url":null,"source":{"id":"https://openalex.org/S37538908","display_name":"IEEE Transactions on Very Large Scale Integration (VLSI) Systems","issn_l":"1063-8210","issn":["1063-8210","1557-9999"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Very Large Scale Integration (VLSI) Systems","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1900829300","display_name":null,"funder_award_id":"2022YFB4500500","funder_id":"https://openalex.org/F4320335777","funder_display_name":"National Key Research and Development Program of China"},{"id":"https://openalex.org/G3833898155","display_name":null,"funder_award_id":"62088102","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5340218887","display_name":null,"funder_award_id":"62302381","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G8788828744","display_name":null,"funder_award_id":"2022ZDLGY01-08","funder_id":"https://openalex.org/F4320336350","funder_display_name":"Key Research and Development Projects of Shaanxi Province"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320335777","display_name":"National Key Research and Development Program of China","ror":null},{"id":"https://openalex.org/F4320335787","display_name":"Fundamental Research Funds for the Central Universities","ror":null},{"id":"https://openalex.org/F4320336350","display_name":"Key Research and Development Projects of Shaanxi Province","ror":null}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":28,"referenced_works":["https://openalex.org/W2515287984","https://openalex.org/W2522548197","https://openalex.org/W2540279855","https://openalex.org/W2606722458","https://openalex.org/W2618530766","https://openalex.org/W2909971279","https://openalex.org/W2936278485","https://openalex.org/W2962874694","https://openalex.org/W2963122961","https://openalex.org/W2971734772","https://openalex.org/W2981207549","https://openalex.org/W2982219368","https://openalex.org/W2991384465","https://openalex.org/W2994193159","https://openalex.org/W3007788310","https://openalex.org/W3034785488","https://openalex.org/W3042734658","https://openalex.org/W3094218199","https://openalex.org/W3157657667","https://openalex.org/W3194490872","https://openalex.org/W4293584584","https://openalex.org/W4294310665","https://openalex.org/W4306755820","https://openalex.org/W4313465452","https://openalex.org/W4384947576","https://openalex.org/W6750227808","https://openalex.org/W6751349269","https://openalex.org/W6966527820"],"related_works":["https://openalex.org/W2099629705","https://openalex.org/W2045177269","https://openalex.org/W2994245508","https://openalex.org/W4242172182","https://openalex.org/W2116582200","https://openalex.org/W2082875307","https://openalex.org/W4237780868","https://openalex.org/W4285302443","https://openalex.org/W4396938741","https://openalex.org/W2019451907"],"abstract_inverted_index":{"Neural":[0],"network":[1,47],"algorithms":[2],"have":[3,59],"shown":[4],"superior":[5],"performance":[6,26,176,210],"over":[7],"conventional":[8],"algorithms,":[9],"leading":[10],"to":[11,66,70,116,141,151],"the":[12,40,72,82,96,118,130,138,143,148,161,192],"designation":[13],"and":[14,136,173],"deployment":[15],"of":[16,34,86,98,211],"dedicated":[17],"accelerators":[18,23,80],"in":[19,44],"practical":[20],"scenarios.":[21],"Coarse-grained":[22],"achieve":[24],"high":[25,165,175],"but":[27],"can":[28],"support":[29],"only":[30],"a":[31,61,110,124,208],"limited":[32],"number":[33],"predesigned":[35],"operators,":[36],"which":[37],"cannot":[38],"cover":[39,71],"flexible":[41],"operators":[42,172],"emerging":[43],"modern":[45],"neural":[46],"algorithms.":[48],"Therefore,":[49],"fine-grained":[50,79],"accelerators,":[51,58],"such":[52],"as":[53,93,95],"instruction":[54,134],"set":[55,135],"architecture":[56],"(ISA)-based":[57],"become":[60],"hot":[62],"research":[63],"topic":[64],"due":[65],"their":[67],"sufficient":[68],"flexibility":[69],"unpredefined":[73],"operators.":[74],"The":[75,156],"main":[76],"challenges":[77],"for":[78,169,204],"include":[81],"undesired":[83],"long":[84],"delays":[85],"single-image":[87],"inference":[88,149],"when":[89,102],"performing":[90],"multibatch":[91],"inference,":[92],"well":[94],"difficulty":[97],"meeting":[99],"real-time":[100],"constraints":[101],"processing":[103,113],"multiple":[104],"tasks":[105],"simultaneously.":[106],"This":[107],"article":[108],"proposes":[109],"hybrid":[111],"intelligent":[112],"unit":[114],"(HIPU)":[115],"address":[117],"aforementioned":[119],"problems.":[120],"Specifically,":[121],"we":[122],"design":[123,140],"novel":[125],"conversion-free":[126],"data":[127],"format,":[128],"expanding":[129],"single-instruction":[131],"multiple-data":[132],"(SIMD)":[133],"optimizing":[137],"microarchitecture":[139],"improve":[142],"performance.":[144],"We":[145],"also":[146],"arrange":[147],"schedule":[150],"guarantee":[152],"scalability":[153],"on":[154],"multicores.":[155],"experimental":[157],"results":[158],"show":[159],"that":[160],"proposed":[162,193],"accelerator":[163,194],"maintains":[164],"multiply\u2013accumulation":[166],"(MAC)":[167],"utilization":[168],"all":[170],"common":[171],"achieves":[174],"with":[177,207],"4\u2013":[178],"<inline-formula":[179],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[180],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">":[181],"<tex-math":[182],"notation=\"LaTeX\">$7\\times":[183],"$":[184],"</tex-math></inline-formula>":[185],"speedup":[186],"against":[187],"NVIDIA":[188],"RTX2080Ti":[189],"GPU.":[190],"Finally,":[191],"is":[195],"manufactured":[196],"using":[197],"TSMC":[198],"28-nm":[199],"technology,":[200],"achieving":[201],"1":[202],"GHz":[203],"each":[205],"core,":[206],"peak":[209],"13":[212],"TOPS.":[213]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":3}],"updated_date":"2026-04-14T08:04:32.555800","created_date":"2025-10-10T00:00:00"}
