{"id":"https://openalex.org/W4386290345","doi":"https://doi.org/10.1145/3617688","title":"Smart-DNN+: A Memory-efficient Neural Networks Compression Framework for the Model Inference","display_name":"Smart-DNN+: A Memory-efficient Neural Networks Compression Framework for the Model Inference","publication_year":2023,"publication_date":"2023-08-30","ids":{"openalex":"https://openalex.org/W4386290345","doi":"https://doi.org/10.1145/3617688"},"language":"en","primary_location":{"id":"doi:10.1145/3617688","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3617688","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3617688","source":{"id":"https://openalex.org/S26056741","display_name":"ACM Transactions on Architecture and Code Optimization","issn_l":"1544-3566","issn":["1544-3566","1544-3973"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Architecture and Code Optimization","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3617688","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5069261286","display_name":"Donglei Wu","orcid":"https://orcid.org/0000-0003-0358-0533"},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Donglei Wu","raw_affiliation_strings":["Harbin Institute of Technology, Shenzhen, China"],"raw_orcid":"https://orcid.org/0000-0003-0358-0533","affiliations":[{"raw_affiliation_string":"Harbin Institute of Technology, Shenzhen, China","institution_ids":["https://openalex.org/I204983213"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101636404","display_name":"Weihao Yang","orcid":"https://orcid.org/0009-0002-6337-1768"},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Weihao Yang","raw_affiliation_strings":["Harbin Institute of Technology, Shenzhen, China"],"raw_orcid":"https://orcid.org/0009-0002-6337-1768","affiliations":[{"raw_affiliation_string":"Harbin Institute of Technology, Shenzhen, China","institution_ids":["https://openalex.org/I204983213"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5021153181","display_name":"Xiangyu Zou","orcid":"https://orcid.org/0000-0001-5104-8301"},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiangyu Zou","raw_affiliation_strings":["Harbin Institute of Technology, Shenzhen, China"],"raw_orcid":"https://orcid.org/0000-0001-5104-8301","affiliations":[{"raw_affiliation_string":"Harbin Institute of Technology, Shenzhen, China","institution_ids":["https://openalex.org/I204983213"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5050928208","display_name":"Wen Xia","orcid":"https://orcid.org/0000-0003-4093-6391"},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]},{"id":"https://openalex.org/I4210136793","display_name":"Peng Cheng Laboratory","ror":"https://ror.org/03qdqbt06","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210136793"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wen Xia","raw_affiliation_strings":["Harbin Institute of Technology, Shenzhen; Department of New Networks, Peng ChengLaboratory, Shenzhen; Guangdong Provincial Key Laboratory of Novel Security Intelligence Technologies, China","Department of New Networks, Peng Cheng Laboratory, Shenzhen","Harbin Institute of Technology, Shenzhen"],"raw_orcid":"https://orcid.org/0000-0003-4093-6391","affiliations":[{"raw_affiliation_string":"Harbin Institute of Technology, Shenzhen; Department of New Networks, Peng ChengLaboratory, Shenzhen; Guangdong Provincial Key Laboratory of Novel Security Intelligence Technologies, China","institution_ids":["https://openalex.org/I4210136793","https://openalex.org/I204983213"]},{"raw_affiliation_string":"Department of New Networks, Peng Cheng Laboratory, Shenzhen","institution_ids":["https://openalex.org/I4210136793"]},{"raw_affiliation_string":"Harbin Institute of Technology, Shenzhen","institution_ids":["https://openalex.org/I204983213"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100626061","display_name":"Shiyi Li","orcid":"https://orcid.org/0000-0001-8206-6916"},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shiyi Li","raw_affiliation_strings":["Harbin Institute of Technology, Shenzhen, China"],"raw_orcid":"https://orcid.org/0000-0001-8206-6916","affiliations":[{"raw_affiliation_string":"Harbin Institute of Technology, Shenzhen, China","institution_ids":["https://openalex.org/I204983213"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5013685647","display_name":"Zhenbo Hu","orcid":"https://orcid.org/0000-0002-9453-7516"},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhenbo Hu","raw_affiliation_strings":["Harbin Institute of Technology, Shenzhen, China"],"raw_orcid":"https://orcid.org/0000-0002-9453-7516","affiliations":[{"raw_affiliation_string":"Harbin Institute of Technology, Shenzhen, China","institution_ids":["https://openalex.org/I204983213"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5085221201","display_name":"Weizhe Zhang","orcid":"https://orcid.org/0000-0003-4783-876X"},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]},{"id":"https://openalex.org/I4210136793","display_name":"Peng Cheng Laboratory","ror":"https://ror.org/03qdqbt06","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210136793"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Weizhe Zhang","raw_affiliation_strings":["Harbin Institute of Technology, Shenzhen; Department of New Networks, Peng Cheng Laboratory, Shenzhen, China","Harbin Institute of Technology, Shenzhen"],"raw_orcid":"https://orcid.org/0000-0003-4783-876X","affiliations":[{"raw_affiliation_string":"Harbin Institute of Technology, Shenzhen; Department of New Networks, Peng Cheng Laboratory, Shenzhen, China","institution_ids":["https://openalex.org/I4210136793","https://openalex.org/I204983213"]},{"raw_affiliation_string":"Harbin Institute of Technology, Shenzhen","institution_ids":["https://openalex.org/I204983213"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5113587199","display_name":"Binxing Fang","orcid":null},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]},{"id":"https://openalex.org/I4210136793","display_name":"Peng Cheng Laboratory","ror":"https://ror.org/03qdqbt06","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210136793"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Binxing Fang","raw_affiliation_strings":["Harbin Institute of Technology, Shenzhen; Department of New Networks, Peng Cheng Laboratory, Shenzhen, China","Harbin Institute of Technology, Shenzhen"],"raw_orcid":"https://orcid.org/0000-0003-0305-2132","affiliations":[{"raw_affiliation_string":"Harbin Institute of Technology, Shenzhen; Department of New Networks, Peng Cheng Laboratory, Shenzhen, China","institution_ids":["https://openalex.org/I4210136793","https://openalex.org/I204983213"]},{"raw_affiliation_string":"Harbin Institute of Technology, Shenzhen","institution_ids":["https://openalex.org/I204983213"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5069261286"],"corresponding_institution_ids":["https://openalex.org/I204983213"],"apc_list":null,"apc_paid":null,"fwci":1.2521,"has_fulltext":true,"cited_by_count":11,"citation_normalized_percentile":{"value":0.82155194,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":99},"biblio":{"volume":"20","issue":"4","first_page":"1","last_page":"24"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9940000176429749,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.9923999905586243,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8240506649017334},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.6254977583885193},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.5533410310745239},{"id":"https://openalex.org/keywords/quantization","display_name":"Quantization (signal processing)","score":0.5320461988449097},{"id":"https://openalex.org/keywords/floating-point","display_name":"Floating point","score":0.4267706573009491},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3351770043373108},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.23148202896118164}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8240506649017334},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.6254977583885193},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.5533410310745239},{"id":"https://openalex.org/C28855332","wikidata":"https://www.wikidata.org/wiki/Q198099","display_name":"Quantization (signal processing)","level":2,"score":0.5320461988449097},{"id":"https://openalex.org/C84211073","wikidata":"https://www.wikidata.org/wiki/Q117879","display_name":"Floating point","level":2,"score":0.4267706573009491},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3351770043373108},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.23148202896118164}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3617688","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3617688","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3617688","source":{"id":"https://openalex.org/S26056741","display_name":"ACM Transactions on Architecture and Code Optimization","issn_l":"1544-3566","issn":["1544-3566","1544-3973"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Architecture and Code Optimization","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1145/3617688","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3617688","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3617688","source":{"id":"https://openalex.org/S26056741","display_name":"ACM Transactions on Architecture and Code Optimization","issn_l":"1544-3566","issn":["1544-3566","1544-3973"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Architecture and Code Optimization","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1823512066","display_name":null,"funder_award_id":"U22A2036 and 61972441","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G2810665394","display_name":null,"funder_award_id":"JCYJ20200109113427092","funder_id":"https://openalex.org/F4320336569","funder_display_name":"Shenzhen Science and Technology Innovation Program"},{"id":"https://openalex.org/G3247949200","display_name":null,"funder_award_id":"PCL2022A03","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G4354376995","display_name":null,"funder_award_id":"RCYX20210609104510007","funder_id":"https://openalex.org/F4320336569","funder_display_name":"Shenzhen Science and Technology Innovation Program"},{"id":"https://openalex.org/G5379840081","display_name":null,"funder_award_id":"61972441","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G7349572720","display_name":null,"funder_award_id":"2022B1212010005","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G7505181816","display_name":null,"funder_award_id":"RCYX20210609104510007 and JCYJ20200109113427092","funder_id":"https://openalex.org/F4320336569","funder_display_name":"Shenzhen Science and Technology Innovation Program"},{"id":"https://openalex.org/G752701465","display_name":null,"funder_award_id":"U22A2036","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320336569","display_name":"Shenzhen Science and Technology Innovation Program","ror":null}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4386290345.pdf","grobid_xml":"https://content.openalex.org/works/W4386290345.grobid-xml"},"referenced_works_count":34,"referenced_works":["https://openalex.org/W1528999146","https://openalex.org/W1777016212","https://openalex.org/W1821462560","https://openalex.org/W2163605009","https://openalex.org/W2168809519","https://openalex.org/W2260663238","https://openalex.org/W2560674852","https://openalex.org/W2612445135","https://openalex.org/W2803549871","https://openalex.org/W2965373594","https://openalex.org/W2967893498","https://openalex.org/W2979515806","https://openalex.org/W2981852735","https://openalex.org/W2989289980","https://openalex.org/W3004905356","https://openalex.org/W3008666094","https://openalex.org/W3034368386","https://openalex.org/W3091900426","https://openalex.org/W3115348505","https://openalex.org/W3122378989","https://openalex.org/W3137147200","https://openalex.org/W3138154797","https://openalex.org/W3177126286","https://openalex.org/W3184606595","https://openalex.org/W4225426816","https://openalex.org/W4229374303","https://openalex.org/W4232385523","https://openalex.org/W4281660701","https://openalex.org/W4287254789","https://openalex.org/W4288089799","https://openalex.org/W4297775537","https://openalex.org/W4300059844","https://openalex.org/W4394670654","https://openalex.org/W6638523607"],"related_works":["https://openalex.org/W2055243143","https://openalex.org/W4321636575","https://openalex.org/W2357796999","https://openalex.org/W2045526782","https://openalex.org/W2741131631","https://openalex.org/W1986418932","https://openalex.org/W2156919374","https://openalex.org/W2979160909","https://openalex.org/W3009327594","https://openalex.org/W2803935332"],"abstract_inverted_index":{"Deep":[0,15],"Neural":[1,16],"Networks":[2],"(DNNs)":[3],"have":[4],"achieved":[5],"remarkable":[6],"success":[7],"in":[8,84,198,222],"various":[9],"real-world":[10],"applications.":[11],"However,":[12],"running":[13,194],"a":[14,110,136,140,172,205],"Network":[17],"(DNN)":[18],"typically":[19],"requires":[20],"hundreds":[21],"of":[22,24,57,101,124,156,260,279],"megabytes":[23],"memory":[25,55,91,122,214,223,250],"footprints,":[26],"making":[27],"it":[28],"challenging":[29],"to":[30,96,143,158,174,209,276],"deploy":[31],"on":[32,239],"resource-constrained":[33],"platforms":[34],"such":[35,46],"as":[36,47],"mobile":[37],"devices":[38],"and":[39,50,88,224,242],"IoT.":[40],"Although":[41],"mainstream":[42],"DNNs":[43,221,241],"compression":[44,71,92],"techniques":[45],"pruning,":[48],"distillation,":[49],"quantization":[51,178],"can":[52],"reduce":[53,145,211],"the":[54,74,98,121,129,146,151,159,176,182,188,195,199,212,219,228,258,261,265,272],"overhead":[56,215],"model":[58,70,85,130,147,167,232],"parameters":[59],"during":[60],"DNN":[61,76,102,113,125,154,197,281],"inference,":[62],"they":[63],"suffer":[64],"from":[65],"three":[66],"limitations:":[67],"(i)":[68],"low":[69],"ratio":[72,93],"for":[73,231],"lightweight":[75,111],"structures":[77],"with":[78,139,257],"little":[79],"redundancy,":[80],"(ii)":[81],"potential":[82],"degradation":[83],"inference":[86,114,126,233,266,273,282],"accuracy,":[87],"(iii)":[89],"inadequate":[90],"is":[94],"attributable":[95],"ignoring":[97],"layering":[99],"property":[100],"inference.":[103],"To":[104,165],"address":[105],"these":[106],"issues,":[107],"we":[108],"propose":[109],"memory-efficient":[112],"framework":[115],"called":[116],"Smart-DNN+,":[117],"which":[118],"significantly":[119],"reduces":[120,271],"costs":[123,251],"without":[127,263],"degrading":[128,264],"quality.":[131],"Specifically,":[132],"\u2460":[133],"Smart-DNN+":[134,170,203,246,269],"applies":[135],"layerwise":[137],"binary-quantizer":[138],"remapping":[141],"mechanism":[142],"greatly":[144,210],"size":[148],"by":[149,163,180,216,235],"quantizing":[150],"typical":[152],"floating-point":[153,185],"weights":[155],"32-bit":[157],"1-bit":[160],"signs":[161],"layer":[162,234],"layer.":[164,236],"maintain":[166],"quality,":[168],"\u2461":[169],"employs":[171],"bucket-encoder":[173],"keep":[175],"compressed":[177,196,220],"error":[179],"encoding":[181],"multiple":[183],"similar":[184],"residuals":[186],"into":[187],"same":[189],"integer":[190],"bucket":[191],"IDs.":[192],"When":[193],"user\u2019s":[200],"device,":[201],"\u2462":[202],"utilizes":[204],"partially":[206],"decompressing":[207,227],"strategy":[208],"required":[213,229],"first":[217],"loading":[218],"then":[225],"dynamically":[226],"materials":[230],"Experimental":[237],"results":[238],"popular":[240],"datasets":[243],"demonstrate":[244],"that":[245,278],"achieves":[247],"lower":[248,253],"0.17%\u20130.92%":[249],"at":[252],"runtime":[254,274],"overheads":[255],"compared":[256],"states":[259],"art":[262],"accuracy.":[267],"Moreover,":[268],"potentially":[270],"up":[275],"2.04\u00d7":[277],"conventional":[280],"workflow.":[283]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":7},{"year":2024,"cited_by_count":2}],"updated_date":"2026-05-21T06:26:12.895304","created_date":"2025-10-10T00:00:00"}
