{"id":"https://openalex.org/W4399121259","doi":"https://doi.org/10.1145/3636534.3649391","title":"FlexNN: Efficient and Adaptive DNN Inference on Memory-Constrained Edge Devices","display_name":"FlexNN: Efficient and Adaptive DNN Inference on Memory-Constrained Edge Devices","publication_year":2024,"publication_date":"2024-05-29","ids":{"openalex":"https://openalex.org/W4399121259","doi":"https://doi.org/10.1145/3636534.3649391"},"language":"en","primary_location":{"id":"doi:10.1145/3636534.3649391","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3636534.3649391","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3636534.3649391","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 30th Annual International Conference on Mobile Computing and Networking","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3636534.3649391","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5003481592","display_name":"X Y Li","orcid":"https://orcid.org/0009-0001-5341-2303"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Xiangyu Li","raw_affiliation_strings":["Institute for AI Industry Research (AIR), Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Institute for AI Industry Research (AIR), Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100628298","display_name":"Yuanchun Li","orcid":"https://orcid.org/0000-0002-1591-2526"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuanchun Li","raw_affiliation_strings":["Institute for AI Industry Research (AIR), Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Institute for AI Industry Research (AIR), Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5087562099","display_name":"Yuanzhe Li","orcid":"https://orcid.org/0000-0003-0594-2745"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuanzhe Li","raw_affiliation_strings":["Institute for AI Industry Research (AIR), Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Institute for AI Industry Research (AIR), Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101534161","display_name":"Ting Cao","orcid":"https://orcid.org/0000-0002-9107-013X"},"institutions":[{"id":"https://openalex.org/I4210113369","display_name":"Microsoft Research Asia (China)","ror":"https://ror.org/0300m5276","country_code":"CN","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210113369"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ting Cao","raw_affiliation_strings":["Microsoft Research, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Microsoft Research, Beijing, China","institution_ids":["https://openalex.org/I4210113369"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5102880548","display_name":"Yunxin Liu","orcid":"https://orcid.org/0000-0001-7352-8955"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yunxin Liu","raw_affiliation_strings":["Institute for AI Industry Research (AIR), Tsinghua University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Institute for AI Industry Research (AIR), Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5003481592"],"corresponding_institution_ids":["https://openalex.org/I99065089"],"apc_list":null,"apc_paid":null,"fwci":7.0942,"has_fulltext":false,"cited_by_count":29,"citation_normalized_percentile":{"value":0.97908859,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":94,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"709","last_page":"723"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10273","display_name":"IoT and Edge/Fog Computing","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10502","display_name":"Advanced Memory and Neural Computing","score":0.9970999956130981,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8434132933616638},{"id":"https://openalex.org/keywords/memory-management","display_name":"Memory management","score":0.6215745806694031},{"id":"https://openalex.org/keywords/adaptive-memory","display_name":"Adaptive memory","score":0.5923550724983215},{"id":"https://openalex.org/keywords/memory-footprint","display_name":"Memory footprint","score":0.5506925582885742},{"id":"https://openalex.org/keywords/flat-memory-model","display_name":"Flat memory model","score":0.5249947309494019},{"id":"https://openalex.org/keywords/interleaved-memory","display_name":"Interleaved memory","score":0.5157275795936584},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.5038756728172302},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.502886950969696},{"id":"https://openalex.org/keywords/bottleneck","display_name":"Bottleneck","score":0.5002059936523438},{"id":"https://openalex.org/keywords/latency","display_name":"Latency (audio)","score":0.4753781855106354},{"id":"https://openalex.org/keywords/extended-memory","display_name":"Extended memory","score":0.431145578622818},{"id":"https://openalex.org/keywords/embedded-system","display_name":"Embedded system","score":0.383461594581604},{"id":"https://openalex.org/keywords/computer-architecture","display_name":"Computer architecture","score":0.3451225161552429},{"id":"https://openalex.org/keywords/semiconductor-memory","display_name":"Semiconductor memory","score":0.3255354166030884},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.19833943247795105},{"id":"https://openalex.org/keywords/computer-hardware","display_name":"Computer hardware","score":0.1570165455341339},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.13709619641304016},{"id":"https://openalex.org/keywords/cognition","display_name":"Cognition","score":0.10812768340110779}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8434132933616638},{"id":"https://openalex.org/C176649486","wikidata":"https://www.wikidata.org/wiki/Q2308807","display_name":"Memory management","level":3,"score":0.6215745806694031},{"id":"https://openalex.org/C30390489","wikidata":"https://www.wikidata.org/wiki/Q4680748","display_name":"Adaptive memory","level":3,"score":0.5923550724983215},{"id":"https://openalex.org/C74912251","wikidata":"https://www.wikidata.org/wiki/Q6815727","display_name":"Memory footprint","level":2,"score":0.5506925582885742},{"id":"https://openalex.org/C57863822","wikidata":"https://www.wikidata.org/wiki/Q905488","display_name":"Flat memory model","level":4,"score":0.5249947309494019},{"id":"https://openalex.org/C63511323","wikidata":"https://www.wikidata.org/wiki/Q908936","display_name":"Interleaved memory","level":4,"score":0.5157275795936584},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.5038756728172302},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.502886950969696},{"id":"https://openalex.org/C2780513914","wikidata":"https://www.wikidata.org/wiki/Q18210350","display_name":"Bottleneck","level":2,"score":0.5002059936523438},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.4753781855106354},{"id":"https://openalex.org/C171675096","wikidata":"https://www.wikidata.org/wiki/Q1143380","display_name":"Extended memory","level":4,"score":0.431145578622818},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.383461594581604},{"id":"https://openalex.org/C118524514","wikidata":"https://www.wikidata.org/wiki/Q173212","display_name":"Computer architecture","level":1,"score":0.3451225161552429},{"id":"https://openalex.org/C98986596","wikidata":"https://www.wikidata.org/wiki/Q1143031","display_name":"Semiconductor memory","level":2,"score":0.3255354166030884},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.19833943247795105},{"id":"https://openalex.org/C9390403","wikidata":"https://www.wikidata.org/wiki/Q3966","display_name":"Computer hardware","level":1,"score":0.1570165455341339},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.13709619641304016},{"id":"https://openalex.org/C169900460","wikidata":"https://www.wikidata.org/wiki/Q2200417","display_name":"Cognition","level":2,"score":0.10812768340110779},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0},{"id":"https://openalex.org/C169760540","wikidata":"https://www.wikidata.org/wiki/Q207011","display_name":"Neuroscience","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3636534.3649391","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3636534.3649391","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3636534.3649391","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 30th Annual International Conference on Mobile Computing and Networking","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3636534.3649391","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3636534.3649391","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3636534.3649391","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 30th Annual International Conference on Mobile Computing and Networking","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4399121259.pdf"},"referenced_works_count":32,"referenced_works":["https://openalex.org/W1951319388","https://openalex.org/W2194775991","https://openalex.org/W2325939864","https://openalex.org/W2489529491","https://openalex.org/W2756489700","https://openalex.org/W2797558164","https://openalex.org/W2886851211","https://openalex.org/W2897268228","https://openalex.org/W2934625602","https://openalex.org/W2955819484","https://openalex.org/W2963125010","https://openalex.org/W2963163009","https://openalex.org/W2963918968","https://openalex.org/W2967733054","https://openalex.org/W2979832172","https://openalex.org/W2981207549","https://openalex.org/W2984618279","https://openalex.org/W3000280594","https://openalex.org/W3012479151","https://openalex.org/W3012514909","https://openalex.org/W3032945613","https://openalex.org/W3035130950","https://openalex.org/W3043571714","https://openalex.org/W3093410479","https://openalex.org/W3102661353","https://openalex.org/W3127161477","https://openalex.org/W3130423852","https://openalex.org/W3211149853","https://openalex.org/W4282974189","https://openalex.org/W4312060068","https://openalex.org/W4387212784","https://openalex.org/W6950140870"],"related_works":["https://openalex.org/W2334181344","https://openalex.org/W2558276258","https://openalex.org/W4293054943","https://openalex.org/W2401095501","https://openalex.org/W4243333834","https://openalex.org/W2753615087","https://openalex.org/W2587873888","https://openalex.org/W2155373950","https://openalex.org/W4236777984","https://openalex.org/W1527643973"],"abstract_inverted_index":{"Due":[0],"to":[1,27,48,67,86,145,180,182],"the":[2,34,50,88,101,107,114,195,210],"popularity":[3],"of":[4,53,97,110],"deep":[5,73],"neural":[6],"networks":[7],"(DNNs)":[8],"and":[9,16,64,71,113,127,150,160],"considerations":[10],"over":[11],"network":[12],"overhead,":[13],"data":[14],"privacy,":[15],"inference":[17,134],"latency,":[18,206],"there":[19],"is":[20,178],"a":[21,38,81,140,202],"growing":[22],"interest":[23],"in":[24,30,205],"deploying":[25],"DNNs":[26],"edge":[28],"devices":[29],"recent":[31,84],"years.":[32],"However,":[33],"limited":[35],"memory":[36,51,79,89,103,111,115,129,148,152,184,196],"becomes":[37],"major":[39],"bottleneck":[40],"for":[41,132],"on-device":[42],"DNN":[43,133],"deployment,":[44],"making":[45],"it":[46],"crucial":[47],"reduce":[49,194],"footprint":[52,104],"DNN.":[54],"The":[55,171],"mainstream":[56],"model":[57,166],"customization":[58],"solutions":[59],"require":[60],"intensive":[61],"deployment":[62],"efforts":[63],"may":[65],"lead":[66],"severe":[68],"accuracy":[69],"degradation,":[70],"existing":[72],"learning":[74],"(DL)":[75],"frameworks":[76],"don't":[77],"take":[78],"as":[80,207],"priority.":[82],"Besides,":[83],"works":[85],"enhance":[87],"management":[90,130,153],"scheme":[91],"cannot":[92],"be":[93],"directly":[94],"applied":[95],"because":[96],"several":[98],"challenges,":[99,121],"including":[100],"unbalanced":[102],"across":[105],"layers,":[106],"inevitable":[108],"overhead":[109],"management,":[112],"budget":[116],"dynamicity.":[117],"To":[118],"tackle":[119],"these":[120],"we":[122],"introduce":[123],"FlexNN,":[124],"an":[125],"efficient":[126],"adaptive":[128],"framework":[131],"on":[135,168,213],"memory-constrained":[136],"devices.":[137,170],"FlexNN":[138,157,192],"uses":[139],"slicing-loading-computing":[141],"joint":[142],"planning":[143],"approach,":[144],"achieve":[146],"optimal":[147,187],"utilization":[149],"minimal":[151],"overhead.":[154],"We":[155],"implemented":[156],"atop":[158],"NCNN,":[159],"conducted":[161],"comprehensive":[162],"evaluations":[163],"with":[164,186,200,209],"common":[165],"architectures":[167],"various":[169],"results":[172],"have":[173],"shown":[174],"that":[175],"our":[176],"approach":[177],"able":[179],"adapt":[181],"different":[183],"constraints":[185],"latency-memory":[188],"trade-offs.":[189],"For":[190],"example,":[191],"can":[193],"consumption":[197],"by":[198],"93.81%":[199],"only":[201],"3.64%":[203],"increase":[204],"compared":[208],"original":[211],"NCNN":[212],"smartphones.":[214]},"counts_by_year":[{"year":2026,"cited_by_count":4},{"year":2025,"cited_by_count":23},{"year":2024,"cited_by_count":2}],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2025-10-10T00:00:00"}
