{"id":"https://openalex.org/W4412374994","doi":"https://doi.org/10.1109/tpds.2025.3587445","title":"m2LLM: A Multi-Dimensional Optimization Framework for LLM Inference on Mobile Devices","display_name":"m2LLM: A Multi-Dimensional Optimization Framework for LLM Inference on Mobile Devices","publication_year":2025,"publication_date":"2025-07-09","ids":{"openalex":"https://openalex.org/W4412374994","doi":"https://doi.org/10.1109/tpds.2025.3587445"},"language":"en","primary_location":{"id":"doi:10.1109/tpds.2025.3587445","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tpds.2025.3587445","pdf_url":null,"source":{"id":"https://openalex.org/S97130795","display_name":"IEEE Transactions on Parallel and Distributed Systems","issn_l":"1045-9219","issn":["1045-9219","1558-2183","2161-9883"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Parallel and Distributed Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5118968447","display_name":"Kaiyuan Liu","orcid":null},"institutions":[{"id":"https://openalex.org/I204512498","display_name":"University of Macau","ror":"https://ror.org/01r4q9n85","country_code":"MO","type":"education","lineage":["https://openalex.org/I204512498"]}],"countries":["MO"],"is_corresponding":true,"raw_author_name":"Kaiyuan Liu","raw_affiliation_strings":["Faculty of Science and Technology, State Key Laboratory of IoTSC, University of Macau, Taipa, China","Faculty of Science and Technology, State Key Laboratory of IoTSC, University of Macau, Taipa, Macau SAR, China"],"affiliations":[{"raw_affiliation_string":"Faculty of Science and Technology, State Key Laboratory of IoTSC, University of Macau, Taipa, China","institution_ids":["https://openalex.org/I204512498"]},{"raw_affiliation_string":"Faculty of Science and Technology, State Key Laboratory of IoTSC, University of Macau, Taipa, Macau SAR, China","institution_ids":["https://openalex.org/I204512498"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101677080","display_name":"Xiaobo Zhou","orcid":"https://orcid.org/0009-0004-9500-3390"},"institutions":[{"id":"https://openalex.org/I204512498","display_name":"University of Macau","ror":"https://ror.org/01r4q9n85","country_code":"MO","type":"education","lineage":["https://openalex.org/I204512498"]}],"countries":["MO"],"is_corresponding":false,"raw_author_name":"Xiaobo Zhou","raw_affiliation_strings":["Faculty of Science and Technology, State Key Laboratory of IoTSC, University of Macau, Taipa, China","Faculty of Science and Technology, State Key Laboratory of IoTSC, University of Macau, Taipa, Macau SAR, China"],"affiliations":[{"raw_affiliation_string":"Faculty of Science and Technology, State Key Laboratory of IoTSC, University of Macau, Taipa, China","institution_ids":["https://openalex.org/I204512498"]},{"raw_affiliation_string":"Faculty of Science and Technology, State Key Laboratory of IoTSC, University of Macau, Taipa, Macau SAR, China","institution_ids":["https://openalex.org/I204512498"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5032580866","display_name":"Li Li","orcid":"https://orcid.org/0000-0002-2044-8289"},"institutions":[{"id":"https://openalex.org/I204512498","display_name":"University of Macau","ror":"https://ror.org/01r4q9n85","country_code":"MO","type":"education","lineage":["https://openalex.org/I204512498"]}],"countries":["MO"],"is_corresponding":false,"raw_author_name":"Li Li","raw_affiliation_strings":["Faculty of Science and Technology, State Key Laboratory of IoTSC, University of Macau, Taipa, China","Faculty of Science and Technology, State Key Laboratory of IoTSC, University of Macau, Taipa, Macau SAR, China"],"affiliations":[{"raw_affiliation_string":"Faculty of Science and Technology, State Key Laboratory of IoTSC, University of Macau, Taipa, China","institution_ids":["https://openalex.org/I204512498"]},{"raw_affiliation_string":"Faculty of Science and Technology, State Key Laboratory of IoTSC, University of Macau, Taipa, Macau SAR, China","institution_ids":["https://openalex.org/I204512498"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5118968447"],"corresponding_institution_ids":["https://openalex.org/I204512498"],"apc_list":null,"apc_paid":null,"fwci":3.1726,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.92166867,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":91,"max":99},"biblio":{"volume":"36","issue":"10","first_page":"2014","last_page":"2029"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9283000230789185,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9283000230789185,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.5072295665740967},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5058135986328125}],"concepts":[{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.5072295665740967},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5058135986328125}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tpds.2025.3587445","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tpds.2025.3587445","pdf_url":null,"source":{"id":"https://openalex.org/S97130795","display_name":"IEEE Transactions on Parallel and Distributed Systems","issn_l":"1045-9219","issn":["1045-9219","1558-2183","2161-9883"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Parallel and Distributed Systems","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":47,"referenced_works":["https://openalex.org/W2194775991","https://openalex.org/W2752579606","https://openalex.org/W2963736842","https://openalex.org/W2963963993","https://openalex.org/W3034368386","https://openalex.org/W3034999214","https://openalex.org/W3124893884","https://openalex.org/W3139521791","https://openalex.org/W4282974189","https://openalex.org/W4318541554","https://openalex.org/W4385567182","https://openalex.org/W4386083031","https://openalex.org/W4389519226","https://openalex.org/W4389523718","https://openalex.org/W4392353733","https://openalex.org/W4393160423","https://openalex.org/W4394998727","https://openalex.org/W4395073472","https://openalex.org/W4398152570","https://openalex.org/W4401211704","https://openalex.org/W4402671835","https://openalex.org/W4404784116","https://openalex.org/W4406650295","https://openalex.org/W6678262379","https://openalex.org/W6682631176","https://openalex.org/W6727208969","https://openalex.org/W6761205521","https://openalex.org/W6766673545","https://openalex.org/W6769243733","https://openalex.org/W6772383348","https://openalex.org/W6773815586","https://openalex.org/W6774015895","https://openalex.org/W6811340617","https://openalex.org/W6846164622","https://openalex.org/W6850927664","https://openalex.org/W6852648873","https://openalex.org/W6852962002","https://openalex.org/W6854094408","https://openalex.org/W6856239413","https://openalex.org/W6856565980","https://openalex.org/W6857799723","https://openalex.org/W6857944484","https://openalex.org/W6860155063","https://openalex.org/W6864103248","https://openalex.org/W6869772322","https://openalex.org/W6893640197","https://openalex.org/W6898505805"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2935759653","https://openalex.org/W3105167352","https://openalex.org/W54078636","https://openalex.org/W2954470139","https://openalex.org/W1501425562","https://openalex.org/W2902782467","https://openalex.org/W3084825885"],"abstract_inverted_index":{"Large":[0],"Language":[1],"Models":[2],"(LLMs)":[3],"are":[4],"reshaping":[5],"mobile":[6,12,23,224],"AI.":[7],"Directly":[8],"deploying":[9],"LLMs":[10],"on":[11,230],"devices":[13],"is":[14,202,228],"an":[15,59,139],"emerging":[16],"paradigm":[17],"that":[18,62],"can":[19,183],"widely":[20],"support":[21],"different":[22],"applications":[24],"while":[25,188,216],"preserving":[26],"data":[27],"privacy.":[28],"However,":[29],"intensive":[30],"memory":[31,146,160],"footprint,":[32],"long":[33],"inference":[34,43,72,125],"latency":[35],"and":[36,82,108,147,236,248],"high":[37],"energy":[38,83,214,250],"consumption":[39,215],"severely":[40],"bottlenecks":[41],"on-device":[42,70],"of":[44,259],"LLM":[45,71],"in":[46,73,120,176,186,204,220],"real-world":[47],"scenarios.":[48],"In":[49],"response":[50],"to":[51,75,122,143,152,178,206,239],"these":[52],"challenges,":[53],"this":[54],"work":[55],"introduces":[56],"m<inline-formula><tex-math":[57,86,136,170,226,242],"notation=\"LaTeX\">$^{2}$</tex-math></inline-formula>LLM,":[58],"innovative":[60],"framework":[61],"performs":[63],"joint":[64],"optimization":[65],"from":[66,127],"multiple":[67],"dimensions":[68],"for":[69],"order":[74,121,177,205],"strike":[76],"a":[77,254],"balance":[78],"among":[79],"performance,":[80],"realtimeliness":[81],"efficiency.":[84],"Specifically,":[85],"notation=\"LaTeX\">$^{2}$</tex-math></inline-formula>LLM":[87,137,171,227,243],"features":[88],"the":[89,124,128,133,149,153,159,163,167,174,180,190,195,198,208,218,221],"following":[90,129],"four":[91,114],"core":[92],"components":[93,115],"including":[94],":":[95],"1)":[96],"Hardware-aware":[97],"Model":[98],"Customization,":[99],"2)":[100],"Elastic":[101],"Chunk-wise":[102],"Pipeline,":[103],"3)":[104],"Latency-guided":[105],"Prompt":[106],"Compression":[107],"4)":[109],"Layer-wise":[110],"Resource":[111],"Scheduling.":[112],"These":[113],"interact":[116],"with":[117,212,233,252],"each":[118],"other":[119],"guide":[123],"process":[126,211],"three":[130],"dimensions.":[131],"At":[132,162],"model":[134,150,256],"level,":[135,165,197],"designs":[138],"elastic":[140],"chunk-wise":[141],"pipeline":[142],"expand":[144],"device":[145],"customize":[148],"according":[151],"hardware":[154],"configuration,":[155],"maximizing":[156],"performance":[157,257],"within":[158],"budget.":[161],"prompt":[164],"facing":[166],"stochastic":[168],"input,":[169],"judiciously":[172],"compresses":[173],"prompts":[175],"guarantee":[179],"first":[181],"token":[182,209],"be":[184],"generated":[185],"time":[187],"maintaining":[189],"semantic":[191],"information.":[192],"Additionally,":[193],"at":[194],"system":[196],"layer-wise":[199],"resource":[200],"scheduler":[201],"employed":[203],"complete":[207],"generation":[210],"minimized":[213],"guaranteeing":[217],"realtimeness":[219],"highly":[222],"dynamic":[223],"environment.":[225],"evaluated":[229],"off-the-shelf":[231],"smartphone":[232],"represented":[234],"models":[235],"datasets.":[237],"Compared":[238],"baseline":[240],"methods,":[241],"delivers":[244],"2.99\u201313.5\u00d7":[245],"TTFT":[246],"acceleration":[247],"2.28\u201324.3\u00d7":[249],"savings,":[251],"only":[253],"minimal":[255],"loss":[258],"2%":[260],"\u20137%":[261],".":[262]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":1}],"updated_date":"2025-12-21T01:58:51.020947","created_date":"2025-07-13T00:00:00"}
