{"id":"https://openalex.org/W4416956208","doi":"https://doi.org/10.1145/3769102.3770614","title":"lm-Meter: Unveiling Runtime Inference Latency for On-Device Language Models","display_name":"lm-Meter: Unveiling Runtime Inference Latency for On-Device Language Models","publication_year":2025,"publication_date":"2025-12-03","ids":{"openalex":"https://openalex.org/W4416956208","doi":"https://doi.org/10.1145/3769102.3770614"},"language":null,"primary_location":{"id":"doi:10.1145/3769102.3770614","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3769102.3770614","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3769102.3770614","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Tenth ACM/IEEE Symposium on Edge Computing","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3769102.3770614","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5077861883","display_name":"Haoxin Wang","orcid":"https://orcid.org/0000-0002-8732-6200"},"institutions":[{"id":"https://openalex.org/I181565077","display_name":"Georgia State University","ror":"https://ror.org/03qt6ba18","country_code":"US","type":"education","lineage":["https://openalex.org/I181565077"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Haoxin Wang","raw_affiliation_strings":["Computer Science, Georgia State University, Atlanta, GA, USA"],"affiliations":[{"raw_affiliation_string":"Computer Science, Georgia State University, Atlanta, GA, USA","institution_ids":["https://openalex.org/I181565077"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5051059790","display_name":"Xiaolong Tu","orcid":"https://orcid.org/0009-0001-9396-5383"},"institutions":[{"id":"https://openalex.org/I181565077","display_name":"Georgia State University","ror":"https://ror.org/03qt6ba18","country_code":"US","type":"education","lineage":["https://openalex.org/I181565077"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Xiaolong Tu","raw_affiliation_strings":["Computer Science, Georgia State University, Atlanta, GA, USA"],"affiliations":[{"raw_affiliation_string":"Computer Science, Georgia State University, Atlanta, GA, USA","institution_ids":["https://openalex.org/I181565077"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5031528341","display_name":"Hongyu Ke","orcid":"https://orcid.org/0009-0003-5653-9814"},"institutions":[{"id":"https://openalex.org/I181565077","display_name":"Georgia State University","ror":"https://ror.org/03qt6ba18","country_code":"US","type":"education","lineage":["https://openalex.org/I181565077"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Hongyu Ke","raw_affiliation_strings":["Computer Science, Georgia State University, Atlanta, GA, USA"],"affiliations":[{"raw_affiliation_string":"Computer Science, Georgia State University, Atlanta, GA, USA","institution_ids":["https://openalex.org/I181565077"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5119915922","display_name":"Huirong Chai","orcid":"https://orcid.org/0009-0004-6255-169X"},"institutions":[{"id":"https://openalex.org/I181565077","display_name":"Georgia State University","ror":"https://ror.org/03qt6ba18","country_code":"US","type":"education","lineage":["https://openalex.org/I181565077"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Huirong Chai","raw_affiliation_strings":["Computer Science, Georgia State University, Atlanta, GA, USA"],"affiliations":[{"raw_affiliation_string":"Computer Science, Georgia State University, Atlanta, GA, USA","institution_ids":["https://openalex.org/I181565077"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100322085","display_name":"Dawei Chen","orcid":"https://orcid.org/0000-0002-4162-1423"},"institutions":[{"id":"https://openalex.org/I1293612202","display_name":"Toyota Motor Corporation (Switzerland)","ror":"https://ror.org/05p0pbv75","country_code":"CH","type":"company","lineage":["https://openalex.org/I1293612202","https://openalex.org/I4210125472","https://openalex.org/I4210137853"]}],"countries":["CH"],"is_corresponding":false,"raw_author_name":"Dawei Chen","raw_affiliation_strings":["Infotech Labs, Toyota Motor North America R&amp;D, Mountain View, CA, USA"],"affiliations":[{"raw_affiliation_string":"Infotech Labs, Toyota Motor North America R&amp;D, Mountain View, CA, USA","institution_ids":["https://openalex.org/I1293612202"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5009775690","display_name":"Kyungtae Han","orcid":"https://orcid.org/0000-0001-8291-5025"},"institutions":[{"id":"https://openalex.org/I1293612202","display_name":"Toyota Motor Corporation (Switzerland)","ror":"https://ror.org/05p0pbv75","country_code":"CH","type":"company","lineage":["https://openalex.org/I1293612202","https://openalex.org/I4210125472","https://openalex.org/I4210137853"]}],"countries":["CH"],"is_corresponding":false,"raw_author_name":"Kyungtae Han","raw_affiliation_strings":["Infotech Labs, Toyota Motor North America R&amp;D, Mountain View, CA, USA"],"affiliations":[{"raw_affiliation_string":"Infotech Labs, Toyota Motor North America R&amp;D, Mountain View, CA, USA","institution_ids":["https://openalex.org/I1293612202"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5077861883"],"corresponding_institution_ids":["https://openalex.org/I181565077"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.510222,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"17"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T14347","display_name":"Big Data and Digital Economy","score":0.14579999446868896,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T14347","display_name":"Big Data and Digital Economy","score":0.14579999446868896,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.07660000026226044,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12127","display_name":"Software System Performance and Reliability","score":0.0706000030040741,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/software-deployment","display_name":"Software deployment","score":0.6229000091552734},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.6169000267982483},{"id":"https://openalex.org/keywords/latency","display_name":"Latency (audio)","score":0.5981000065803528},{"id":"https://openalex.org/keywords/profiling","display_name":"Profiling (computer programming)","score":0.4855000078678131},{"id":"https://openalex.org/keywords/edge-computing","display_name":"Edge computing","score":0.41029998660087585},{"id":"https://openalex.org/keywords/mobile-device","display_name":"Mobile device","score":0.3864000141620636},{"id":"https://openalex.org/keywords/visibility","display_name":"Visibility","score":0.3521000146865845},{"id":"https://openalex.org/keywords/mobile-edge-computing","display_name":"Mobile edge computing","score":0.3416000008583069},{"id":"https://openalex.org/keywords/ground-truth","display_name":"Ground truth","score":0.3353999853134155}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8051000237464905},{"id":"https://openalex.org/C105339364","wikidata":"https://www.wikidata.org/wiki/Q2297740","display_name":"Software deployment","level":2,"score":0.6229000091552734},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.6169000267982483},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.5981000065803528},{"id":"https://openalex.org/C187191949","wikidata":"https://www.wikidata.org/wiki/Q1138496","display_name":"Profiling (computer programming)","level":2,"score":0.4855000078678131},{"id":"https://openalex.org/C2778456923","wikidata":"https://www.wikidata.org/wiki/Q5337692","display_name":"Edge computing","level":3,"score":0.41029998660087585},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.4081999957561493},{"id":"https://openalex.org/C186967261","wikidata":"https://www.wikidata.org/wiki/Q5082128","display_name":"Mobile device","level":2,"score":0.3864000141620636},{"id":"https://openalex.org/C123403432","wikidata":"https://www.wikidata.org/wiki/Q654068","display_name":"Visibility","level":2,"score":0.3521000146865845},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.34940001368522644},{"id":"https://openalex.org/C2776061582","wikidata":"https://www.wikidata.org/wiki/Q25325231","display_name":"Mobile edge computing","level":3,"score":0.3416000008583069},{"id":"https://openalex.org/C146849305","wikidata":"https://www.wikidata.org/wiki/Q370766","display_name":"Ground truth","level":2,"score":0.3353999853134155},{"id":"https://openalex.org/C144543869","wikidata":"https://www.wikidata.org/wiki/Q2738570","display_name":"Mobile computing","level":2,"score":0.3278000056743622},{"id":"https://openalex.org/C146481406","wikidata":"https://www.wikidata.org/wiki/Q105131","display_name":"Hotspot (geology)","level":2,"score":0.3249000012874603},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.31929999589920044},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.3188000023365021},{"id":"https://openalex.org/C153646914","wikidata":"https://www.wikidata.org/wiki/Q535695","display_name":"Cellular network","level":2,"score":0.30880001187324524},{"id":"https://openalex.org/C95491727","wikidata":"https://www.wikidata.org/wiki/Q992968","display_name":"Mobile telephony","level":3,"score":0.30730000138282776},{"id":"https://openalex.org/C120936955","wikidata":"https://www.wikidata.org/wiki/Q2155640","display_name":"Empirical research","level":2,"score":0.2946000099182129},{"id":"https://openalex.org/C140547941","wikidata":"https://www.wikidata.org/wiki/Q7797194","display_name":"Threat model","level":2,"score":0.29269999265670776},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.2904999852180481},{"id":"https://openalex.org/C162307627","wikidata":"https://www.wikidata.org/wiki/Q204833","display_name":"Enhanced Data Rates for GSM Evolution","level":2,"score":0.2888000011444092},{"id":"https://openalex.org/C75684735","wikidata":"https://www.wikidata.org/wiki/Q858810","display_name":"Big data","level":2,"score":0.2840999960899353},{"id":"https://openalex.org/C157764524","wikidata":"https://www.wikidata.org/wiki/Q1383412","display_name":"Throughput","level":3,"score":0.2775000035762787},{"id":"https://openalex.org/C123201435","wikidata":"https://www.wikidata.org/wiki/Q456632","display_name":"Information privacy","level":2,"score":0.27649998664855957},{"id":"https://openalex.org/C138673069","wikidata":"https://www.wikidata.org/wiki/Q322229","display_name":"Tracing","level":2,"score":0.27619999647140503},{"id":"https://openalex.org/C2781307350","wikidata":"https://www.wikidata.org/wiki/Q6887221","display_name":"Mobile radio","level":2,"score":0.2761000096797943},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.2736999988555908},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.26420000195503235},{"id":"https://openalex.org/C74193536","wikidata":"https://www.wikidata.org/wiki/Q574844","display_name":"Kernel (algebra)","level":2,"score":0.25600001215934753},{"id":"https://openalex.org/C78834623","wikidata":"https://www.wikidata.org/wiki/Q640394","display_name":"Mobile broadband","level":3,"score":0.25459998846054077},{"id":"https://openalex.org/C2988145974","wikidata":"https://www.wikidata.org/wiki/Q620615","display_name":"Mobile apps","level":2,"score":0.25380000472068787},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.2522999942302704}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3769102.3770614","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3769102.3770614","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3769102.3770614","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Tenth ACM/IEEE Symposium on Edge Computing","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3769102.3770614","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3769102.3770614","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3769102.3770614","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Tenth ACM/IEEE Symposium on Edge Computing","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4416956208.pdf","grobid_xml":"https://content.openalex.org/works/W4416956208.grobid-xml"},"referenced_works_count":2,"referenced_works":["https://openalex.org/W4281707531","https://openalex.org/W4407212618"],"related_works":[],"abstract_inverted_index":{"Large":[0],"Language":[1],"Models":[2],"(LLMs)":[3],"are":[4,191],"increasingly":[5],"integrated":[6],"into":[7,63,165],"everyday":[8],"applications,":[9],"but":[10],"their":[11],"prevalent":[12],"cloud-based":[13],"deployment":[14],"raises":[15],"growing":[16],"concerns":[17],"around":[18],"data":[19],"privacy":[20],"and":[21,29,41,55,97,110,127,146,156,180,189],"long-term":[22],"sustainability.":[23],"Running":[24],"LLMs":[25,170],"locally":[26],"on":[27,66,106,171],"mobile":[28,108],"edge":[30],"devices":[31],"(on-device":[32],"LLMs)":[33],"offers":[34],"the":[35,72,132,166,175,182],"promise":[36],"of":[37,169,184],"enhanced":[38],"privacy,":[39],"reliability,":[40],"reduced":[42],"communication":[43],"costs.":[44],"However,":[45],"realizing":[46],"this":[47],"vision":[48],"remains":[49],"challenging":[50],"due":[51],"to":[52],"substantial":[53],"memory":[54],"compute":[56],"demands,":[57],"as":[58,60],"well":[59],"limited":[61],"visibility":[62,164],"performance-efficiency":[64],"trade-offs":[65],"resource-constrained":[67],"hardware.":[68],"We":[69,103],"propose":[70],"lm-Meter,":[71,138],"first":[73],"lightweight,":[74],"online":[75],"latency":[76,87],"profiler":[77],"tailored":[78],"for":[79,177],"on-device":[80,150,185],"LLM":[81,151,186],"inference.":[82],"lm-Meter":[83,105,161],"captures":[84],"fine-grained,":[85],"real-time":[86],"at":[88,193],"both":[89],"phase":[90],"(e.g.,":[91],"embedding,":[92],"prefill,":[93],"decode,":[94],"softmax,":[95],"sampling)":[96],"kernel":[98],"levels":[99],"without":[100],"auxiliary":[101],"devices.":[102],"implement":[104],"commercial":[107],"platforms":[109],"demonstrate":[111],"its":[112],"high":[113],"profiling":[114],"accuracy":[115],"with":[116],"minimal":[117],"system":[118],"overhead,":[119],"e.g.,":[120],"only":[121],"2.58%":[122],"throughput":[123],"reduction":[124],"in":[125,129,149],"prefill":[126],"0.99%":[128],"decode":[130],"under":[131],"most":[133],"constrained":[134,172],"Powersave":[135],"governor.":[136],"Leveraging":[137],"we":[139],"conduct":[140],"comprehensive":[141],"empirical":[142],"studies":[143],"revealing":[144],"phase-":[145],"kernel-level":[147],"bottlenecks":[148],"inference,":[152],"quantifying":[153],"accuracy-efficiency":[154],"trade-offs,":[155],"identifying":[157],"systematic":[158],"optimization":[159,179],"opportunities.":[160],"provides":[162],"unprecedented":[163],"runtime":[167],"behavior":[168],"platforms,":[173],"laying":[174],"foundation":[176],"informed":[178],"accelerating":[181],"democratization":[183],"systems.":[187],"Code":[188],"tutorials":[190],"available":[192],"github.com/amai-gsu/LM-Meter.":[194]},"counts_by_year":[],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-12-03T00:00:00"}
