{"id":"https://openalex.org/W4413120526","doi":"https://doi.org/10.1109/tsc.2025.3596892","title":"TPI-LLM: Serving 70B-Scale LLMs Efficiently on Low-Resource Mobile Devices","display_name":"TPI-LLM: Serving 70B-Scale LLMs Efficiently on Low-Resource Mobile Devices","publication_year":2025,"publication_date":"2025-08-08","ids":{"openalex":"https://openalex.org/W4413120526","doi":"https://doi.org/10.1109/tsc.2025.3596892"},"language":"en","primary_location":{"id":"doi:10.1109/tsc.2025.3596892","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tsc.2025.3596892","pdf_url":null,"source":{"id":"https://openalex.org/S204223317","display_name":"IEEE Transactions on Services Computing","issn_l":"1939-1374","issn":["1939-1374","2372-0204"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Services Computing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5022284072","display_name":"Zonghang Li","orcid":"https://orcid.org/0000-0002-2796-039X"},"institutions":[{"id":"https://openalex.org/I4210113480","display_name":"Mohamed bin Zayed University of Artificial Intelligence","ror":"https://ror.org/0258gkt32","country_code":"AE","type":"education","lineage":["https://openalex.org/I4210113480"]}],"countries":["AE"],"is_corresponding":true,"raw_author_name":"Zonghang Li","raw_affiliation_strings":["Department of Machine Learning, MBZUAI, Abu Dhabi, UAE"],"raw_orcid":"https://orcid.org/0000-0002-2796-039X","affiliations":[{"raw_affiliation_string":"Department of Machine Learning, MBZUAI, Abu Dhabi, UAE","institution_ids":["https://openalex.org/I4210113480"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Wenjiao Feng","orcid":"https://orcid.org/0009-0007-6117-2999"},"institutions":[{"id":"https://openalex.org/I150229711","display_name":"University of Electronic Science and Technology of China","ror":"https://ror.org/04qr3zq92","country_code":"CN","type":"education","lineage":["https://openalex.org/I150229711"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wenjiao Feng","raw_affiliation_strings":["School of Information and Communication Engineering, UESTC, Chengdu, China"],"raw_orcid":"https://orcid.org/0009-0007-6117-2999","affiliations":[{"raw_affiliation_string":"School of Information and Communication Engineering, UESTC, Chengdu, China","institution_ids":["https://openalex.org/I150229711"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5057916222","display_name":"Mohsen Guizani","orcid":"https://orcid.org/0000-0002-8972-8094"},"institutions":[{"id":"https://openalex.org/I4210113480","display_name":"Mohamed bin Zayed University of Artificial Intelligence","ror":"https://ror.org/0258gkt32","country_code":"AE","type":"education","lineage":["https://openalex.org/I4210113480"]}],"countries":["AE"],"is_corresponding":false,"raw_author_name":"Mohsen Guizani","raw_affiliation_strings":["Department of Machine Learning, MBZUAI, Abu Dhabi, UAE"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Machine Learning, MBZUAI, Abu Dhabi, UAE","institution_ids":["https://openalex.org/I4210113480"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101507232","display_name":"Hongfang Yu","orcid":"https://orcid.org/0000-0002-5219-1780"},"institutions":[{"id":"https://openalex.org/I150229711","display_name":"University of Electronic Science and Technology of China","ror":"https://ror.org/04qr3zq92","country_code":"CN","type":"education","lineage":["https://openalex.org/I150229711"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hongfang Yu","raw_affiliation_strings":["School of Information and Communication Engineering, UESTC, Chengdu, China"],"raw_orcid":"https://orcid.org/0000-0002-5219-1780","affiliations":[{"raw_affiliation_string":"School of Information and Communication Engineering, UESTC, Chengdu, China","institution_ids":["https://openalex.org/I150229711"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5022284072"],"corresponding_institution_ids":["https://openalex.org/I4210113480"],"apc_list":null,"apc_paid":null,"fwci":10.8663,"has_fulltext":false,"cited_by_count":5,"citation_normalized_percentile":{"value":0.98058471,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":98},"biblio":{"volume":"18","issue":"5","first_page":"3321","last_page":"3333"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.988099992275238,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.988099992275238,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11181","display_name":"Advanced Data Storage Technologies","score":0.9835000038146973,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10715","display_name":"Distributed and Parallel Computing Systems","score":0.9560999870300293,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7075949311256409},{"id":"https://openalex.org/keywords/resource","display_name":"Resource (disambiguation)","score":0.5334275960922241},{"id":"https://openalex.org/keywords/mobile-device","display_name":"Mobile device","score":0.4811221659183502},{"id":"https://openalex.org/keywords/scale","display_name":"Scale (ratio)","score":0.4771401584148407},{"id":"https://openalex.org/keywords/mobile-computing","display_name":"Mobile computing","score":0.41210103034973145},{"id":"https://openalex.org/keywords/computer-security","display_name":"Computer security","score":0.3575042486190796},{"id":"https://openalex.org/keywords/telecommunications","display_name":"Telecommunications","score":0.27090686559677124},{"id":"https://openalex.org/keywords/computer-network","display_name":"Computer network","score":0.2701064944267273},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.2035086750984192}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7075949311256409},{"id":"https://openalex.org/C206345919","wikidata":"https://www.wikidata.org/wiki/Q20380951","display_name":"Resource (disambiguation)","level":2,"score":0.5334275960922241},{"id":"https://openalex.org/C186967261","wikidata":"https://www.wikidata.org/wiki/Q5082128","display_name":"Mobile device","level":2,"score":0.4811221659183502},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.4771401584148407},{"id":"https://openalex.org/C144543869","wikidata":"https://www.wikidata.org/wiki/Q2738570","display_name":"Mobile computing","level":2,"score":0.41210103034973145},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.3575042486190796},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.27090686559677124},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.2701064944267273},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.2035086750984192},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tsc.2025.3596892","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tsc.2025.3596892","pdf_url":null,"source":{"id":"https://openalex.org/S204223317","display_name":"IEEE Transactions on Services Computing","issn_l":"1939-1374","issn":["1939-1374","2372-0204"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Services Computing","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G5223361291","display_name":null,"funder_award_id":"62394324","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":27,"referenced_works":["https://openalex.org/W2962834855","https://openalex.org/W2979826702","https://openalex.org/W3081168214","https://openalex.org/W3129831491","https://openalex.org/W3194657444","https://openalex.org/W4308083513","https://openalex.org/W4321636575","https://openalex.org/W4384705353","https://openalex.org/W4387321091","https://openalex.org/W4388979610","https://openalex.org/W4389524555","https://openalex.org/W4393578753","https://openalex.org/W4393949386","https://openalex.org/W4394998727","https://openalex.org/W4395020691","https://openalex.org/W4395704222","https://openalex.org/W4399568414","https://openalex.org/W4401508667","https://openalex.org/W4401568153","https://openalex.org/W4402595168","https://openalex.org/W4403421327","https://openalex.org/W4404401018","https://openalex.org/W4405934565","https://openalex.org/W4407217670","https://openalex.org/W4408182386","https://openalex.org/W4408324844","https://openalex.org/W4410564946"],"related_works":["https://openalex.org/W3034529322","https://openalex.org/W2115913271","https://openalex.org/W2113597336","https://openalex.org/W2155505549","https://openalex.org/W2048100608","https://openalex.org/W2090296580","https://openalex.org/W1576249345","https://openalex.org/W4243905374","https://openalex.org/W2785815065","https://openalex.org/W1796074903"],"abstract_inverted_index":{"LLM":[0,34],"serving":[1],"is":[2,41],"shifting":[3],"from":[4],"cloud":[5],"to":[6,9,32,86,110,143],"edge":[7],"due":[8],"privacy":[10],"concerns":[11],"over":[12],"user":[13],"interaction":[14],"data.":[15],"However,":[16],"mobile":[17,47,92],"devices":[18,31,48,102],"struggle":[19],"with":[20,119],"very":[21],"limited":[22],"computing":[23],"power":[24],"and":[25,79,103,121,146],"memory,":[26],"requiring":[27,157],"collaboration":[28],"among":[29],"multiple":[30],"run":[33,50,87],"apps.":[35],"The":[36],"mainstream":[37],"solution,":[38],"pipeline":[39],"parallelism,":[40,63],"inefficient":[42],"for":[43,163],"such":[44,72],"cases":[45],"because":[46],"typically":[49],"only":[51],"one":[52],"inference":[53,83],"task":[54],"at":[55],"a":[56,77,105],"time.":[57],"This":[58],"article":[59],"argues":[60],"that":[61,135],"tensor":[62,81],"despite":[64],"its":[65],"high":[66],"communication":[67],"cost,":[68],"can":[69],"better":[70],"fit":[71],"scenarios.":[73],"We":[74],"introduce":[75],"TPI-LLM,":[76],"compute":[78],"memory-efficient":[80],"parallel":[82],"system":[84],"designed":[85],"70B-scale":[88,164],"LLMs":[89],"on":[90,100,129],"low-resource":[91],"devices.":[93,131],"It":[94,115,148],"keeps":[95],"sensitive":[96],"raw":[97],"data":[98],"local":[99],"users\u2019":[101],"employs":[104],"sliding":[106],"window":[107],"memory":[108,153,162],"scheduler":[109],"dynamically":[111],"manage":[112],"layer":[113],"weights.":[114],"overlaps":[116],"disk":[117],"I/O":[118],"computation":[120],"communication,":[122],"enabling":[123],"efficient":[124],"operation":[125],"of":[126,161],"large":[127],"models":[128],"memory-limited":[130],"Extensive":[132],"experiments":[133],"show":[134],"TPI-LLM":[136],"reduces":[137],"token":[138],"latency":[139],"by":[140,155],"80%\u201390%":[141],"compared":[142],"Transformers,":[144],"Accelerate,":[145],"Galaxy.":[147],"also":[149],"cuts":[150],"the":[151],"peak":[152],"footprint":[154],"90%,":[156],"just":[158],"3.1":[159],"GiB":[160],"models.":[165]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":4}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
