{"id":"https://openalex.org/W7129033315","doi":"https://doi.org/10.1145/3773966.3777961","title":"On-Device Large Language Models for Sequential Recommendation","display_name":"On-Device Large Language Models for Sequential Recommendation","publication_year":2026,"publication_date":"2026-02-16","ids":{"openalex":"https://openalex.org/W7129033315","doi":"https://doi.org/10.1145/3773966.3777961"},"language":null,"primary_location":{"id":"doi:10.1145/3773966.3777961","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3773966.3777961","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Nineteenth ACM International Conference on Web Search and Data Mining","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1145/3773966.3777961","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101873166","display_name":"Xin Xia","orcid":"https://orcid.org/0000-0001-6250-5387"},"institutions":[{"id":"https://openalex.org/I165143802","display_name":"The University of Queensland","ror":"https://ror.org/00rqy9422","country_code":"AU","type":"education","lineage":["https://openalex.org/I165143802"]}],"countries":["AU"],"is_corresponding":true,"raw_author_name":"Xin Xia","raw_affiliation_strings":["The University of Queensland, Brisbane, QLD, Australia"],"affiliations":[{"raw_affiliation_string":"The University of Queensland, Brisbane, QLD, Australia","institution_ids":["https://openalex.org/I165143802"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5122317769","display_name":"Hongzhi Yin","orcid":null},"institutions":[{"id":"https://openalex.org/I165143802","display_name":"The University of Queensland","ror":"https://ror.org/00rqy9422","country_code":"AU","type":"education","lineage":["https://openalex.org/I165143802"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Hongzhi Yin","raw_affiliation_strings":["The University of Queensland, Brisbane, QLD, Australia"],"affiliations":[{"raw_affiliation_string":"The University of Queensland, Brisbane, QLD, Australia","institution_ids":["https://openalex.org/I165143802"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5123141362","display_name":"Shane Culpepper","orcid":null},"institutions":[{"id":"https://openalex.org/I165143802","display_name":"The University of Queensland","ror":"https://ror.org/00rqy9422","country_code":"AU","type":"education","lineage":["https://openalex.org/I165143802"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Shane Culpepper","raw_affiliation_strings":["The University of Queensland, Brisbane, QLD, Australia"],"affiliations":[{"raw_affiliation_string":"The University of Queensland, Brisbane, QLD, Australia","institution_ids":["https://openalex.org/I165143802"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5101873166"],"corresponding_institution_ids":["https://openalex.org/I165143802"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.33119014,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"767","last_page":"777"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10203","display_name":"Recommender Systems and Techniques","score":0.46560001373291016,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10203","display_name":"Recommender Systems and Techniques","score":0.46560001373291016,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T14347","display_name":"Big Data and Digital Economy","score":0.11919999867677689,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11704","display_name":"Mobile Crowdsensing and Crowdsourcing","score":0.04879999905824661,"subfield":{"id":"https://openalex.org/subfields/1706","display_name":"Computer Science Applications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.6504999995231628},{"id":"https://openalex.org/keywords/redundancy","display_name":"Redundancy (engineering)","score":0.5963000059127808},{"id":"https://openalex.org/keywords/software-deployment","display_name":"Software deployment","score":0.46950000524520874},{"id":"https://openalex.org/keywords/singular-value-decomposition","display_name":"Singular value decomposition","score":0.4368000030517578},{"id":"https://openalex.org/keywords/data-compression","display_name":"Data compression","score":0.4262000024318695},{"id":"https://openalex.org/keywords/overhead","display_name":"Overhead (engineering)","score":0.4092999994754791},{"id":"https://openalex.org/keywords/recommender-system","display_name":"Recommender system","score":0.4018000066280365},{"id":"https://openalex.org/keywords/offset","display_name":"Offset (computer science)","score":0.36480000615119934},{"id":"https://openalex.org/keywords/the-internet","display_name":"The Internet","score":0.3619999885559082}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8615999817848206},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.6504999995231628},{"id":"https://openalex.org/C152124472","wikidata":"https://www.wikidata.org/wiki/Q1204361","display_name":"Redundancy (engineering)","level":2,"score":0.5963000059127808},{"id":"https://openalex.org/C105339364","wikidata":"https://www.wikidata.org/wiki/Q2297740","display_name":"Software deployment","level":2,"score":0.46950000524520874},{"id":"https://openalex.org/C22789450","wikidata":"https://www.wikidata.org/wiki/Q420904","display_name":"Singular value decomposition","level":2,"score":0.4368000030517578},{"id":"https://openalex.org/C78548338","wikidata":"https://www.wikidata.org/wiki/Q2493","display_name":"Data compression","level":2,"score":0.4262000024318695},{"id":"https://openalex.org/C2779960059","wikidata":"https://www.wikidata.org/wiki/Q7113681","display_name":"Overhead (engineering)","level":2,"score":0.4092999994754791},{"id":"https://openalex.org/C557471498","wikidata":"https://www.wikidata.org/wiki/Q554950","display_name":"Recommender system","level":2,"score":0.4018000066280365},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.40130001306533813},{"id":"https://openalex.org/C175291020","wikidata":"https://www.wikidata.org/wiki/Q1156822","display_name":"Offset (computer science)","level":2,"score":0.36480000615119934},{"id":"https://openalex.org/C110875604","wikidata":"https://www.wikidata.org/wiki/Q75","display_name":"The Internet","level":2,"score":0.3619999885559082},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.34689998626708984},{"id":"https://openalex.org/C136886441","wikidata":"https://www.wikidata.org/wiki/Q926129","display_name":"Normalization (sociology)","level":2,"score":0.33329999446868896},{"id":"https://openalex.org/C42355184","wikidata":"https://www.wikidata.org/wiki/Q1361088","display_name":"Matrix decomposition","level":3,"score":0.32829999923706055},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.32690000534057617},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.3151000142097473},{"id":"https://openalex.org/C74912251","wikidata":"https://www.wikidata.org/wiki/Q6815727","display_name":"Memory footprint","level":2,"score":0.3118000030517578},{"id":"https://openalex.org/C176982825","wikidata":"https://www.wikidata.org/wiki/Q835922","display_name":"Lexical analysis","level":2,"score":0.30790001153945923},{"id":"https://openalex.org/C124681953","wikidata":"https://www.wikidata.org/wiki/Q339062","display_name":"Decomposition","level":2,"score":0.3019999861717224},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2971000075340271},{"id":"https://openalex.org/C52723943","wikidata":"https://www.wikidata.org/wiki/Q1127410","display_name":"Serialization","level":2,"score":0.29490000009536743},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.29429998993873596},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.27570000290870667},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.27309998869895935},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.26669999957084656},{"id":"https://openalex.org/C56372850","wikidata":"https://www.wikidata.org/wiki/Q1050404","display_name":"Sparse matrix","level":3,"score":0.26109999418258667},{"id":"https://openalex.org/C81081738","wikidata":"https://www.wikidata.org/wiki/Q55542","display_name":"Lossless compression","level":3,"score":0.2599000036716461},{"id":"https://openalex.org/C120936955","wikidata":"https://www.wikidata.org/wiki/Q2155640","display_name":"Empirical research","level":2,"score":0.25529998540878296},{"id":"https://openalex.org/C180016635","wikidata":"https://www.wikidata.org/wiki/Q2712821","display_name":"Compression (physics)","level":2,"score":0.2515000104904175}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3773966.3777961","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3773966.3777961","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Nineteenth ACM International Conference on Web Search and Data Mining","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3773966.3777961","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3773966.3777961","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Nineteenth ACM International Conference on Web Search and Data Mining","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":26,"referenced_works":["https://openalex.org/W1993482030","https://openalex.org/W2783272285","https://openalex.org/W2951645301","https://openalex.org/W2963367478","https://openalex.org/W2965862774","https://openalex.org/W2984100107","https://openalex.org/W3012782947","https://openalex.org/W3065542300","https://openalex.org/W3154916579","https://openalex.org/W3166439894","https://openalex.org/W3178193590","https://openalex.org/W4224926669","https://openalex.org/W4296591867","https://openalex.org/W4319338761","https://openalex.org/W4321521256","https://openalex.org/W4367047145","https://openalex.org/W4382518956","https://openalex.org/W4386728933","https://openalex.org/W4392846385","https://openalex.org/W4400531797","https://openalex.org/W4400909843","https://openalex.org/W4400909953","https://openalex.org/W4401834466","https://openalex.org/W4404918643","https://openalex.org/W4405674754","https://openalex.org/W4407881031"],"related_works":[],"abstract_inverted_index":{"On-device":[0],"recommendation":[1,48,91,169,184],"is":[2,27,151,191],"critical":[3],"for":[4,46,89,210],"a":[5,63,100,120,146,207],"number":[6],"of":[7,87,201],"real-world":[8],"applications,":[9],"especially":[10],"in":[11,116,160,177],"scenarios":[12],"that":[13,42,125,172],"have":[14],"agreements":[15],"on":[16,60,167],"execution":[17],"latency,":[18],"user":[19,44],"privacy,":[20],"and":[21,54,83,119,199],"robust":[22],"functionality":[23],"when":[24,141,179,186],"internet":[25],"connectivity":[26],"unstable":[28],"or":[29],"even":[30],"impossible.":[31],"While":[32],"large":[33],"language":[34],"models":[35],"(LLMs)":[36],"can":[37],"now":[38],"provide":[39,81],"exceptional":[40],"capabilities":[41],"model":[43,189],"behavior":[45],"sequential":[47,90,168],"tasks,":[49],"their":[50],"substantial":[51],"memory":[52],"footprint":[53],"computational":[55],"overhead":[56],"make":[57],"the":[58,73,117,128,156,161,182,187,197],"deployment":[59,86],"resource-constrained":[61],"devices":[62],"high":[64],"risk":[65],"proposition.":[66],"In":[67],"this":[68,204],"paper,":[69],"we":[70],"propose":[71],"OD-LLM,":[72,202],"first":[74],"task-adaptive":[75],"compression":[76,98,103,144],"framework":[77],"explicitly":[78],"designed":[79],"to":[80,111,135,153,181,215],"efficient":[82],"accurate":[84],"on-device":[85,212],"LLMs":[88],"tasks.":[92],"OD-LLM":[93,173],"uniquely":[94],"integrates":[95],"two":[96],"complementary":[97],"strategies:":[99],"low-rank":[101,129],"structural":[102],"algorithm":[104,150],"which":[105],"uses":[106],"Singular":[107],"Value":[108],"Decomposition":[109],"(SVD)":[110],"significantly":[112],"reduce":[113],"parameter":[114],"redundancy":[115],"model,":[118,185],"novel":[121,147,205],"tokenization":[122],"normalization":[123],"technique":[124],"better":[126],"complements":[127],"decomposition":[130],"process":[131],"being":[132],"used.":[133],"Additionally,":[134],"minimize":[136],"any":[137],"potential":[138],"performance":[139],"degradation":[140],"using":[142],"higher":[143],"ratios,":[145],"progressive":[148],"alignment":[149],"used":[152],"iteratively":[154],"refine":[155],"parameters":[157],"required":[158],"layerwise":[159],"target":[162],"model.":[163],"Empirical":[164],"evaluations":[165],"conducted":[166],"benchmarks":[170],"show":[171],"exhibits":[174],"no":[175],"loss":[176],"effectiveness":[178],"compared":[180],"original":[183],"deployed":[188],"size":[190],"halved.":[192],"These":[193],"promising":[194],"results":[195],"demonstrate":[196],"efficacy":[198],"scalability":[200],"making":[203],"solution":[206],"practical":[208],"alternative":[209],"real-time,":[211],"solutions":[213],"wishing":[214],"replace":[216],"expensive,":[217],"remotely":[218],"executed":[219],"LLMs.":[220]},"counts_by_year":[],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2026-02-17T00:00:00"}
