{"id":"https://openalex.org/W4407197294","doi":"https://doi.org/10.1145/3669940.3707224","title":"PipeLLM: Fast and Confidential Large Language Model Services with Speculative Pipelined Encryption","display_name":"PipeLLM: Fast and Confidential Large Language Model Services with Speculative Pipelined Encryption","publication_year":2025,"publication_date":"2025-02-06","ids":{"openalex":"https://openalex.org/W4407197294","doi":"https://doi.org/10.1145/3669940.3707224"},"language":"en","primary_location":{"id":"doi:10.1145/3669940.3707224","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3669940.3707224","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 30th ACM International Conference on Architectural Support for Programming Languages and Operating Systems, Volume 1","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Yifan Tan","orcid":"https://orcid.org/0009-0008-6329-7274"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yifan Tan","raw_affiliation_strings":["Institute of Parallel and Distributed Systems, SEIEE, Shanghai Jiao Tong University, Shanghai, China"],"raw_orcid":"https://orcid.org/0009-0008-6329-7274","affiliations":[{"raw_affiliation_string":"Institute of Parallel and Distributed Systems, SEIEE, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102759092","display_name":"Cheng Tan","orcid":"https://orcid.org/0000-0002-1420-5125"},"institutions":[{"id":"https://openalex.org/I12912129","display_name":"Northeastern University","ror":"https://ror.org/04t5xt781","country_code":"US","type":"education","lineage":["https://openalex.org/I12912129"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Cheng Tan","raw_affiliation_strings":["Northeastern University, Boston, USA"],"raw_orcid":"https://orcid.org/0000-0002-1420-5125","affiliations":[{"raw_affiliation_string":"Northeastern University, Boston, USA","institution_ids":["https://openalex.org/I12912129"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5051857167","display_name":"Zeyu Mi","orcid":"https://orcid.org/0000-0001-8395-1319"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zeyu Mi","raw_affiliation_strings":["Institute of Parallel and Distributed Systems, SEIEE, Shanghai Jiao Tong University, Shanghai, China"],"raw_orcid":"https://orcid.org/0000-0001-8395-1319","affiliations":[{"raw_affiliation_string":"Institute of Parallel and Distributed Systems, SEIEE, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100406215","display_name":"Haibo Chen","orcid":"https://orcid.org/0000-0002-9720-0361"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Haibo Chen","raw_affiliation_strings":["Institute of Parallel and Distributed Systems, SEIEE, Shanghai Jiao Tong University, Shanghai, China"],"raw_orcid":"https://orcid.org/0000-0002-9720-0361","affiliations":[{"raw_affiliation_string":"Institute of Parallel and Distributed Systems, SEIEE, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I183067930"],"apc_list":null,"apc_paid":null,"fwci":10.8663,"has_fulltext":false,"cited_by_count":5,"citation_normalized_percentile":{"value":0.9773129,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"843","last_page":"857"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10237","display_name":"Cryptography and Data Security","score":0.9944999814033508,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10237","display_name":"Cryptography and Data Security","score":0.9944999814033508,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12029","display_name":"DNA and Biological Computing","score":0.9927999973297119,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T11614","display_name":"Cloud Data Security Solutions","score":0.9884999990463257,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8019413948059082},{"id":"https://openalex.org/keywords/encryption","display_name":"Encryption","score":0.7394540309906006},{"id":"https://openalex.org/keywords/confidentiality","display_name":"Confidentiality","score":0.5755616426467896},{"id":"https://openalex.org/keywords/computer-security","display_name":"Computer security","score":0.43855035305023193},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.37813568115234375}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8019413948059082},{"id":"https://openalex.org/C148730421","wikidata":"https://www.wikidata.org/wiki/Q141090","display_name":"Encryption","level":2,"score":0.7394540309906006},{"id":"https://openalex.org/C71745522","wikidata":"https://www.wikidata.org/wiki/Q2476929","display_name":"Confidentiality","level":2,"score":0.5755616426467896},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.43855035305023193},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.37813568115234375}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3669940.3707224","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3669940.3707224","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 30th ACM International Conference on Architectural Support for Programming Languages and Operating Systems, Volume 1","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G294480798","display_name":null,"funder_award_id":"2237295","funder_id":"https://openalex.org/F4320323817","funder_display_name":"Universitas Brawijaya"},{"id":"https://openalex.org/G3320791394","display_name":null,"funder_award_id":"62372287,61925206","funder_id":"https://openalex.org/F4320323817","funder_display_name":"Universitas Brawijaya"}],"funders":[{"id":"https://openalex.org/F4320323817","display_name":"Universitas Brawijaya","ror":"https://ror.org/01wk3d929"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":27,"referenced_works":["https://openalex.org/W1624569747","https://openalex.org/W2068034551","https://openalex.org/W2588337836","https://openalex.org/W2734941459","https://openalex.org/W2930957133","https://openalex.org/W2973727699","https://openalex.org/W2990138404","https://openalex.org/W3015584356","https://openalex.org/W3049161050","https://openalex.org/W3121562065","https://openalex.org/W3168867926","https://openalex.org/W4281758439","https://openalex.org/W4308641861","https://openalex.org/W4308760184","https://openalex.org/W4322718191","https://openalex.org/W4324297016","https://openalex.org/W4380353763","https://openalex.org/W4387321091","https://openalex.org/W4390043271","https://openalex.org/W6638906098","https://openalex.org/W6755958147","https://openalex.org/W6778883912","https://openalex.org/W6811340617","https://openalex.org/W6846659131","https://openalex.org/W6852874933","https://openalex.org/W6856112224","https://openalex.org/W7034064029"],"related_works":["https://openalex.org/W4387497383","https://openalex.org/W3183948672","https://openalex.org/W3173606202","https://openalex.org/W3110381201","https://openalex.org/W2948807893","https://openalex.org/W2899084033","https://openalex.org/W2778153218","https://openalex.org/W2758277628","https://openalex.org/W2748952813","https://openalex.org/W1531601525"],"abstract_inverted_index":{"Confidential":[0],"computing":[1,158],"on":[2],"GPUs,":[3],"like":[4],"NVIDIA":[5],"H100,":[6],"mitigates":[7],"the":[8,59,63,73,79,93,100,110,122,128],"security":[9],"risks":[10],"of":[11,99,131,145],"outsourced":[12],"Large":[13],"Language":[14],"Models":[15],"(LLMs)":[16],"by":[17,61,72,83,109,126],"implementing":[18],"strong":[19],"isolation":[20],"and":[21,36,43,65,162],"data":[22,102,123],"encryption.":[23,84],"Nonetheless,":[24],"this":[25,48,113],"encryption":[26,64,94,104,119,125],"incurs":[27,165],"a":[28,53],"significant":[29],"performance":[30],"overhead,":[31],"reaching":[32],"up":[33],"to":[34,120,179],"52.8%":[35],"88.2%":[37],"throughput":[38],"drop":[39],"when":[40],"serving":[41,129],"OPT-30B":[42],"OPT-66B,":[44],"respectively.":[45],"To":[46,112],"address":[47],"challenge,":[49],"we":[50,115,134],"introduce":[51],"PipeLLM,":[52],"user-transparent":[54],"runtime":[55],"system.":[56],"PipeLLM":[57,164],"removes":[58],"overhead":[60,167],"overlapping":[62],"GPU":[66],"computation":[67],"through":[68],"pipelining-an":[69],"idea":[70],"inspired":[71],"CPU":[74],"instruction":[75],"pipelining-thereby":[76],"effectively":[77],"concealing":[78],"latency":[80],"increase":[81],"caused":[82],"The":[85],"primary":[86],"technical":[87],"challenge":[88],"is":[89,107,184],"that,":[90],"unlike":[91],"CPUs,":[92],"module":[95],"lacks":[96],"prior":[97],"knowledge":[98],"specific":[101],"needing":[103],"until":[105],"it":[106],"requested":[108],"GPUs.":[111],"end,":[114],"propose":[116],"speculative":[117],"pipelined":[118],"predict":[121],"requiring":[124],"analyzing":[127],"patterns":[130],"LLMs.":[132],"Further,":[133],"have":[135],"developed":[136],"an":[137],"efficient,":[138],"low-cost":[139],"pipeline":[140],"relinquishing":[141],"approach":[142],"for":[143],"instances":[144],"incorrect":[146],"predictions.":[147],"Our":[148],"experiments":[149],"show":[150],"that":[151],"compared":[152],"with":[153],"vanilla":[154],"systems":[155],"without":[156],"confidential":[157],"(e.g.,":[159],"vLLM,":[160],"PEFT,":[161],"FlexGen),":[163],"modest":[166],"(":[168],"<":[169],"19.6%":[170],"in":[171],"throughput)":[172],"across":[173],"various":[174],"LLM":[175],"sizes,":[176],"from":[177],"13B":[178],"175B.":[180],"PipeLLM's":[181],"source":[182],"code":[183],"available":[185],"at":[186],"https://github.com/SJTU-IPADS/PipeLLM.":[187]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":4}],"updated_date":"2025-12-19T19:40:27.379048","created_date":"2025-10-10T00:00:00"}
