{"id":"https://openalex.org/W7128644759","doi":"https://doi.org/10.48550/arxiv.2602.09725","title":"Efficient Remote Prefix Fetching with GPU-native Media ASICs","display_name":"Efficient Remote Prefix Fetching with GPU-native Media ASICs","publication_year":2026,"publication_date":"2026-02-10","ids":{"openalex":"https://openalex.org/W7128644759","doi":"https://doi.org/10.48550/arxiv.2602.09725"},"language":null,"primary_location":{"id":"pmh:doi:10.48550/arxiv.2602.09725","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":null,"any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5001895603","display_name":"Liang Mi","orcid":"https://orcid.org/0009-0000-6058-5950"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Mi, Liang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100415467","display_name":"Weijun Wang","orcid":"https://orcid.org/0000-0002-9545-3322"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Weijun","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5080884386","display_name":"Jinghan Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Jinghan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125625099","display_name":"Ting Cao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Cao, Ting","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125676119","display_name":"Haipeng Dai","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dai, Haipeng","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5125633904","display_name":"Yunxin Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Yunxin","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5001895603"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10741","display_name":"Video Coding and Compression Technologies","score":0.35019999742507935,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10741","display_name":"Video Coding and Compression Technologies","score":0.35019999742507935,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.20810000598430634,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11478","display_name":"Caching and Content Delivery","score":0.09539999812841415,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/cache","display_name":"Cache","score":0.7628999948501587},{"id":"https://openalex.org/keywords/reuse","display_name":"Reuse","score":0.6875},{"id":"https://openalex.org/keywords/lossless-compression","display_name":"Lossless compression","score":0.4302000105381012},{"id":"https://openalex.org/keywords/cpu-cache","display_name":"CPU cache","score":0.3928999900817871},{"id":"https://openalex.org/keywords/coding","display_name":"Coding (social sciences)","score":0.3919000029563904},{"id":"https://openalex.org/keywords/prefix","display_name":"Prefix","score":0.35040000081062317},{"id":"https://openalex.org/keywords/masking","display_name":"Masking (illustration)","score":0.3467000126838684}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8141000270843506},{"id":"https://openalex.org/C115537543","wikidata":"https://www.wikidata.org/wiki/Q165596","display_name":"Cache","level":2,"score":0.7628999948501587},{"id":"https://openalex.org/C206588197","wikidata":"https://www.wikidata.org/wiki/Q846574","display_name":"Reuse","level":2,"score":0.6875},{"id":"https://openalex.org/C9390403","wikidata":"https://www.wikidata.org/wiki/Q3966","display_name":"Computer hardware","level":1,"score":0.4343999922275543},{"id":"https://openalex.org/C81081738","wikidata":"https://www.wikidata.org/wiki/Q55542","display_name":"Lossless compression","level":3,"score":0.4302000105381012},{"id":"https://openalex.org/C189783530","wikidata":"https://www.wikidata.org/wiki/Q352090","display_name":"CPU cache","level":3,"score":0.3928999900817871},{"id":"https://openalex.org/C179518139","wikidata":"https://www.wikidata.org/wiki/Q5140297","display_name":"Coding (social sciences)","level":2,"score":0.3919000029563904},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.37860000133514404},{"id":"https://openalex.org/C141603448","wikidata":"https://www.wikidata.org/wiki/Q134830","display_name":"Prefix","level":2,"score":0.35040000081062317},{"id":"https://openalex.org/C2777402240","wikidata":"https://www.wikidata.org/wiki/Q6783436","display_name":"Masking (illustration)","level":2,"score":0.3467000126838684},{"id":"https://openalex.org/C167713795","wikidata":"https://www.wikidata.org/wiki/Q352090","display_name":"Smart Cache","level":5,"score":0.3458000123500824},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.3343000113964081},{"id":"https://openalex.org/C78548338","wikidata":"https://www.wikidata.org/wiki/Q2493","display_name":"Data compression","level":2,"score":0.33320000767707825},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.325300008058548},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.3203999996185303},{"id":"https://openalex.org/C77390884","wikidata":"https://www.wikidata.org/wiki/Q217302","display_name":"Application-specific integrated circuit","level":2,"score":0.31310001015663147},{"id":"https://openalex.org/C38556500","wikidata":"https://www.wikidata.org/wiki/Q13404475","display_name":"Cache algorithms","level":4,"score":0.30149999260902405},{"id":"https://openalex.org/C36340418","wikidata":"https://www.wikidata.org/wiki/Q7124288","display_name":"Page cache","level":5,"score":0.29249998927116394},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.28209999203681946},{"id":"https://openalex.org/C206345919","wikidata":"https://www.wikidata.org/wiki/Q20380951","display_name":"Resource (disambiguation)","level":2,"score":0.2718000113964081},{"id":"https://openalex.org/C50954386","wikidata":"https://www.wikidata.org/wiki/Q656083","display_name":"Paging","level":2,"score":0.25209999084472656}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:doi:10.48550/arxiv.2602.09725","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},{"id":"doi:10.48550/arxiv.2602.09725","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.09725","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:doi:10.48550/arxiv.2602.09725","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Remote":[0],"KV":[1,5,37,49,63,77,89,102,112],"cache":[2,6,64,78,90],"reuse":[3,50,65],"fetches":[4],"for":[7],"identical":[8],"contexts":[9],"from":[10,136],"remote":[11,62],"storage,":[12],"avoiding":[13],"recomputation,":[14],"accelerating":[15],"LLM":[16],"inference.":[17],"While":[18],"it":[19,143],"excels":[20],"in":[21,28,39,91,114],"high-speed":[22],"networks,":[23],"its":[24],"performance":[25],"degrades":[26],"significantly":[27],"bandwidth-limited":[29],"scenarios.":[30],"Recent":[31],"studies":[32],"address":[33],"this":[34,53],"by":[35,146],"transmitting":[36],"caches":[38,113],"compressed":[40,111],"form,":[41],"but":[42],"the":[43,48,88,105],"associated":[44],"heavyweight":[45],"decompression":[46],"counteracts":[47],"benefits.":[51],"In":[52],"paper,":[54],"we":[55],"propose":[56],"an":[57,115],"efficient":[58,101,116],"and":[59,108,125],"widely":[60],"deployable":[61],"solution":[66],"that":[67,142],"leverages":[68],"GPU-native":[69],"video":[70,95],"codecs.":[71],"Our":[72],"system,":[73],"KVFetcher,":[74],"enables":[75],"effective":[76],"coding":[79],"with":[80],"two":[81],"techniques.":[82],"The":[83,100],"codec-friendly":[84],"tensor":[85],"layout":[86],"compresses":[87],"a":[92],"highly":[93],"compact":[94],"format,":[96],"enabling":[97],"fast":[98],"transmission.":[99],"fetcher":[103],"orchestrates":[104],"transmission,":[106],"decoding,":[107],"restoration":[109],"of":[110],"pipelined":[117],"manner,":[118],"eliminating":[119],"resource":[120],"contention,":[121],"masking":[122],"network":[123],"fluctuations,":[124],"achieving":[126],"minimum":[127],"time-to-first-token":[128],"(TTFT).":[129],"We":[130],"prototype":[131],"KVFetcher":[132],"on":[133],"diverse":[134],"GPUs":[135],"high-":[137],"to":[138,148,156],"low-end.":[139],"Experiments":[140],"reveal":[141],"reduces":[144],"TTFT":[145],"up":[147],"3.51":[149],"times":[150],"while":[151],"maintaining":[152],"lossless":[153],"accuracy,":[154],"compared":[155],"SOTA":[157],"methods.":[158]},"counts_by_year":[],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2026-02-12T00:00:00"}
