{"id":"https://openalex.org/W7130682369","doi":"https://doi.org/10.1145/3798065.3798083","title":"HybridPrompt: Bridging Generative Priors and Traditional Codecs for Mobile Streaming","display_name":"HybridPrompt: Bridging Generative Priors and Traditional Codecs for Mobile Streaming","publication_year":2026,"publication_date":"2026-04-04","ids":{"openalex":"https://openalex.org/W7130682369","doi":"https://doi.org/10.1145/3798065.3798083"},"language":null,"primary_location":{"id":"doi:10.1145/3798065.3798083","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3798065.3798083","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 36th Workshop on Network and Operating System Support for Digital Audio and Video","raw_type":"proceedings-article"},"type":"article","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2602.17120","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5126497808","display_name":"Liming Liu","orcid":null},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Liming Liu","raw_affiliation_strings":["Peking University, Beijing, China"],"raw_orcid":"https://orcid.org/0009-0004-9168-4897","affiliations":[{"raw_affiliation_string":"Peking University, Beijing, China","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5057448142","display_name":"Jiangkai Wu","orcid":"https://orcid.org/0009-0007-7628-6673"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jiangkai Wu","raw_affiliation_strings":["Peking University, Beijing, China"],"raw_orcid":"https://orcid.org/0009-0007-7628-6673","affiliations":[{"raw_affiliation_string":"Peking University, Beijing, China","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5126487813","display_name":"Haoyang Wang","orcid":null},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Haoyang Wang","raw_affiliation_strings":["Peking University, Beijing, China"],"raw_orcid":"https://orcid.org/0009-0005-0826-8081","affiliations":[{"raw_affiliation_string":"Peking University, Beijing, China","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5073195443","display_name":"Peiheng Wang","orcid":null},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Peiheng Wang","raw_affiliation_strings":["Peking University, Beijing, China"],"raw_orcid":"https://orcid.org/0009-0008-3519-8420","affiliations":[{"raw_affiliation_string":"Peking University, Beijing, China","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5126484314","display_name":"Zongming Guo","orcid":null},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zongming Guo","raw_affiliation_strings":["Peking University, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0002-4944-9621","affiliations":[{"raw_affiliation_string":"Peking University, Beijing, China","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5126476566","display_name":"Xinggong Zhang","orcid":null},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xinggong Zhang","raw_affiliation_strings":["Peking University, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0003-0484-5951","affiliations":[{"raw_affiliation_string":"Peking University, Beijing, China","institution_ids":["https://openalex.org/I20231570"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5126497808"],"corresponding_institution_ids":["https://openalex.org/I20231570"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.33837394,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"120","last_page":"126"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11165","display_name":"Image and Video Quality Assessment","score":0.8436999917030334,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11165","display_name":"Image and Video Quality Assessment","score":0.8436999917030334,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.03240000084042549,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10741","display_name":"Video Coding and Compression Technologies","score":0.028200000524520874,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/codec","display_name":"Codec","score":0.7236999869346619},{"id":"https://openalex.org/keywords/decoding-methods","display_name":"Decoding methods","score":0.6820999979972839},{"id":"https://openalex.org/keywords/bridging","display_name":"Bridging (networking)","score":0.6470000147819519},{"id":"https://openalex.org/keywords/generative-grammar","display_name":"Generative grammar","score":0.6017000079154968},{"id":"https://openalex.org/keywords/fidelity","display_name":"Fidelity","score":0.4088999927043915},{"id":"https://openalex.org/keywords/generative-model","display_name":"Generative model","score":0.40619999170303345},{"id":"https://openalex.org/keywords/high-fidelity","display_name":"High fidelity","score":0.37950000166893005},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.3686000108718872}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8504999876022339},{"id":"https://openalex.org/C161765866","wikidata":"https://www.wikidata.org/wiki/Q184748","display_name":"Codec","level":2,"score":0.7236999869346619},{"id":"https://openalex.org/C57273362","wikidata":"https://www.wikidata.org/wiki/Q576722","display_name":"Decoding methods","level":2,"score":0.6820999979972839},{"id":"https://openalex.org/C174348530","wikidata":"https://www.wikidata.org/wiki/Q188635","display_name":"Bridging (networking)","level":2,"score":0.6470000147819519},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.6017000079154968},{"id":"https://openalex.org/C2776459999","wikidata":"https://www.wikidata.org/wiki/Q2119376","display_name":"Fidelity","level":2,"score":0.4088999927043915},{"id":"https://openalex.org/C167966045","wikidata":"https://www.wikidata.org/wiki/Q5532625","display_name":"Generative model","level":3,"score":0.40619999170303345},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4056999981403351},{"id":"https://openalex.org/C113364801","wikidata":"https://www.wikidata.org/wiki/Q26674","display_name":"High fidelity","level":2,"score":0.37950000166893005},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.3686000108718872},{"id":"https://openalex.org/C186967261","wikidata":"https://www.wikidata.org/wiki/Q5082128","display_name":"Mobile device","level":2,"score":0.3682999908924103},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.3659000098705292},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3434999883174896},{"id":"https://openalex.org/C113775141","wikidata":"https://www.wikidata.org/wiki/Q428691","display_name":"Computer engineering","level":1,"score":0.3149000108242035},{"id":"https://openalex.org/C2776257435","wikidata":"https://www.wikidata.org/wiki/Q1576430","display_name":"Bandwidth (computing)","level":2,"score":0.30550000071525574},{"id":"https://openalex.org/C179518139","wikidata":"https://www.wikidata.org/wiki/Q5140297","display_name":"Coding (social sciences)","level":2,"score":0.30379998683929443},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.29829999804496765},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.2948000133037567},{"id":"https://openalex.org/C49774154","wikidata":"https://www.wikidata.org/wiki/Q131765","display_name":"Multimedia","level":1,"score":0.2937000095844269},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.2799000144004822},{"id":"https://openalex.org/C126042441","wikidata":"https://www.wikidata.org/wiki/Q1324888","display_name":"Frame (networking)","level":2,"score":0.26100000739097595},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.25920000672340393},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.25859999656677246},{"id":"https://openalex.org/C197115733","wikidata":"https://www.wikidata.org/wiki/Q1003136","display_name":"Forcing (mathematics)","level":2,"score":0.2574999928474426},{"id":"https://openalex.org/C205711294","wikidata":"https://www.wikidata.org/wiki/Q176953","display_name":"Rendering (computer graphics)","level":2,"score":0.2506999969482422}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/3798065.3798083","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3798065.3798083","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 36th Workshop on Network and Operating System Support for Digital Audio and Video","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2602.17120","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2602.17120","pdf_url":"https://arxiv.org/pdf/2602.17120","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2602.17120","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2602.17120","pdf_url":"https://arxiv.org/pdf/2602.17120","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/9","display_name":"Industry, innovation and infrastructure","score":0.5546700358390808}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W7130682369.pdf"},"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"In":[0],"Video":[1],"on":[2,41,48,91,110],"Demand":[3],"(VoD)":[4],"scenarios,":[5],"traditional":[6,63,111,142,154,194,223],"codecs":[7,33,112,224],"are":[8,182],"the":[9,59,66,76,114,122,128,137,153,175,193],"industry":[10],"standard":[11],"due":[12],"to":[13,57,167,178],"their":[14,39],"high":[15],"decoding":[16,86,155],"efficiency.":[17],"However,":[18],"they":[19],"suffer":[20],"from":[21,131],"severe":[22],"quality":[23],"degradation":[24],"under":[25],"low":[26],"bandwidth":[27],"conditions.":[28],"While":[29],"emerging":[30],"generative":[31,106,132,176],"neural":[32,71,206],"offer":[34],"significantly":[35],"higher":[36],"perceptual":[37],"quality,":[38],"reliance":[40],"heavy":[42],"frame-by-frame":[43],"generation":[44,200],"makes":[45],"real-time":[46,84],"playback":[47],"mobile":[49],"devices":[50],"impractical.":[51],"We":[52,73],"ask:":[53],"is":[54,120,157],"it":[55],"possible":[56],"combine":[58],"blazing-fast":[60],"speed":[61,213],"of":[62,70,82,141,210,220],"standards":[64],"with":[65,136],"superior":[67],"visual":[68],"fidelity":[69],"approaches?":[72],"present":[74],"HybridPrompt,":[75],"first":[77],"generative-based":[78],"video":[79],"system":[80,203],"capable":[81],"achieving":[83,215],"1080p":[85],"at":[87,225],"over":[88,222],"150":[89],"FPS":[90],"a":[92,98,105,198],"commercial":[93],"smartphone.":[94],"Specifically,":[95],"we":[96,150],"employ":[97],"hybrid":[99],"architecture":[100],"that":[101,121,152,181],"encodes":[102],"Keyframes":[103],"using":[104],"model":[107,177],"while":[108,214],"relying":[109],"for":[113,192],"remaining":[115],"frames.":[116],"A":[117],"major":[118],"challenge":[119],"two":[123],"paradigms":[124],"have":[125],"conflicting":[126],"objectives:":[127],"\"hallucinated\"":[129],"details":[130],"models":[133],"often":[134],"misalign":[135],"rigid":[138],"prediction":[139],"mechanisms":[140],"codecs,":[143],"causing":[144],"bitrate":[145],"inefficiency.":[146],"To":[147],"address":[148],"this,":[149],"demonstrate":[151],"process":[156],"differentiable,":[158],"enabling":[159],"an":[160,216],"end-to-end":[161],"optimization":[162],"loop.":[163],"This":[164],"allows":[165],"us":[166],"use":[168],"subsequent":[169],"frames":[170],"as":[171],"additional":[172],"supervision,":[173],"forcing":[174],"synthesize":[179],"keyframes":[180],"not":[183],"only":[184],"perceptually":[185],"high-fidelity":[186],"but":[187],"also":[188],"mathematically":[189],"optimal":[190],"references":[191],"codec.":[195],"By":[196],"integrating":[197],"two-stage":[199],"strategy,":[201],"our":[202],"outperforms":[204],"pure":[205],"baselines":[207],"by":[208],"orders":[209],"magnitude":[211],"in":[212],"average":[217],"LPIPS":[218],"gain":[219],"8%":[221],"200kbps.":[226]},"counts_by_year":[],"updated_date":"2026-04-09T06:13:59.934233","created_date":"2026-02-21T00:00:00"}
