{"id":"https://openalex.org/W4409498455","doi":"https://doi.org/10.1145/3730402","title":"VPFormer: Leveraging Transformer with Voxel Integration for Viewport Prediction in Volumetric Video","display_name":"VPFormer: Leveraging Transformer with Voxel Integration for Viewport Prediction in Volumetric Video","publication_year":2025,"publication_date":"2025-04-16","ids":{"openalex":"https://openalex.org/W4409498455","doi":"https://doi.org/10.1145/3730402"},"language":"en","primary_location":{"id":"doi:10.1145/3730402","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3730402","pdf_url":null,"source":{"id":"https://openalex.org/S19610489","display_name":"ACM Transactions on Multimedia Computing Communications and Applications","issn_l":"1551-6857","issn":["1551-6857","1551-6865"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Multimedia Computing, Communications, and Applications","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100428183","display_name":"Jie Li","orcid":"https://orcid.org/0000-0001-8483-6240"},"institutions":[{"id":"https://openalex.org/I16365422","display_name":"Hefei University of Technology","ror":"https://ror.org/02czkny70","country_code":"CN","type":"education","lineage":["https://openalex.org/I16365422"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Jie Li","raw_affiliation_strings":["School of Computer and Information, Hefei University of Technology, China"],"affiliations":[{"raw_affiliation_string":"School of Computer and Information, Hefei University of Technology, China","institution_ids":["https://openalex.org/I16365422"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5104364931","display_name":"Zhang-Rui Zhao","orcid":"https://orcid.org/0009-0001-4250-4966"},"institutions":[{"id":"https://openalex.org/I16365422","display_name":"Hefei University of Technology","ror":"https://ror.org/02czkny70","country_code":"CN","type":"education","lineage":["https://openalex.org/I16365422"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhixia Zhao","raw_affiliation_strings":["School of Computer and Information, Hefei University of Technology, China"],"affiliations":[{"raw_affiliation_string":"School of Computer and Information, Hefei University of Technology, China","institution_ids":["https://openalex.org/I16365422"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5032143076","display_name":"Qiyue Li","orcid":"https://orcid.org/0000-0002-9399-8759"},"institutions":[{"id":"https://openalex.org/I16365422","display_name":"Hefei University of Technology","ror":"https://ror.org/02czkny70","country_code":"CN","type":"education","lineage":["https://openalex.org/I16365422"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qiyue Li","raw_affiliation_strings":["School of Electrical Engineering and Automation, Hefei University of Technology, China"],"affiliations":[{"raw_affiliation_string":"School of Electrical Engineering and Automation, Hefei University of Technology, China","institution_ids":["https://openalex.org/I16365422"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5088409180","display_name":"Zhixin Li","orcid":null},"institutions":[{"id":"https://openalex.org/I16365422","display_name":"Hefei University of Technology","ror":"https://ror.org/02czkny70","country_code":"CN","type":"education","lineage":["https://openalex.org/I16365422"]},{"id":"https://openalex.org/I75421653","display_name":"University of Missouri\u2013Kansas City","ror":"https://ror.org/01w0d5g70","country_code":"US","type":"education","lineage":["https://openalex.org/I75421653"]}],"countries":["CN","US"],"is_corresponding":false,"raw_author_name":"Zhixin Li","raw_affiliation_strings":["Department of Computer Science and Electrical Engineering, University of Missouri\u2013Kansas City, USA","School of Computer and Information, Hefei University of Technology, China"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Electrical Engineering, University of Missouri\u2013Kansas City, USA","institution_ids":["https://openalex.org/I75421653"]},{"raw_affiliation_string":"School of Computer and Information, Hefei University of Technology, China","institution_ids":["https://openalex.org/I16365422"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5004288865","display_name":"Pengyuan Zhou","orcid":"https://orcid.org/0000-0002-7909-4059"},"institutions":[{"id":"https://openalex.org/I204337017","display_name":"Aarhus University","ror":"https://ror.org/01aj84f44","country_code":"DK","type":"education","lineage":["https://openalex.org/I204337017"]}],"countries":["DK"],"is_corresponding":false,"raw_author_name":"Peng Yuan Zhou","raw_affiliation_strings":["Department of Electrical and Computer Engineering, Aarhus University, Denmark"],"affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering, Aarhus University, Denmark","institution_ids":["https://openalex.org/I204337017"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100603421","display_name":"Zhi Liu","orcid":"https://orcid.org/0000-0003-0537-4522"},"institutions":[{"id":"https://openalex.org/I20529979","display_name":"University of Electro-Communications","ror":"https://ror.org/02x73b849","country_code":"JP","type":"education","lineage":["https://openalex.org/I20529979"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Zhi Liu","raw_affiliation_strings":["School of Informatics and Engineering, The University of Electro-Communications, Japan"],"affiliations":[{"raw_affiliation_string":"School of Informatics and Engineering, The University of Electro-Communications, Japan","institution_ids":["https://openalex.org/I20529979"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5068533528","display_name":"Hao Zhou","orcid":"https://orcid.org/0000-0002-7545-6745"},"institutions":[{"id":"https://openalex.org/I126520041","display_name":"University of Science and Technology of China","ror":"https://ror.org/04c4dkn09","country_code":"CN","type":"education","lineage":["https://openalex.org/I126520041","https://openalex.org/I19820366"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hao Zhou","raw_affiliation_strings":["School of Computer Science and Technology, University of Science and Technology of China, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, University of Science and Technology of China, China","institution_ids":["https://openalex.org/I126520041"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100380625","display_name":"Zhu Li","orcid":"https://orcid.org/0000-0002-8246-177X"},"institutions":[{"id":"https://openalex.org/I16365422","display_name":"Hefei University of Technology","ror":"https://ror.org/02czkny70","country_code":"CN","type":"education","lineage":["https://openalex.org/I16365422"]},{"id":"https://openalex.org/I75421653","display_name":"University of Missouri\u2013Kansas City","ror":"https://ror.org/01w0d5g70","country_code":"US","type":"education","lineage":["https://openalex.org/I75421653"]}],"countries":["CN","US"],"is_corresponding":false,"raw_author_name":"Zhu Li","raw_affiliation_strings":["Department of Computer Science and Electrical Engineering, University of Missouri\u2013Kansas City, USA","School of Computer and Information, Hefei University of Technology, China"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Electrical Engineering, University of Missouri\u2013Kansas City, USA","institution_ids":["https://openalex.org/I75421653"]},{"raw_affiliation_string":"School of Computer and Information, Hefei University of Technology, China","institution_ids":["https://openalex.org/I16365422"]}]}],"institutions":[],"countries_distinct_count":4,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5100428183"],"corresponding_institution_ids":["https://openalex.org/I16365422"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.06247576,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10531","display_name":"Advanced Vision and Imaging","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10531","display_name":"Advanced Vision and Imaging","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10481","display_name":"Computer Graphics and Visualization Techniques","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1704","display_name":"Computer Graphics and Computer-Aided Design"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11105","display_name":"Advanced Image Processing Techniques","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/viewport","display_name":"Viewport","score":0.9665640592575073},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8819514513015747},{"id":"https://openalex.org/keywords/voxel","display_name":"Voxel","score":0.7881274223327637},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.5330036878585815},{"id":"https://openalex.org/keywords/computer-graphics","display_name":"Computer graphics (images)","score":0.4341592788696289},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.36737778782844543},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.3315125107765198},{"id":"https://openalex.org/keywords/voltage","display_name":"Voltage","score":0.0903775691986084}],"concepts":[{"id":"https://openalex.org/C2778090530","wikidata":"https://www.wikidata.org/wiki/Q2523931","display_name":"Viewport","level":2,"score":0.9665640592575073},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8819514513015747},{"id":"https://openalex.org/C54170458","wikidata":"https://www.wikidata.org/wiki/Q663554","display_name":"Voxel","level":2,"score":0.7881274223327637},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.5330036878585815},{"id":"https://openalex.org/C121684516","wikidata":"https://www.wikidata.org/wiki/Q7600677","display_name":"Computer graphics (images)","level":1,"score":0.4341592788696289},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.36737778782844543},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.3315125107765198},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.0903775691986084},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/3730402","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3730402","pdf_url":null,"source":{"id":"https://openalex.org/S19610489","display_name":"ACM Transactions on Multimedia Computing Communications and Applications","issn_l":"1551-6857","issn":["1551-6857","1551-6865"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Multimedia Computing, Communications, and Applications","raw_type":"journal-article"},{"id":"pmh:oai:pure.atira.dk:publications/e15207e0-ff77-4060-a26f-17b2e0fe7ec3","is_oa":false,"landing_page_url":"https://pure.au.dk/portal/en/publications/e15207e0-ff77-4060-a26f-17b2e0fe7ec3","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Li, J, Zhao, Z, Li, Q, Li, Z, Zhou, P, Liu, Z, Zhou, H & Li, Z 2025, 'VPFormer : Leveraging Transformer with Voxel Integration for Viewport Prediction in Volumetric Video', ACM Transactions on Multimedia Computing, Communications and Applications, vol. 21, no. 6, 158. https://doi.org/10.1145/3730402","raw_type":"info:eu-repo/semantics/publishedVersion"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":48,"referenced_works":["https://openalex.org/W1514535095","https://openalex.org/W1972191802","https://openalex.org/W2319503556","https://openalex.org/W2585813514","https://openalex.org/W2624503621","https://openalex.org/W2624991012","https://openalex.org/W2789596518","https://openalex.org/W2799064164","https://openalex.org/W2884414611","https://openalex.org/W2895957446","https://openalex.org/W2939201152","https://openalex.org/W2962965915","https://openalex.org/W2963057320","https://openalex.org/W2963123724","https://openalex.org/W2963231572","https://openalex.org/W2963727135","https://openalex.org/W2968149995","https://openalex.org/W2991443709","https://openalex.org/W3000681136","https://openalex.org/W3011159591","https://openalex.org/W3020127778","https://openalex.org/W3040643035","https://openalex.org/W3045290914","https://openalex.org/W3094082851","https://openalex.org/W3096564590","https://openalex.org/W3115498134","https://openalex.org/W3168718178","https://openalex.org/W3196832763","https://openalex.org/W3212456028","https://openalex.org/W4205097557","https://openalex.org/W4212948839","https://openalex.org/W4213422746","https://openalex.org/W4214624153","https://openalex.org/W4214755140","https://openalex.org/W4226106508","https://openalex.org/W4248809783","https://openalex.org/W4249726634","https://openalex.org/W4251137679","https://openalex.org/W4288049158","https://openalex.org/W4294766904","https://openalex.org/W4310416691","https://openalex.org/W4313145913","https://openalex.org/W4380926760","https://openalex.org/W4387148495","https://openalex.org/W4387545053","https://openalex.org/W4402191334","https://openalex.org/W4406458261","https://openalex.org/W6744290180"],"related_works":["https://openalex.org/W4389095575","https://openalex.org/W2908978341","https://openalex.org/W4294975495","https://openalex.org/W3123914255","https://openalex.org/W2992753629","https://openalex.org/W2134504249","https://openalex.org/W1204633849","https://openalex.org/W4238190924","https://openalex.org/W1481174557","https://openalex.org/W347409336"],"abstract_inverted_index":{"With":[0],"the":[1,18,60,81,90,108,170,220,236,246],"continuous":[2],"advancement":[3],"of":[4,41,62,92,173,240,249,275],"computer":[5],"vision,":[6],"image":[7],"processing":[8],"technologies,":[9],"volumetric":[10],"video,":[11],"represented":[12],"by":[13,85,244],"point":[14,63,159,174,221,250],"cloud":[15,64,160,175,222,251],"videos,":[16],"holds":[17],"potential":[19],"for":[20,68,113],"extensive":[21],"applications":[22],"in":[23,49,59,96,273],"areas":[24],"such":[25],"as":[26,39],"Virtual":[27],"Reality":[28,32],"(VR)":[29],"and":[30,52,75,127,218,234,238],"Augmented":[31],"(AR).":[33],"Viewport":[34,148],"prediction,":[35,44],"also":[36],"referred":[37],"to":[38,99,124,204],"Field":[40],"View":[42],"(FoV)":[43],"is":[45],"a":[46,56,142,164,184,213],"crucial":[47],"component":[48],"emerging":[50],"VR":[51],"AR":[53],"applications,":[54],"playing":[55],"vital":[57],"role":[58],"transmission":[61],"videos.":[65,106,161],"Currently,":[66],"models":[67],"viewpoint":[69],"prediction":[70,166],"that":[71,152,168,188,267],"integrate":[72],"feature":[73,115,130,228],"extraction":[74,116,239],"FoV":[76],"information":[77,192],"heavily":[78],"rely":[79],"on":[80],"spatial-temporal":[82,103,156,171,191,207,247],"features":[83,119,157,172,248],"extracted":[84],"convolutional":[86],"neural":[87],"networks.":[88],"However,":[89],"drawback":[91],"3D":[93],"convolution":[94],"lies":[95],"its":[97],"inability":[98],"effectively":[100,189],"capture":[101,205],"long-term":[102],"dependencies":[104],"within":[105,120],"Moreover,":[107],"temporal":[109,129,215,241],"contrast":[110,216],"layer":[111,217],"used":[112],"time":[114],"only":[117],"compares":[118],"each":[121],"block,":[122],"leading":[123],"matching":[125,232],"errors":[126,233],"inaccurate":[128],"extraction,":[131],"consequently":[132],"diminishing":[133],"predictive":[134],"performance.":[135,276],"To":[136],"address":[137],"these":[138],"limitations,":[139],"we":[140,182,198,211,258],"propose":[141],"Transformer-based":[143],"Volumetric":[144],"Point":[145],"Cloud":[146],"Video":[147],"Prediction":[149],"Network":[150],"(VPFormer)":[151],"can":[153],"efficiently":[154],"extract":[155],"from":[158],"VPFormer":[162],"constitutes":[163],"viewport":[165],"framework":[167],"combines":[169],"videos":[176,252],"with":[177,253],"user":[178,254,262],"trajectory":[179,256],"information.":[180,209],"Specifically,":[181],"introduce":[183,212],"novel":[185],"sampling":[186],"method":[187],"preserves":[190],"while":[193],"reducing":[194,231],"computational":[195],"complexity.":[196],"Additionally,":[197],"incorporate":[199],"context-aware":[200],"dynamic":[201],"positional":[202],"encoding":[203],"inter-frame":[206],"context":[208],"Subsequently,":[210],"voxel-based":[214],"partition":[219],"into":[223],"smaller":[224],"voxel":[225],"blocks":[226],"during":[227],"matching,":[229],"significantly":[230],"enhancing":[235],"analysis":[237],"features.":[242],"Finally,":[243],"combining":[245],"head":[255],"information,":[257],"successfully":[259],"predict":[260],"future":[261],"viewpoints.":[263],"Experimental":[264],"results":[265],"demonstrate":[266],"this":[268],"approach":[269],"outperforms":[270],"other":[271],"solutions":[272],"terms":[274]},"counts_by_year":[],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
