{"id":"https://openalex.org/W7160415450","doi":"https://doi.org/10.48550/arxiv.2605.03639","title":"Diffusion Masked Pretraining for Dynamic Point Cloud","display_name":"Diffusion Masked Pretraining for Dynamic Point Cloud","publication_year":2026,"publication_date":"2026-05-05","ids":{"openalex":"https://openalex.org/W7160415450","doi":"https://doi.org/10.48550/arxiv.2605.03639"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.03639","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.03639","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Preprint"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.03639","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5135497676","display_name":"Zhuoyue Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Zhuoyue","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135422352","display_name":"Jihua Zhu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhu, Jihua","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135533170","display_name":"Chaowei Fang","orcid":"https://orcid.org/0000-0001-8805-9792"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Fang, Chaowei","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135477336","display_name":"Jian Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Jian","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5120058266","display_name":"Ajmal Saeed Mian","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mian, Ajmal Saeed","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10719","display_name":"3D Shape Modeling and Analysis","score":0.5174000263214111,"subfield":{"id":"https://openalex.org/subfields/2206","display_name":"Computational Mechanics"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10719","display_name":"3D Shape Modeling and Analysis","score":0.5174000263214111,"subfield":{"id":"https://openalex.org/subfields/2206","display_name":"Computational Mechanics"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.14949999749660492,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12290","display_name":"Human Motion and Animation","score":0.11729999631643295,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.5708000063896179},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.4819999933242798},{"id":"https://openalex.org/keywords/diffusion","display_name":"Diffusion","score":0.4025000035762787},{"id":"https://openalex.org/keywords/point","display_name":"Point (geometry)","score":0.39419999718666077},{"id":"https://openalex.org/keywords/trajectory","display_name":"Trajectory","score":0.37619999051094055},{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.34610000252723694},{"id":"https://openalex.org/keywords/noise","display_name":"Noise (video)","score":0.3441999852657318},{"id":"https://openalex.org/keywords/conditional-probability-distribution","display_name":"Conditional probability distribution","score":0.3384000062942505}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6413999795913696},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.5708000063896179},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.5268999934196472},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.4819999933242798},{"id":"https://openalex.org/C69357855","wikidata":"https://www.wikidata.org/wiki/Q163214","display_name":"Diffusion","level":2,"score":0.4025000035762787},{"id":"https://openalex.org/C28719098","wikidata":"https://www.wikidata.org/wiki/Q44946","display_name":"Point (geometry)","level":2,"score":0.39419999718666077},{"id":"https://openalex.org/C13662910","wikidata":"https://www.wikidata.org/wiki/Q193139","display_name":"Trajectory","level":2,"score":0.37619999051094055},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.3741999864578247},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.3465999960899353},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.34610000252723694},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.3441999852657318},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.34389999508857727},{"id":"https://openalex.org/C43555835","wikidata":"https://www.wikidata.org/wiki/Q2300258","display_name":"Conditional probability distribution","level":2,"score":0.3384000062942505},{"id":"https://openalex.org/C131979681","wikidata":"https://www.wikidata.org/wiki/Q1899648","display_name":"Point cloud","level":2,"score":0.3100999891757965},{"id":"https://openalex.org/C79974875","wikidata":"https://www.wikidata.org/wiki/Q483639","display_name":"Cloud computing","level":2,"score":0.3077000081539154},{"id":"https://openalex.org/C47446073","wikidata":"https://www.wikidata.org/wiki/Q5165890","display_name":"Control theory (sociology)","level":3,"score":0.30720001459121704},{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.28380000591278076},{"id":"https://openalex.org/C68710425","wikidata":"https://www.wikidata.org/wiki/Q5275442","display_name":"Diffusion process","level":3,"score":0.2824000120162964},{"id":"https://openalex.org/C107551265","wikidata":"https://www.wikidata.org/wiki/Q1458245","display_name":"Displacement (psychology)","level":2,"score":0.27230000495910645},{"id":"https://openalex.org/C2778999744","wikidata":"https://www.wikidata.org/wiki/Q7208292","display_name":"Point target","level":3,"score":0.2720000147819519},{"id":"https://openalex.org/C38976095","wikidata":"https://www.wikidata.org/wiki/Q752641","display_name":"Asymmetry","level":2,"score":0.2700999975204468},{"id":"https://openalex.org/C88871306","wikidata":"https://www.wikidata.org/wiki/Q7208287","display_name":"Point process","level":2,"score":0.2685999870300293},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.265500009059906},{"id":"https://openalex.org/C104114177","wikidata":"https://www.wikidata.org/wiki/Q79782","display_name":"Motion (physics)","level":2,"score":0.260699987411499},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.25780001282691956}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.03639","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.03639","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"Preprint"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.03639","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.03639","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Preprint"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Dynamic":[0],"point":[1,70],"cloud":[2],"pretraining":[3],"is":[4],"still":[5],"dominated":[6],"by":[7,46],"masked":[8,91],"reconstruction":[9],"objectives.":[10],"However,":[11],"these":[12,56],"objectives":[13],"inherit":[14],"two":[15],"key":[16],"limitations.":[17],"Existing":[18],"methods":[19],"inject":[20],"ground-truth":[21],"tube":[22,92],"centers":[23,97],"as":[24,110,121],"decoder":[25],"positional":[26,30,78,104],"embeddings,":[27],"causing":[28],"spatio-temporal":[29,100],"leakage.":[31],"Moreover,":[32],"they":[33],"supervise":[34],"inter-frame":[35,118],"motion":[36,81],"with":[37,169],"deterministic":[38,154],"proxy":[39],"targets":[40],"that":[41,159],"systematically":[42],"discard":[43],"distributional":[44],"structure":[45],"collapsing":[47,150],"multimodal":[48],"trajectory":[49],"uncertainty":[50],"into":[51,76],"conditional":[52,139],"means.":[53],"To":[54],"address":[55],"limitations,":[57],"we":[58],"propose":[59],"Diffusion":[60],"Masked":[61],"Pretraining":[62],"(DiMP),":[63],"a":[64,122,145,152],"unified":[65],"self-supervised":[66],"framework":[67],"for":[68],"dynamic":[69],"clouds.":[71],"DiMP":[72,114,160],"introduces":[73],"diffusion":[74,87],"modeling":[75],"both":[77],"inference":[79],"and":[80,178],"learning.":[82],"It":[83],"first":[84],"applies":[85],"forward":[86],"noise":[88],"only":[89],"to":[90,135,151],"centers,":[93],"then":[94],"predicts":[95],"clean":[96,111],"from":[98],"visible":[99,108],"context.":[101],"This":[102,130],"removes":[103],"leakage":[105],"while":[106],"preserving":[107],"coordinates":[109],"temporal":[112],"anchors.":[113],"also":[115],"reformulates":[116],"point-wise":[117],"displacement":[119],"supervision":[120],"DDPM":[123],"noise-prediction":[124],"objective":[125],"conditioned":[126],"on":[127,174],"decoded":[128],"representations.":[129],"design":[131],"drives":[132],"the":[133,137,166],"encoder":[134],"target":[136],"full":[138],"distribution":[140],"of":[141,172],"plausible":[142],"motions":[143],"under":[144,180],"variational":[146],"surrogate,":[147],"rather":[148],"than":[149],"single":[153],"estimate.":[155],"Extensive":[156],"experiments":[157],"demonstrate":[158],"consistently":[161],"improves":[162],"downstream":[163],"accuracy":[164],"over":[165],"backbone":[167],"alone,":[168],"absolute":[170],"gains":[171],"11.21%":[173],"offline":[175],"action":[176],"segmentation":[177],"13.65%":[179],"causally":[181],"constrained":[182],"online":[183],"inference.Codes":[184],"are":[185],"available":[186],"at":[187],"https://github.com/InitalZ/DiMP.git.":[188]},"counts_by_year":[],"updated_date":"2026-07-01T06:00:48.157686","created_date":"2026-05-07T00:00:00"}
