{"id":"https://openalex.org/W4416202709","doi":"https://doi.org/10.1109/tvcg.2025.3631434","title":"Toward a Unified Representation of Multi-Modal Pre-Training for 3-D Processing","display_name":"Toward a Unified Representation of Multi-Modal Pre-Training for 3-D Processing","publication_year":2025,"publication_date":"2025-11-12","ids":{"openalex":"https://openalex.org/W4416202709","doi":"https://doi.org/10.1109/tvcg.2025.3631434","pmid":"https://pubmed.ncbi.nlm.nih.gov/41223109"},"language":"en","primary_location":{"id":"doi:10.1109/tvcg.2025.3631434","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tvcg.2025.3631434","pdf_url":null,"source":{"id":"https://openalex.org/S84775595","display_name":"IEEE Transactions on Visualization and Computer Graphics","issn_l":"1077-2626","issn":["1077-2626","1941-0506","2160-9306"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Visualization and Computer Graphics","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5109819800","display_name":"Ben Fei","orcid":null},"institutions":[{"id":"https://openalex.org/I24943067","display_name":"Fudan University","ror":"https://ror.org/013q1eq08","country_code":"CN","type":"education","lineage":["https://openalex.org/I24943067"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ben Fei","raw_affiliation_strings":["School of Computer Science, Fudan University, Shanghai, China"],"raw_orcid":"https://orcid.org/0000-0002-3219-9996","affiliations":[{"raw_affiliation_string":"School of Computer Science, Fudan University, Shanghai, China","institution_ids":["https://openalex.org/I24943067"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Yixuan Li","orcid":"https://orcid.org/0000-0002-9229-7555"},"institutions":[{"id":"https://openalex.org/I24943067","display_name":"Fudan University","ror":"https://ror.org/013q1eq08","country_code":"CN","type":"education","lineage":["https://openalex.org/I24943067"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yixuan Li","raw_affiliation_strings":["School of Computer Science, Fudan University, Shanghai, China"],"raw_orcid":"https://orcid.org/0000-0002-9229-7555","affiliations":[{"raw_affiliation_string":"School of Computer Science, Fudan University, Shanghai, China","institution_ids":["https://openalex.org/I24943067"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Weidong Yang","orcid":"https://orcid.org/0000-0002-6473-9272"},"institutions":[{"id":"https://openalex.org/I24943067","display_name":"Fudan University","ror":"https://ror.org/013q1eq08","country_code":"CN","type":"education","lineage":["https://openalex.org/I24943067"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Weidong Yang","raw_affiliation_strings":["School of Computer Science, Fudan University, Shanghai, China"],"raw_orcid":"https://orcid.org/0000-0002-6473-9272","affiliations":[{"raw_affiliation_string":"School of Computer Science, Fudan University, Shanghai, China","institution_ids":["https://openalex.org/I24943067"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5039580875","display_name":"Lipeng Ma","orcid":"https://orcid.org/0000-0001-5974-5988"},"institutions":[{"id":"https://openalex.org/I24943067","display_name":"Fudan University","ror":"https://ror.org/013q1eq08","country_code":"CN","type":"education","lineage":["https://openalex.org/I24943067"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Lipeng Ma","raw_affiliation_strings":["School of Computer Science, Fudan University, Shanghai, China"],"raw_orcid":"https://orcid.org/0000-0001-5974-5988","affiliations":[{"raw_affiliation_string":"School of Computer Science, Fudan University, Shanghai, China","institution_ids":["https://openalex.org/I24943067"]}]},{"author_position":"last","author":{"id":null,"display_name":"Ying He","orcid":"https://orcid.org/0000-0002-6749-4485"},"institutions":[{"id":"https://openalex.org/I172675005","display_name":"Nanyang Technological University","ror":"https://ror.org/02e7b5302","country_code":"SG","type":"education","lineage":["https://openalex.org/I172675005"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Ying He","raw_affiliation_strings":["College of Computing and Data Science, Nanyang Technological University, Singapore"],"raw_orcid":"https://orcid.org/0000-0002-6749-4485","affiliations":[{"raw_affiliation_string":"College of Computing and Data Science, Nanyang Technological University, Singapore","institution_ids":["https://openalex.org/I172675005"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.30776844,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"32","issue":"2","first_page":"2216","last_page":"2229"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10719","display_name":"3D Shape Modeling and Analysis","score":0.6876999735832214,"subfield":{"id":"https://openalex.org/subfields/2206","display_name":"Computational Mechanics"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10719","display_name":"3D Shape Modeling and Analysis","score":0.6876999735832214,"subfield":{"id":"https://openalex.org/subfields/2206","display_name":"Computational Mechanics"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10191","display_name":"Robotics and Sensor-Based Localization","score":0.09040000289678574,"subfield":{"id":"https://openalex.org/subfields/2202","display_name":"Aerospace Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.04309999942779541,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/point-cloud","display_name":"Point cloud","score":0.7565000057220459},{"id":"https://openalex.org/keywords/rendering","display_name":"Rendering (computer graphics)","score":0.6079999804496765},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.5722000002861023},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.428600013256073},{"id":"https://openalex.org/keywords/feature-learning","display_name":"Feature learning","score":0.4251999855041504},{"id":"https://openalex.org/keywords/view-synthesis","display_name":"View synthesis","score":0.4169999957084656},{"id":"https://openalex.org/keywords/rgb-color-model","display_name":"RGB color model","score":0.414900004863739},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.4009000062942505},{"id":"https://openalex.org/keywords/graphics","display_name":"Graphics","score":0.39089998602867126}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8345000147819519},{"id":"https://openalex.org/C131979681","wikidata":"https://www.wikidata.org/wiki/Q1899648","display_name":"Point cloud","level":2,"score":0.7565000057220459},{"id":"https://openalex.org/C205711294","wikidata":"https://www.wikidata.org/wiki/Q176953","display_name":"Rendering (computer graphics)","level":2,"score":0.6079999804496765},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.5722000002861023},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5365999937057495},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.428600013256073},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.4251999855041504},{"id":"https://openalex.org/C2776449333","wikidata":"https://www.wikidata.org/wiki/Q7928781","display_name":"View synthesis","level":3,"score":0.4169999957084656},{"id":"https://openalex.org/C82990744","wikidata":"https://www.wikidata.org/wiki/Q166194","display_name":"RGB color model","level":2,"score":0.414900004863739},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.4009000062942505},{"id":"https://openalex.org/C21442007","wikidata":"https://www.wikidata.org/wiki/Q1027879","display_name":"Graphics","level":2,"score":0.39089998602867126},{"id":"https://openalex.org/C2776436953","wikidata":"https://www.wikidata.org/wiki/Q5163215","display_name":"Consistency (knowledge bases)","level":2,"score":0.3903000056743622},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.38679999113082886},{"id":"https://openalex.org/C2776459999","wikidata":"https://www.wikidata.org/wiki/Q2119376","display_name":"Fidelity","level":2,"score":0.3853999972343445},{"id":"https://openalex.org/C28719098","wikidata":"https://www.wikidata.org/wiki/Q44946","display_name":"Point (geometry)","level":2,"score":0.3824000060558319},{"id":"https://openalex.org/C77660652","wikidata":"https://www.wikidata.org/wiki/Q150971","display_name":"Computer graphics","level":2,"score":0.36239999532699585},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.3506999909877777},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.3483999967575073},{"id":"https://openalex.org/C202615002","wikidata":"https://www.wikidata.org/wiki/Q783507","display_name":"Differentiable function","level":2,"score":0.33149999380111694},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.3188999891281128},{"id":"https://openalex.org/C204323151","wikidata":"https://www.wikidata.org/wiki/Q905424","display_name":"Range (aeronautics)","level":2,"score":0.29429998993873596},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.2928999960422516},{"id":"https://openalex.org/C172367668","wikidata":"https://www.wikidata.org/wiki/Q6504956","display_name":"Data visualization","level":3,"score":0.28610000014305115},{"id":"https://openalex.org/C79974875","wikidata":"https://www.wikidata.org/wiki/Q483639","display_name":"Cloud computing","level":2,"score":0.2775999903678894},{"id":"https://openalex.org/C108882727","wikidata":"https://www.wikidata.org/wiki/Q2991685","display_name":"Solid modeling","level":2,"score":0.27549999952316284},{"id":"https://openalex.org/C2779903281","wikidata":"https://www.wikidata.org/wiki/Q6888026","display_name":"Modalities","level":2,"score":0.2662000060081482},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.25429999828338623},{"id":"https://openalex.org/C502989409","wikidata":"https://www.wikidata.org/wiki/Q11425","display_name":"Animation","level":2,"score":0.2531999945640564}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/tvcg.2025.3631434","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tvcg.2025.3631434","pdf_url":null,"source":{"id":"https://openalex.org/S84775595","display_name":"IEEE Transactions on Visualization and Computer Graphics","issn_l":"1077-2626","issn":["1077-2626","1941-0506","2160-9306"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Visualization and Computer Graphics","raw_type":"journal-article"},{"id":"pmid:41223109","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/41223109","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE transactions on visualization and computer graphics","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":70,"referenced_works":["https://openalex.org/W1920022804","https://openalex.org/W2114084204","https://openalex.org/W2133623148","https://openalex.org/W2194775991","https://openalex.org/W2460657278","https://openalex.org/W2553307952","https://openalex.org/W2594519801","https://openalex.org/W2698857938","https://openalex.org/W2796426482","https://openalex.org/W2805499196","https://openalex.org/W2806332096","https://openalex.org/W2886499109","https://openalex.org/W2892230984","https://openalex.org/W2901537885","https://openalex.org/W2953668091","https://openalex.org/W2960986959","https://openalex.org/W2963158438","https://openalex.org/W2979750740","https://openalex.org/W2981440248","https://openalex.org/W2985088149","https://openalex.org/W2988715931","https://openalex.org/W2990613095","https://openalex.org/W3025708905","https://openalex.org/W3034459762","https://openalex.org/W3035524453","https://openalex.org/W3092064176","https://openalex.org/W3109518641","https://openalex.org/W3111535274","https://openalex.org/W3116959466","https://openalex.org/W3119708198","https://openalex.org/W3164678585","https://openalex.org/W3189651322","https://openalex.org/W3191573718","https://openalex.org/W3192240783","https://openalex.org/W3194197043","https://openalex.org/W3197097949","https://openalex.org/W3197141874","https://openalex.org/W3199992965","https://openalex.org/W3202611145","https://openalex.org/W3203898101","https://openalex.org/W3206075451","https://openalex.org/W4210427181","https://openalex.org/W4214624153","https://openalex.org/W4214755140","https://openalex.org/W4287025408","https://openalex.org/W4312270234","https://openalex.org/W4312317653","https://openalex.org/W4312788538","https://openalex.org/W4312818263","https://openalex.org/W4312980726","https://openalex.org/W4315631092","https://openalex.org/W4318340592","https://openalex.org/W4377707861","https://openalex.org/W4379527433","https://openalex.org/W4386453468","https://openalex.org/W4386954497","https://openalex.org/W4387966490","https://openalex.org/W4389474301","https://openalex.org/W4389747813","https://openalex.org/W4389778567","https://openalex.org/W4390044614","https://openalex.org/W4390872570","https://openalex.org/W4390872658","https://openalex.org/W4391128914","https://openalex.org/W4391696924","https://openalex.org/W4392693642","https://openalex.org/W4392910312","https://openalex.org/W4409641030","https://openalex.org/W4412567585","https://openalex.org/W4414758224"],"related_works":[],"abstract_inverted_index":{"With":[0],"the":[1,47,57,152],"growing":[2],"demand":[3],"for":[4,18,157],"real-world":[5],"3-D":[6,12,84,144,159],"understanding,":[7],"learning":[8,38],"effective":[9,104],"representations":[10],"of":[11,49,59,154,169],"data":[13],"has":[14,35],"become":[15],"increasingly":[16],"important":[17],"tasks":[19],"such":[20],"as":[21],"shape":[22],"classification,":[23,145],"model":[24],"retrieval,":[25],"scene":[26],"reconstruction,":[27],"and":[28,61,83,123,147,161,171],"point":[29,43,85,129],"cloud":[30],"completion.":[31,148],"Although":[32],"previous":[33],"work":[34],"explored":[36],"self-supervised":[37,141],"within":[39],"individual":[40],"modalities":[41],"(e.g.,":[42],"clouds":[44,86],"or":[45],"images),":[46],"potential":[48,163],"multi-modal":[50,155],"supervision":[51],"remains":[52],"largely":[53],"underexplored":[54],"due":[55],"to":[56,87,164],"lack":[58],"aligned":[60],"scalable":[62],"training":[63],"signals.":[64],"In":[65],"this":[66],"work,":[67],"we":[68],"present":[69],"DR-Point,":[70],"a":[71,89,166],"tri-modal":[72],"pre-training":[73,156],"framework":[74],"that":[75,136],"jointly":[76],"learns":[77],"from":[78],"RGB":[79],"images,":[80],"depth":[81,121],"maps,":[82],"build":[88],"unified":[90,158],"embedding":[91],"space":[92],"across":[93],"modalities.":[94],"By":[95],"enforcing":[96],"cross-modal":[97],"consistency":[98],"among":[99],"RGB-depth-point":[100],"triplets,":[101],"DR-Point":[102,137],"achieves":[103],"2-D-3-D":[105],"feature":[106],"alignment":[107],"without":[108],"manual":[109],"annotations.":[110],"A":[111],"differentiable":[112],"rendering":[113],"module":[114],"further":[115],"enhances":[116],"geometric":[117],"fidelity":[118],"by":[119],"synthesizing":[120],"cues":[122],"refining":[124],"structural":[125],"details":[126],"in":[127],"reconstructed":[128],"clouds.":[130],"Extensive":[131],"experiments":[132],"on":[133,143],"benchmarks":[134],"demonstrate":[135],"consistently":[138],"outperforms":[139],"state-of-the-art":[140],"methods":[142],"segmentation,":[146],"These":[149],"results":[150],"highlight":[151],"advantages":[153],"understanding":[160],"its":[162],"benefit":[165],"wide":[167],"range":[168],"vision":[170],"graphics":[172],"applications.":[173]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-11-12T00:00:00"}
