{"id":"https://openalex.org/W4385768219","doi":"https://doi.org/10.24963/ijcai.2023/88","title":"Joint-MAE: 2D-3D Joint Masked Autoencoders for 3D Point Cloud Pre-training","display_name":"Joint-MAE: 2D-3D Joint Masked Autoencoders for 3D Point Cloud Pre-training","publication_year":2023,"publication_date":"2023-08-01","ids":{"openalex":"https://openalex.org/W4385768219","doi":"https://doi.org/10.24963/ijcai.2023/88"},"language":"en","primary_location":{"id":"doi:10.24963/ijcai.2023/88","is_oa":true,"landing_page_url":"https://doi.org/10.24963/ijcai.2023/88","pdf_url":"https://www.ijcai.org/proceedings/2023/0088.pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Thirty-Second International Joint Conference on Artificial Intelligence","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.ijcai.org/proceedings/2023/0088.pdf","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100766486","display_name":"Ziyu Guo","orcid":"https://orcid.org/0000-0002-0310-3959"},"institutions":[{"id":"https://openalex.org/I177725633","display_name":"Chinese University of Hong Kong","ror":"https://ror.org/00t33hh48","country_code":"CN","type":"education","lineage":["https://openalex.org/I177725633"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Ziyu Guo","raw_affiliation_strings":["Department of Computer Science and Engineering, The Chinese University of Hong Kong"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, The Chinese University of Hong Kong","institution_ids":["https://openalex.org/I177725633"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5077326779","display_name":"Renrui Zhang","orcid":null},"institutions":[{"id":"https://openalex.org/I889458895","display_name":"University of Hong Kong","ror":"https://ror.org/02zhqgq86","country_code":"HK","type":"education","lineage":["https://openalex.org/I889458895"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Renrui Zhang","raw_affiliation_strings":["CUHK MMLab"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"CUHK MMLab","institution_ids":["https://openalex.org/I889458895"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5089497160","display_name":"Longtian Qiu","orcid":null},"institutions":[{"id":"https://openalex.org/I30809798","display_name":"ShanghaiTech University","ror":"https://ror.org/030bhh786","country_code":"CN","type":"education","lineage":["https://openalex.org/I30809798"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Longtian Qiu","raw_affiliation_strings":["ShanghaiTech University"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"ShanghaiTech University","institution_ids":["https://openalex.org/I30809798"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101763339","display_name":"Xianzhi Li","orcid":"https://orcid.org/0000-0001-6835-5607"},"institutions":[{"id":"https://openalex.org/I47720641","display_name":"Huazhong University of Science and Technology","ror":"https://ror.org/00p991c53","country_code":"CN","type":"education","lineage":["https://openalex.org/I47720641"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xianzhi Li","raw_affiliation_strings":["Huazhong University of Science and Technology"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Huazhong University of Science and Technology","institution_ids":["https://openalex.org/I47720641"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5032708386","display_name":"Pheng\u2010Ann Heng","orcid":"https://orcid.org/0000-0003-3055-5034"},"institutions":[{"id":"https://openalex.org/I177725633","display_name":"Chinese University of Hong Kong","ror":"https://ror.org/00t33hh48","country_code":"CN","type":"education","lineage":["https://openalex.org/I177725633"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Pheng-Ann Heng","raw_affiliation_strings":["Department of Computer Science and Engineering, The Chinese University of Hong Kong","Institute of Medical Intelligence and XR, The Chinese University of Hong Kong"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, The Chinese University of Hong Kong","institution_ids":["https://openalex.org/I177725633"]},{"raw_affiliation_string":"Institute of Medical Intelligence and XR, The Chinese University of Hong Kong","institution_ids":["https://openalex.org/I177725633"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5100766486"],"corresponding_institution_ids":["https://openalex.org/I177725633"],"apc_list":null,"apc_paid":null,"fwci":10.2428,"has_fulltext":false,"cited_by_count":50,"citation_normalized_percentile":{"value":0.99338688,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"791","last_page":"799"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10719","display_name":"3D Shape Modeling and Analysis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/2206","display_name":"Computational Mechanics"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10719","display_name":"3D Shape Modeling and Analysis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/2206","display_name":"Computational Mechanics"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10531","display_name":"Advanced Vision and Imaging","score":0.9909999966621399,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/point-cloud","display_name":"Point cloud","score":0.8598875999450684},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7736926078796387},{"id":"https://openalex.org/keywords/joint","display_name":"Joint (building)","score":0.6811108589172363},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.638796329498291},{"id":"https://openalex.org/keywords/modality","display_name":"Modality (human\u2013computer interaction)","score":0.5693511366844177},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.5426698923110962},{"id":"https://openalex.org/keywords/point","display_name":"Point (geometry)","score":0.4708516001701355},{"id":"https://openalex.org/keywords/construct","display_name":"Construct (python library)","score":0.43910765647888184},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.4329093396663666},{"id":"https://openalex.org/keywords/feature-learning","display_name":"Feature learning","score":0.413100004196167},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.41299015283584595},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.39068603515625},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.12437191605567932}],"concepts":[{"id":"https://openalex.org/C131979681","wikidata":"https://www.wikidata.org/wiki/Q1899648","display_name":"Point cloud","level":2,"score":0.8598875999450684},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7736926078796387},{"id":"https://openalex.org/C18555067","wikidata":"https://www.wikidata.org/wiki/Q8375051","display_name":"Joint (building)","level":2,"score":0.6811108589172363},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.638796329498291},{"id":"https://openalex.org/C2780226545","wikidata":"https://www.wikidata.org/wiki/Q6888030","display_name":"Modality (human\u2013computer interaction)","level":2,"score":0.5693511366844177},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.5426698923110962},{"id":"https://openalex.org/C28719098","wikidata":"https://www.wikidata.org/wiki/Q44946","display_name":"Point (geometry)","level":2,"score":0.4708516001701355},{"id":"https://openalex.org/C2780801425","wikidata":"https://www.wikidata.org/wiki/Q5164392","display_name":"Construct (python library)","level":2,"score":0.43910765647888184},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.4329093396663666},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.413100004196167},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.41299015283584595},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.39068603515625},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.12437191605567932},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.0},{"id":"https://openalex.org/C170154142","wikidata":"https://www.wikidata.org/wiki/Q150737","display_name":"Architectural engineering","level":1,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.24963/ijcai.2023/88","is_oa":true,"landing_page_url":"https://doi.org/10.24963/ijcai.2023/88","pdf_url":"https://www.ijcai.org/proceedings/2023/0088.pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Thirty-Second International Joint Conference on Artificial Intelligence","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.24963/ijcai.2023/88","is_oa":true,"landing_page_url":"https://doi.org/10.24963/ijcai.2023/88","pdf_url":"https://www.ijcai.org/proceedings/2023/0088.pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Thirty-Second International Joint Conference on Artificial Intelligence","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320321920","display_name":"Innovation and Technology Commission","ror":"https://ror.org/04vf9tr09"}],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W4385768219.pdf"},"referenced_works_count":43,"referenced_works":["https://openalex.org/W1920022804","https://openalex.org/W2272361785","https://openalex.org/W2553307952","https://openalex.org/W2560609797","https://openalex.org/W2560722161","https://openalex.org/W2796426482","https://openalex.org/W2963121255","https://openalex.org/W2963140844","https://openalex.org/W2963443993","https://openalex.org/W2963719584","https://openalex.org/W2979750740","https://openalex.org/W2981440248","https://openalex.org/W3034459762","https://openalex.org/W3119708198","https://openalex.org/W3128716822","https://openalex.org/W3153465022","https://openalex.org/W3166396011","https://openalex.org/W3166446055","https://openalex.org/W3172363814","https://openalex.org/W3182683290","https://openalex.org/W3202611145","https://openalex.org/W3213647938","https://openalex.org/W4280617157","https://openalex.org/W4281922683","https://openalex.org/W4283838088","https://openalex.org/W4287818077","https://openalex.org/W4288623616","https://openalex.org/W4288795044","https://openalex.org/W4309805219","https://openalex.org/W4312045746","https://openalex.org/W4312270234","https://openalex.org/W4312317653","https://openalex.org/W4312788538","https://openalex.org/W4312818263","https://openalex.org/W4312980726","https://openalex.org/W4313156423","https://openalex.org/W4319301012","https://openalex.org/W4382458283","https://openalex.org/W4385245566","https://openalex.org/W4386075705","https://openalex.org/W4386790226","https://openalex.org/W4388976122","https://openalex.org/W4394671432"],"related_works":["https://openalex.org/W2081900870","https://openalex.org/W2385859805","https://openalex.org/W2530972254","https://openalex.org/W4389574804","https://openalex.org/W4390516098","https://openalex.org/W3016928466","https://openalex.org/W2936725271","https://openalex.org/W2037549926","https://openalex.org/W2374013449","https://openalex.org/W2181948922"],"abstract_inverted_index":{"Masked":[0],"Autoencoders":[1],"(MAE)":[2],"have":[3],"shown":[4],"promising":[5],"performance":[6,166],"in":[7],"self-supervised":[8,72],"learning":[9],"for":[10,71,147,155,174],"both":[11],"2D":[12,46,56,88],"and":[13,42,47,63,85,90,117,123,151,179],"3D":[14,60,73,82,139],"computer":[15],"vision.":[16],"However,":[17],"existing":[18],"MAE-style":[19],"methods":[20],"can":[21,58],"only":[22],"learn":[23],"from":[24],"the":[25,39,55,93,97,138,183],"data":[26],"of":[27,96,128,186],"a":[28,66,114,118,152],"single":[29],"modality,":[30],"i.e.,":[31],"either":[32],"images":[33],"or":[34],"point":[35,74,83],"clouds,":[36],"which":[37,142],"neglect":[38],"implicit":[40],"semantic":[41,149],"geometric":[43,157],"correlation":[44],"between":[45],"3D.":[48],"In":[49],"this":[50],"paper,":[51],"we":[52,104,130],"explore":[53],"how":[54],"modality":[57],"benefit":[59],"masked":[61,94],"autoencoding,":[62],"propose":[64],"Joint-MAE,":[65],"2D-3D":[67,111,148,156],"joint":[68,115,119],"MAE":[69],"framework":[70],"cloud":[75,84],"pre-training.":[76],"Joint-MAE":[77,163],"randomly":[78],"masks":[79],"an":[80],"input":[81],"its":[86],"projected":[87],"images,":[89],"then":[91],"reconstructs":[92],"information":[95],"two":[98,109,133],"modalities.":[99],"For":[100],"better":[101],"cross-modal":[102,134],"interaction,":[103],"construct":[105],"our":[106,160],"JointMAE":[107],"by":[108],"hierarchical":[110],"embedding":[112],"modules,":[113],"encoder,":[116],"decoder":[120],"with":[121],"modal-shared":[122],"model-specific":[124],"decoders.":[125],"On":[126],"top":[127],"this,":[129],"further":[131],"introduce":[132],"strategies":[135],"to":[136],"boost":[137],"representation":[140],"learning,":[141],"are":[143],"local-aligned":[144],"attention":[145],"mechanisms":[146],"cues,":[150],"cross-reconstruction":[153],"loss":[154],"constraints.":[158],"By":[159],"pre-training":[161],"paradigm,":[162],"achieves":[164],"superior":[165],"on":[167,177,182],"multiple":[168],"downstream":[169],"tasks,":[170],"e.g.,":[171],"92.4%":[172],"accuracy":[173,181],"linear":[175],"SVM":[176],"ModelNet40":[178],"86.07%":[180],"hardest":[184],"split":[185],"ScanObjectNN.":[187]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":25},{"year":2024,"cited_by_count":13},{"year":2023,"cited_by_count":10}],"updated_date":"2026-05-05T08:41:31.759640","created_date":"2025-10-10T00:00:00"}
