{"id":"https://openalex.org/W4415540393","doi":"https://doi.org/10.1145/3746027.3754773","title":"GaussianCross: Cross-modal Self-supervised 3D Representation Learning via Gaussian Splatting","display_name":"GaussianCross: Cross-modal Self-supervised 3D Representation Learning via Gaussian Splatting","publication_year":2025,"publication_date":"2025-10-25","ids":{"openalex":"https://openalex.org/W4415540393","doi":"https://doi.org/10.1145/3746027.3754773"},"language":null,"primary_location":{"id":"doi:10.1145/3746027.3754773","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3746027.3754773","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 33rd ACM International Conference on Multimedia","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1145/3746027.3754773","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101659036","display_name":"Lei Yao","orcid":"https://orcid.org/0009-0007-0304-3056"},"institutions":[{"id":"https://openalex.org/I14243506","display_name":"Hong Kong Polytechnic University","ror":"https://ror.org/0030zas98","country_code":"HK","type":"education","lineage":["https://openalex.org/I14243506"]}],"countries":["HK"],"is_corresponding":true,"raw_author_name":"Lei Yao","raw_affiliation_strings":["Hong Kong Polytechnic University, Hong Kong, Hong Kong"],"raw_orcid":"https://orcid.org/0009-0007-0304-3056","affiliations":[{"raw_affiliation_string":"Hong Kong Polytechnic University, Hong Kong, Hong Kong","institution_ids":["https://openalex.org/I14243506"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100784213","display_name":"Yi Wang","orcid":"https://orcid.org/0000-0001-8659-4724"},"institutions":[{"id":"https://openalex.org/I14243506","display_name":"Hong Kong Polytechnic University","ror":"https://ror.org/0030zas98","country_code":"HK","type":"education","lineage":["https://openalex.org/I14243506"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Yi Wang","raw_affiliation_strings":["Hong Kong Polytechnic University, Hong Kong, Hong Kong"],"raw_orcid":"https://orcid.org/0000-0001-8659-4724","affiliations":[{"raw_affiliation_string":"Hong Kong Polytechnic University, Hong Kong, Hong Kong","institution_ids":["https://openalex.org/I14243506"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5108133275","display_name":"Yi Zhang","orcid":"https://orcid.org/0009-0009-8242-1581"},"institutions":[{"id":"https://openalex.org/I14243506","display_name":"Hong Kong Polytechnic University","ror":"https://ror.org/0030zas98","country_code":"HK","type":"education","lineage":["https://openalex.org/I14243506"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Yi Zhang","raw_affiliation_strings":["Hong Kong Polytechnic University, Hong Kong, Hong Kong"],"raw_orcid":"https://orcid.org/0009-0009-8242-1581","affiliations":[{"raw_affiliation_string":"Hong Kong Polytechnic University, Hong Kong, Hong Kong","institution_ids":["https://openalex.org/I14243506"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5016553220","display_name":"Moyun Liu","orcid":"https://orcid.org/0000-0002-4530-2606"},"institutions":[{"id":"https://openalex.org/I47720641","display_name":"Huazhong University of Science and Technology","ror":"https://ror.org/00p991c53","country_code":"CN","type":"education","lineage":["https://openalex.org/I47720641"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Moyun Liu","raw_affiliation_strings":["Huazhong University of Science and Technology, Wuhan, China"],"raw_orcid":"https://orcid.org/0000-0002-4530-2606","affiliations":[{"raw_affiliation_string":"Huazhong University of Science and Technology, Wuhan, China","institution_ids":["https://openalex.org/I47720641"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5044722301","display_name":"Lap\u2010Pui Chau","orcid":"https://orcid.org/0000-0003-4932-0593"},"institutions":[{"id":"https://openalex.org/I14243506","display_name":"Hong Kong Polytechnic University","ror":"https://ror.org/0030zas98","country_code":"HK","type":"education","lineage":["https://openalex.org/I14243506"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Lap-Pui Chau","raw_affiliation_strings":["Hong Kong Polytechnic University, Hong Kong, Hong Kong"],"raw_orcid":"https://orcid.org/0000-0003-4932-0593","affiliations":[{"raw_affiliation_string":"Hong Kong Polytechnic University, Hong Kong, Hong Kong","institution_ids":["https://openalex.org/I14243506"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5101659036"],"corresponding_institution_ids":["https://openalex.org/I14243506"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.15620498,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"6500","last_page":"6509"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9984999895095825,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.6234999895095825},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.5351999998092651},{"id":"https://openalex.org/keywords/point-cloud","display_name":"Point cloud","score":0.49399998784065247},{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.4862000048160553},{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.46399998664855957},{"id":"https://openalex.org/keywords/feature-learning","display_name":"Feature learning","score":0.44920000433921814},{"id":"https://openalex.org/keywords/point","display_name":"Point (geometry)","score":0.44760000705718994},{"id":"https://openalex.org/keywords/construct","display_name":"Construct (python library)","score":0.42739999294281006},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.41280001401901245}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8418999910354614},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6561999917030334},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.6234999895095825},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.5351999998092651},{"id":"https://openalex.org/C131979681","wikidata":"https://www.wikidata.org/wiki/Q1899648","display_name":"Point cloud","level":2,"score":0.49399998784065247},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.4862000048160553},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.46399998664855957},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.44920000433921814},{"id":"https://openalex.org/C28719098","wikidata":"https://www.wikidata.org/wiki/Q44946","display_name":"Point (geometry)","level":2,"score":0.44760000705718994},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4334000051021576},{"id":"https://openalex.org/C2780801425","wikidata":"https://www.wikidata.org/wiki/Q5164392","display_name":"Construct (python library)","level":2,"score":0.42739999294281006},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.41280001401901245},{"id":"https://openalex.org/C163716315","wikidata":"https://www.wikidata.org/wiki/Q901177","display_name":"Gaussian","level":2,"score":0.41029998660087585},{"id":"https://openalex.org/C116409475","wikidata":"https://www.wikidata.org/wiki/Q1385056","display_name":"External Data Representation","level":2,"score":0.4011000096797943},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.39590001106262207},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.3707999885082245},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.36000001430511475},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.3231000006198883},{"id":"https://openalex.org/C2779038628","wikidata":"https://www.wikidata.org/wiki/Q7248497","display_name":"Programming by demonstration","level":3,"score":0.3203999996185303},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.3093999922275543},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.29760000109672546},{"id":"https://openalex.org/C61326573","wikidata":"https://www.wikidata.org/wiki/Q1496376","display_name":"Gaussian process","level":3,"score":0.2808000147342682},{"id":"https://openalex.org/C83665646","wikidata":"https://www.wikidata.org/wiki/Q42139305","display_name":"Feature vector","level":2,"score":0.2770000100135803},{"id":"https://openalex.org/C21080849","wikidata":"https://www.wikidata.org/wiki/Q13611879","display_name":"Data point","level":2,"score":0.27639999985694885},{"id":"https://openalex.org/C160920958","wikidata":"https://www.wikidata.org/wiki/Q7662746","display_name":"Synthetic data","level":2,"score":0.2745000123977661},{"id":"https://openalex.org/C9357733","wikidata":"https://www.wikidata.org/wiki/Q6878417","display_name":"Missing data","level":2,"score":0.27140000462532043},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.27070000767707825},{"id":"https://openalex.org/C2781122975","wikidata":"https://www.wikidata.org/wiki/Q16928266","display_name":"Semantic feature","level":2,"score":0.25609999895095825},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.2551000118255615}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/3746027.3754773","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3746027.3754773","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 33rd ACM International Conference on Multimedia","raw_type":"proceedings-article"},{"id":"pmh:doi:10.48550/arxiv.2508.02172","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"publisher-specific-oa","license_id":"https://openalex.org/licenses/publisher-specific-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"}],"best_oa_location":{"id":"doi:10.1145/3746027.3754773","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3746027.3754773","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 33rd ACM International Conference on Multimedia","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":3,"referenced_works":["https://openalex.org/W1999478155","https://openalex.org/W3035524453","https://openalex.org/W4200150166"],"related_works":[],"abstract_inverted_index":{"The":[0,195],"significance":[1],"of":[2,110,158,192],"informative":[3],"and":[4,27,42,88,113,133,142,153,178,184,198],"robust":[5],"point":[6,36,75],"representations":[7],"has":[8],"been":[9],"widely":[10],"acknowledged":[11],"for":[12],"3D":[13,55,61,74,103],"scene":[14],"understanding.":[15],"Despite":[16],"existing":[17],"self-supervised":[18,54],"pre-training":[19],"counterparts":[20],"demonstrating":[21],"promising":[22],"performance,":[23],"the":[24,171,190],"model":[25],"collapse":[26],"structural":[28],"information":[29],"deficiency":[30],"remain":[31],"prevalent":[32],"due":[33],"to":[34,66,100,116,161],"insufficient":[35],"discrimination":[37],"difficulty,":[38],"yielding":[39],"unreliable":[40],"expressions":[41],"suboptimal":[43],"performance.":[44],"In":[45,135],"this":[46],"paper,":[47],"we":[48,123],"present":[49],"GaussianCross,":[50,122],"a":[51,78,92,102,139],"novel":[52],"cross-modal":[53,118],"representation":[56,82],"learning":[57],"architecture":[58],"integrating":[59],"feed-forward":[60],"Gaussian":[62,81],"Splatting":[63],"(3DGS)":[64],"techniques":[65],"address":[67],"current":[68],"challenges.":[69],"GaussianCross":[70,137,165],"seamlessly":[71],"converts":[72],"scale-inconsistent":[73],"clouds":[76],"into":[77],"unified":[79],"cuboid-normalized":[80],"without":[83],"missing":[84],"details,":[85],"enabling":[86],"stable":[87],"generalizable":[89],"pre-training.":[90],"Subsequently,":[91],"tri-attribute":[93],"adaptive":[94],"distillation":[95],"splatting":[96],"module":[97],"is":[98],"incorporated":[99],"construct":[101],"feature":[104,108],"field,":[105],"facilitating":[106],"synergetic":[107],"capturing":[109],"appearance,":[111],"geometry,":[112],"semantic":[114,183],"cues":[115],"maintain":[117],"consistency.":[119],"To":[120],"validate":[121],"perform":[124],"extensive":[125],"evaluations":[126],"on":[127,181],"various":[128],"benchmarks,":[129],"including":[130],"ScanNet,":[131],"ScanNet200,":[132],"S3DIS.":[134],"particular,":[136],"shows":[138],"prominent":[140],"parameter":[141],"data":[143,155],"efficiency,":[144],"achieving":[145],"superior":[146],"performance":[147],"through":[148],"linear":[149],"probing":[150],"(<0.1%":[151],"parameters)":[152],"limited":[154],"training":[156],"(1%":[157],"scenes)":[159],"compared":[160],"state-of-the-art":[162],"methods.":[163],"Furthermore,":[164],"demonstrates":[166],"strong":[167],"generalization":[168],"capabilities,":[169],"improving":[170],"full":[172],"fine-tuning":[173],"accuracy":[174],"by":[175],"9.3%":[176],"mIoU":[177],"6.1%":[179],"AP50":[180],"ScanNet200":[182],"instance":[185],"segmentation":[186],"tasks,":[187],"respectively,":[188],"supporting":[189],"effectiveness":[191],"our":[193],"approach.":[194],"code,":[196],"weights,":[197],"visualizations":[199],"are":[200],"publicly":[201],"available":[202],"at":[203],"https://rayyoh.github.io/GaussianCross/.":[204]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-25T00:00:00"}
