{"id":"https://openalex.org/W7137878337","doi":"https://doi.org/10.1609/aaai.v40i11.37892","title":"PC-CrossDiff: Point-Cluster Dual-Level Cross-Modal Differential Attention for Unified 3D Referring and Segmentation","display_name":"PC-CrossDiff: Point-Cluster Dual-Level Cross-Modal Differential Attention for Unified 3D Referring and Segmentation","publication_year":2026,"publication_date":"2026-03-14","ids":{"openalex":"https://openalex.org/W7137878337","doi":"https://doi.org/10.1609/aaai.v40i11.37892"},"language":null,"primary_location":{"id":"doi:10.1609/aaai.v40i11.37892","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i11.37892","pdf_url":null,"source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://doi.org/10.1609/aaai.v40i11.37892","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5129749147","display_name":"Wenbin Tan","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Wenbin Tan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129738530","display_name":"Jiawen Lin","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jiawen Lin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5072392536","display_name":"Fangyong Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Fangyong Wang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129643538","display_name":"Yuan Xie","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yuan Xie","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129686678","display_name":"Yong Xie","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yong Xie","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129723679","display_name":"Yachao Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yachao Zhang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5129651728","display_name":"Yanyun Qu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yanyun Qu","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5129749147"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.11111111,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"40","issue":"11","first_page":"9332","last_page":"9340"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9812999963760376,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9812999963760376,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.0038999998942017555,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.001500000013038516,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/discriminative-model","display_name":"Discriminative model","score":0.7010999917984009},{"id":"https://openalex.org/keywords/parsing","display_name":"Parsing","score":0.6660000085830688},{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.5462999939918518},{"id":"https://openalex.org/keywords/differential","display_name":"Differential (mechanical device)","score":0.4587000012397766},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.38440001010894775},{"id":"https://openalex.org/keywords/function","display_name":"Function (biology)","score":0.3684000074863434},{"id":"https://openalex.org/keywords/salient","display_name":"Salient","score":0.3555000126361847},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.3411000072956085},{"id":"https://openalex.org/keywords/referent","display_name":"Referent","score":0.33230000734329224}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7598999738693237},{"id":"https://openalex.org/C97931131","wikidata":"https://www.wikidata.org/wiki/Q5282087","display_name":"Discriminative model","level":2,"score":0.7010999917984009},{"id":"https://openalex.org/C186644900","wikidata":"https://www.wikidata.org/wiki/Q194152","display_name":"Parsing","level":2,"score":0.6660000085830688},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5855000019073486},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.5462999939918518},{"id":"https://openalex.org/C93226319","wikidata":"https://www.wikidata.org/wiki/Q193137","display_name":"Differential (mechanical device)","level":2,"score":0.4587000012397766},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.38440001010894775},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.3684000074863434},{"id":"https://openalex.org/C2780719617","wikidata":"https://www.wikidata.org/wiki/Q1030752","display_name":"Salient","level":2,"score":0.3555000126361847},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.3411000072956085},{"id":"https://openalex.org/C2777096784","wikidata":"https://www.wikidata.org/wiki/Q3826351","display_name":"Referent","level":2,"score":0.33230000734329224},{"id":"https://openalex.org/C27362006","wikidata":"https://www.wikidata.org/wiki/Q272021","display_name":"Gestalt psychology","level":3,"score":0.31610000133514404},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.311599999666214},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.30640000104904175},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3059000074863434},{"id":"https://openalex.org/C170858558","wikidata":"https://www.wikidata.org/wiki/Q1394144","display_name":"Automatic summarization","level":2,"score":0.29490000009536743},{"id":"https://openalex.org/C184898388","wikidata":"https://www.wikidata.org/wiki/Q1435712","display_name":"Pairwise comparison","level":2,"score":0.2872999906539917},{"id":"https://openalex.org/C28719098","wikidata":"https://www.wikidata.org/wiki/Q44946","display_name":"Point (geometry)","level":2,"score":0.2865999937057495},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.2784999907016754},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.2784999907016754},{"id":"https://openalex.org/C108154423","wikidata":"https://www.wikidata.org/wiki/Q1469792","display_name":"Salience (neuroscience)","level":2,"score":0.27559998631477356},{"id":"https://openalex.org/C22367795","wikidata":"https://www.wikidata.org/wiki/Q7625208","display_name":"Structured prediction","level":2,"score":0.2728999853134155},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.2700999975204468},{"id":"https://openalex.org/C2779439875","wikidata":"https://www.wikidata.org/wiki/Q1078276","display_name":"Natural language understanding","level":3,"score":0.26460000872612},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.2644999921321869},{"id":"https://openalex.org/C168993435","wikidata":"https://www.wikidata.org/wiki/Q6501125","display_name":"Ground","level":2,"score":0.2614000141620636}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1609/aaai.v40i11.37892","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i11.37892","pdf_url":null,"source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1609/aaai.v40i11.37892","is_oa":true,"landing_page_url":"https://doi.org/10.1609/aaai.v40i11.37892","pdf_url":null,"source":{"id":"https://openalex.org/S4210191458","display_name":"Proceedings of the AAAI Conference on Artificial Intelligence","issn_l":"2159-5399","issn":["2159-5399","2374-3468"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310320058","host_organization_name":"Association for the Advancement of Artificial Intelligence","host_organization_lineage":["https://openalex.org/P4310320058"],"host_organization_lineage_names":["Association for the Advancement of Artificial Intelligence"],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the AAAI Conference on Artificial Intelligence","raw_type":"journal-article"},"sustainable_development_goals":[{"score":0.7736025452613831,"display_name":"Reduced inequalities","id":"https://metadata.un.org/sdg/10"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"3D":[0],"Visual":[1],"Grounding":[2],"(3DVG)":[3],"aims":[4],"to":[5,141,157,241],"localize":[6],"the":[7,114,178,213,221,228,234],"referent":[8],"of":[9,66,79,224],"natural":[10],"language":[11],"referring":[12],"expressions":[13],"through":[14,170],"two":[15,57],"core":[16],"tasks:":[17],"Referring":[18],"Expression":[19],"Comprehension":[20],"(3DREC)":[21],"and":[22,76,111,130,181,200,216],"Segmentation":[23],"(3DRES).":[24],"While":[25],"existing":[26],"methods":[27,55],"achieve":[28],"high":[29],"accuracy":[30],"in":[31,41,48,60,87,184],"simple,":[32],"single-object":[33],"scenes,":[34],"they":[35],"suffer":[36],"from":[37,83],"severe":[38],"performance":[39,211],"degradation":[40],"complex,":[42,61],"multi-object":[43,62],"scenes":[44],"that":[45,123,151,195],"are":[46],"common":[47],"real-world":[49],"settings,":[50],"hindering":[51],"practical":[52],"deployment.":[53],"Existing":[54],"face":[56],"key":[58],"challenges":[59],"scenes:":[63],"inadequate":[64],"parsing":[65],"implicit":[67,135,243],"localization":[68,136],"cues":[69,137],"critical":[70],"for":[71,109,233],"disambiguating":[72],"visually":[73],"similar":[74],"objects,":[75,85],"ineffective":[77],"suppression":[78],"dynamic":[80],"spatial":[81,161,168,244],"interference":[82],"co-occurring":[84],"resulting":[86],"degraded":[88],"grounding":[89],"accuracy.":[90],"To":[91,176],"address":[92,177],"these":[93],"challenges,":[94],"we":[95,188],"propose":[96,189],"PC-CrossDiff,":[97],"a":[98,103,153,171,191],"unified":[99,192],"dual-task":[100],"framework":[101,115],"with":[102],"dual-level":[104],"cross-modal":[105],"differential":[106,126,173],"attention":[107,127,155,174],"architecture":[108],"3DREC":[110,235],"3DRES.":[112],"Specifically,":[113],"introduces:":[116],"(i)":[117],"Point-Level":[118],"Differential":[119,147],"Attention":[120,148],"(PLDA)":[121],"modules":[122,150],"apply":[124],"bidirectional":[125],"between":[128],"text":[129],"point":[131],"clouds,":[132],"adaptively":[133,158],"extracting":[134],"via":[138],"learnable":[139],"weights":[140],"improve":[142],"discriminative":[143],"representation;":[144],"(ii)":[145],"Cluster-Level":[146],"(CLDA)":[149],"establish":[152],"hierarchical":[154],"mechanism":[156],"enhance":[159],"localization-relevant":[160],"relationships":[162],"while":[163],"suppressing":[164],"ambiguous":[165],"or":[166],"irrelevant":[167],"relations":[169],"localization-aware":[172],"block.":[175],"scale":[179],"disparity":[180],"conflicting":[182],"gradients":[183],"joint":[185],"3DREC\u20133DRES":[186],"training,":[187],"L_DGTL,":[190],"loss":[193],"function":[194],"explicitly":[196],"reduces":[197],"multi-task":[198],"crosstalk":[199],"enables":[201],"effective":[202],"parameter":[203],"sharing":[204],"across":[205],"tasks.":[206],"Our":[207],"method":[208],"achieves":[209],"state-of-the-art":[210],"on":[212,220],"ScanRefer,":[214,225],"NR3D,":[215],"SR3D":[217],"benchmarks.":[218],"Notably,":[219],"Implicit":[222],"subsets":[223],"it":[226],"improves":[227],"Overall@0.50":[229],"score":[230],"by":[231],"+10.16%":[232],"task,":[236],"highlighting":[237],"its":[238],"strong":[239],"ability":[240],"parse":[242],"cues.":[245]},"counts_by_year":[],"updated_date":"2026-03-18T06:31:55.123368","created_date":"2026-03-18T00:00:00"}
