{"id":"https://openalex.org/W7117461634","doi":"https://doi.org/10.1109/dicta68720.2025.11302439","title":"PointQA: Multi-Modality Guided Cross-Attention for 3D Visual Question Answering on Point Clouds","display_name":"PointQA: Multi-Modality Guided Cross-Attention for 3D Visual Question Answering on Point Clouds","publication_year":2025,"publication_date":"2025-12-03","ids":{"openalex":"https://openalex.org/W7117461634","doi":"https://doi.org/10.1109/dicta68720.2025.11302439"},"language":null,"primary_location":{"id":"doi:10.1109/dicta68720.2025.11302439","is_oa":false,"landing_page_url":"https://doi.org/10.1109/dicta68720.2025.11302439","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 International Conference on Digital Image Computing: Techniques and Applications (DICTA)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5084350939","display_name":"Muhammad Zeeshan Khan","orcid":"https://orcid.org/0000-0002-7905-3345"},"institutions":[{"id":"https://openalex.org/I149704539","display_name":"Deakin University","ror":"https://ror.org/02czsnj07","country_code":"AU","type":"education","lineage":["https://openalex.org/I149704539"]}],"countries":["AU"],"is_corresponding":true,"raw_author_name":"Muhammad Zeeshan Khan","raw_affiliation_strings":["School of Information Technology Deakin University,Geelong,Australia"],"affiliations":[{"raw_affiliation_string":"School of Information Technology Deakin University,Geelong,Australia","institution_ids":["https://openalex.org/I149704539"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5121469600","display_name":"Anuroop Gaddam","orcid":null},"institutions":[{"id":"https://openalex.org/I149704539","display_name":"Deakin University","ror":"https://ror.org/02czsnj07","country_code":"AU","type":"education","lineage":["https://openalex.org/I149704539"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Anuroop Gaddam","raw_affiliation_strings":["School of Information Technology Deakin University,Geelong,Australia"],"affiliations":[{"raw_affiliation_string":"School of Information Technology Deakin University,Geelong,Australia","institution_ids":["https://openalex.org/I149704539"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5084350939"],"corresponding_institution_ids":["https://openalex.org/I149704539"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.65617357,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"7"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.8398000001907349,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.8398000001907349,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10191","display_name":"Robotics and Sensor-Based Localization","score":0.051600001752376556,"subfield":{"id":"https://openalex.org/subfields/2202","display_name":"Aerospace Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10719","display_name":"3D Shape Modeling and Analysis","score":0.04089999943971634,"subfield":{"id":"https://openalex.org/subfields/2206","display_name":"Computational Mechanics"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/question-answering","display_name":"Question answering","score":0.7573999762535095},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.5110999941825867},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.49070000648498535},{"id":"https://openalex.org/keywords/grasp","display_name":"GRASP","score":0.4681999981403351},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.46650001406669617},{"id":"https://openalex.org/keywords/graph","display_name":"Graph","score":0.44929999113082886},{"id":"https://openalex.org/keywords/point-cloud","display_name":"Point cloud","score":0.44589999318122864},{"id":"https://openalex.org/keywords/spatial-contextual-awareness","display_name":"Spatial contextual awareness","score":0.42570000886917114},{"id":"https://openalex.org/keywords/point","display_name":"Point (geometry)","score":0.42480000853538513},{"id":"https://openalex.org/keywords/visualization","display_name":"Visualization","score":0.4147000014781952}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7864000201225281},{"id":"https://openalex.org/C44291984","wikidata":"https://www.wikidata.org/wiki/Q1074173","display_name":"Question answering","level":2,"score":0.7573999762535095},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5577999949455261},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.5110999941825867},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.49070000648498535},{"id":"https://openalex.org/C171268870","wikidata":"https://www.wikidata.org/wiki/Q1486676","display_name":"GRASP","level":2,"score":0.4681999981403351},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.46650001406669617},{"id":"https://openalex.org/C132525143","wikidata":"https://www.wikidata.org/wiki/Q141488","display_name":"Graph","level":2,"score":0.44929999113082886},{"id":"https://openalex.org/C131979681","wikidata":"https://www.wikidata.org/wiki/Q1899648","display_name":"Point cloud","level":2,"score":0.44589999318122864},{"id":"https://openalex.org/C64754055","wikidata":"https://www.wikidata.org/wiki/Q7574053","display_name":"Spatial contextual awareness","level":2,"score":0.42570000886917114},{"id":"https://openalex.org/C28719098","wikidata":"https://www.wikidata.org/wiki/Q44946","display_name":"Point (geometry)","level":2,"score":0.42480000853538513},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.4147000014781952},{"id":"https://openalex.org/C16311509","wikidata":"https://www.wikidata.org/wiki/Q4148050","display_name":"Dependency graph","level":3,"score":0.38940000534057617},{"id":"https://openalex.org/C2777508537","wikidata":"https://www.wikidata.org/wiki/Q7936620","display_name":"Visual reasoning","level":2,"score":0.37720000743865967},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.3734999895095825},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.36660000681877136},{"id":"https://openalex.org/C19768560","wikidata":"https://www.wikidata.org/wiki/Q320727","display_name":"Dependency (UML)","level":2,"score":0.3400999903678894},{"id":"https://openalex.org/C155911833","wikidata":"https://www.wikidata.org/wiki/Q3817354","display_name":"Spatial intelligence","level":2,"score":0.334199994802475},{"id":"https://openalex.org/C123657996","wikidata":"https://www.wikidata.org/wiki/Q12271","display_name":"Architecture","level":2,"score":0.3239000141620636},{"id":"https://openalex.org/C179372163","wikidata":"https://www.wikidata.org/wiki/Q1406181","display_name":"Scene graph","level":3,"score":0.31619998812675476},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3098999857902527},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.3003999888896942},{"id":"https://openalex.org/C141353440","wikidata":"https://www.wikidata.org/wiki/Q182221","display_name":"Fuse (electrical)","level":2,"score":0.2948000133037567},{"id":"https://openalex.org/C27511587","wikidata":"https://www.wikidata.org/wiki/Q2178623","display_name":"Spatial relation","level":2,"score":0.2922999858856201},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.28600001335144043},{"id":"https://openalex.org/C139002025","wikidata":"https://www.wikidata.org/wiki/Q3001212","display_name":"Lift (data mining)","level":2,"score":0.2831000089645386},{"id":"https://openalex.org/C2776505523","wikidata":"https://www.wikidata.org/wiki/Q4785468","display_name":"Plan (archaeology)","level":2,"score":0.2777999937534332},{"id":"https://openalex.org/C2779439875","wikidata":"https://www.wikidata.org/wiki/Q1078276","display_name":"Natural language understanding","level":3,"score":0.26170000433921814},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.2590999901294708},{"id":"https://openalex.org/C77618280","wikidata":"https://www.wikidata.org/wiki/Q1155772","display_name":"Scheme (mathematics)","level":2,"score":0.25609999895095825},{"id":"https://openalex.org/C79974875","wikidata":"https://www.wikidata.org/wiki/Q483639","display_name":"Cloud computing","level":2,"score":0.2551000118255615},{"id":"https://openalex.org/C183322885","wikidata":"https://www.wikidata.org/wiki/Q17007702","display_name":"Context model","level":3,"score":0.25450000166893005}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/dicta68720.2025.11302439","is_oa":false,"landing_page_url":"https://doi.org/10.1109/dicta68720.2025.11302439","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 International Conference on Digital Image Computing: Techniques and Applications (DICTA)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.6715876460075378,"display_name":"Sustainable cities and communities","id":"https://metadata.un.org/sdg/11"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":26,"referenced_works":["https://openalex.org/W1522734439","https://openalex.org/W1933349210","https://openalex.org/W1947481528","https://openalex.org/W2064675550","https://openalex.org/W2157331557","https://openalex.org/W2194775991","https://openalex.org/W2277195237","https://openalex.org/W2560730294","https://openalex.org/W2594519801","https://openalex.org/W2745461083","https://openalex.org/W2964157791","https://openalex.org/W2981422075","https://openalex.org/W2988715931","https://openalex.org/W3034949383","https://openalex.org/W3092767330","https://openalex.org/W3095974555","https://openalex.org/W4212951146","https://openalex.org/W4226376247","https://openalex.org/W4310463627","https://openalex.org/W4312377093","https://openalex.org/W4312846625","https://openalex.org/W4385245566","https://openalex.org/W4389474301","https://openalex.org/W4393148430","https://openalex.org/W4403791572","https://openalex.org/W4409366009"],"related_works":[],"abstract_inverted_index":{"Answering":[0],"questions":[1],"about":[2],"3D":[3,131],"environments":[4],"is":[5,127],"a":[6,12,48,66],"frontier":[7],"challenge":[8,60],"in":[9],"AI,":[10],"demanding":[11],"nuanced":[13],"grasp":[14],"of":[15,125],"spatial":[16,71],"reasoning":[17],"that":[18,53,69,113,122],"distinguishes":[19],"it":[20],"from":[21,76],"2D":[22],"VQA.":[23],"The":[24],"dominant":[25],"paradigm":[26],"for":[27,97,129,136],"this":[28,57,61],"task":[29],"relies":[30],"on":[31,50,102],"pre-constructed":[32],"scene":[33],"graphs":[34],"to":[35,87],"explicitly":[36],"model":[37,82],"inter-object":[38],"relationships.":[39],"This":[40],"approach,":[41],"however,":[42],"introduces":[43],"architectural":[44],"complexity":[45],"and":[46,72,91,138],"creates":[47],"dependency":[49],"intermediate":[51],"representations":[52],"lack":[54],"robustness.":[55],"In":[56],"paper,":[58],"we":[59],"convention":[62],"by":[63],"introducing":[64],"PointQA,":[65],"streamlined":[67],"architecture":[68],"learns":[70],"semantic":[73],"context":[74],"directly":[75],"raw":[77],"point":[78],"cloud":[79],"data.":[80],"Our":[81,119],"employs":[83],"guided":[84],"cross-modal":[85],"attention":[86,126],"dynamically":[88],"fuse":[89],"visual":[90],"linguistic":[92],"features,":[93],"bypassing":[94],"the":[95,103,134],"need":[96],"explicit":[98],"graph":[99],"construction.":[100],"Evaluated":[101],"CLEVR3D-Real":[104],"dataset,":[105],"PointQA":[106],"achieves":[107],"state-of-the-art":[108],"performance,":[109],"outperforming":[110],"prior":[111],"methods":[112],"require":[114],"more":[115,139],"complex":[116],"structural":[117],"inputs.":[118],"work":[120],"demonstrates":[121],"judicious":[123],"use":[124],"sufficient":[128],"high-level":[130],"reasoning,":[132],"paving":[133],"way":[135],"simpler":[137],"scalable":[140],"VQA":[141],"models.":[142]},"counts_by_year":[],"updated_date":"2025-12-30T23:08:21.542490","created_date":"2025-12-29T00:00:00"}
