{"id":"https://openalex.org/W7092292440","doi":"https://doi.org/10.1109/lra.2025.3623037","title":"GaussianVLM: Scene-Centric 3D Vision-Language Models Using Language-Aligned Gaussian Splats for Embodied Reasoning and Beyond","display_name":"GaussianVLM: Scene-Centric 3D Vision-Language Models Using Language-Aligned Gaussian Splats for Embodied Reasoning and Beyond","publication_year":2025,"publication_date":"2025-10-17","ids":{"openalex":"https://openalex.org/W7092292440","doi":"https://doi.org/10.1109/lra.2025.3623037"},"language":null,"primary_location":{"id":"doi:10.1109/lra.2025.3623037","is_oa":false,"landing_page_url":"https://doi.org/10.1109/lra.2025.3623037","pdf_url":null,"source":{"id":"https://openalex.org/S4210169774","display_name":"IEEE Robotics and Automation Letters","issn_l":"2377-3766","issn":["2377-3766"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Robotics and Automation Letters","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Anna-Maria Halacheva","orcid":"https://orcid.org/0009-0005-1053-9063"},"institutions":[{"id":"https://openalex.org/I58918642","display_name":"Sofia University \"St. Kliment Ohridski\"","ror":"https://ror.org/02jv3k292","country_code":"BG","type":"education","lineage":["https://openalex.org/I58918642"]}],"countries":["BG"],"is_corresponding":true,"raw_author_name":"Anna-Maria Halacheva","raw_affiliation_strings":["INSAIT, Sofia University &#x201C;St. Kliment Ohridski&#x201D;, Sofia, Bulgaria"],"affiliations":[{"raw_affiliation_string":"INSAIT, Sofia University &#x201C;St. Kliment Ohridski&#x201D;, Sofia, Bulgaria","institution_ids":["https://openalex.org/I58918642"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Jan-Nico Zaech","orcid":"https://orcid.org/0000-0003-2566-0841"},"institutions":[{"id":"https://openalex.org/I58918642","display_name":"Sofia University \"St. Kliment Ohridski\"","ror":"https://ror.org/02jv3k292","country_code":"BG","type":"education","lineage":["https://openalex.org/I58918642"]}],"countries":["BG"],"is_corresponding":false,"raw_author_name":"Jan-Nico Zaech","raw_affiliation_strings":["INSAIT, Sofia University &#x201C;St. Kliment Ohridski&#x201D;, Sofia, Bulgaria"],"affiliations":[{"raw_affiliation_string":"INSAIT, Sofia University &#x201C;St. Kliment Ohridski&#x201D;, Sofia, Bulgaria","institution_ids":["https://openalex.org/I58918642"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Xi Wang","orcid":"https://orcid.org/0000-0001-5442-1116"},"institutions":[{"id":"https://openalex.org/I58918642","display_name":"Sofia University \"St. Kliment Ohridski\"","ror":"https://ror.org/02jv3k292","country_code":"BG","type":"education","lineage":["https://openalex.org/I58918642"]}],"countries":["BG"],"is_corresponding":false,"raw_author_name":"Xi Wang","raw_affiliation_strings":["INSAIT, Sofia University &#x201C;St. Kliment Ohridski&#x201D;, Sofia, Bulgaria"],"affiliations":[{"raw_affiliation_string":"INSAIT, Sofia University &#x201C;St. Kliment Ohridski&#x201D;, Sofia, Bulgaria","institution_ids":["https://openalex.org/I58918642"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Danda Pani Paudel","orcid":"https://orcid.org/0000-0002-1739-1867"},"institutions":[{"id":"https://openalex.org/I58918642","display_name":"Sofia University \"St. Kliment Ohridski\"","ror":"https://ror.org/02jv3k292","country_code":"BG","type":"education","lineage":["https://openalex.org/I58918642"]}],"countries":["BG"],"is_corresponding":false,"raw_author_name":"Danda Pani Paudel","raw_affiliation_strings":["INSAIT, Sofia University &#x201C;St. Kliment Ohridski&#x201D;, Sofia, Bulgaria"],"affiliations":[{"raw_affiliation_string":"INSAIT, Sofia University &#x201C;St. Kliment Ohridski&#x201D;, Sofia, Bulgaria","institution_ids":["https://openalex.org/I58918642"]}]},{"author_position":"last","author":{"id":null,"display_name":"Luc Van Gool","orcid":"https://orcid.org/0000-0002-3445-5711"},"institutions":[{"id":"https://openalex.org/I58918642","display_name":"Sofia University \"St. Kliment Ohridski\"","ror":"https://ror.org/02jv3k292","country_code":"BG","type":"education","lineage":["https://openalex.org/I58918642"]}],"countries":["BG"],"is_corresponding":false,"raw_author_name":"Luc Van Gool","raw_affiliation_strings":["INSAIT, Sofia University &#x201C;St. Kliment Ohridski&#x201D;, Sofia, Bulgaria"],"affiliations":[{"raw_affiliation_string":"INSAIT, Sofia University &#x201C;St. Kliment Ohridski&#x201D;, Sofia, Bulgaria","institution_ids":["https://openalex.org/I58918642"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I58918642"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.55499494,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"10","issue":"12","first_page":"12588","last_page":"12595"},"is_retracted":false,"is_paratext":false,"is_xpac":true,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9599000215530396,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9599000215530396,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.008100000210106373,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.0032999999821186066,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.5878999829292297},{"id":"https://openalex.org/keywords/gaussian-process","display_name":"Gaussian process","score":0.5823000073432922},{"id":"https://openalex.org/keywords/gaussian","display_name":"Gaussian","score":0.527899980545044},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.5270000100135803},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.5123999714851379},{"id":"https://openalex.org/keywords/3d-model","display_name":"3d model","score":0.48260000348091125},{"id":"https://openalex.org/keywords/rgb-color-model","display_name":"RGB color model","score":0.4368000030517578},{"id":"https://openalex.org/keywords/embodied-cognition","display_name":"Embodied cognition","score":0.4002000093460083}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7111999988555908},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6169000267982483},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.5878999829292297},{"id":"https://openalex.org/C61326573","wikidata":"https://www.wikidata.org/wiki/Q1496376","display_name":"Gaussian process","level":3,"score":0.5823000073432922},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.5683000087738037},{"id":"https://openalex.org/C163716315","wikidata":"https://www.wikidata.org/wiki/Q901177","display_name":"Gaussian","level":2,"score":0.527899980545044},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.5270000100135803},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.5123999714851379},{"id":"https://openalex.org/C3019007443","wikidata":"https://www.wikidata.org/wiki/Q568742","display_name":"3d model","level":2,"score":0.48260000348091125},{"id":"https://openalex.org/C82990744","wikidata":"https://www.wikidata.org/wiki/Q166194","display_name":"RGB color model","level":2,"score":0.4368000030517578},{"id":"https://openalex.org/C100609095","wikidata":"https://www.wikidata.org/wiki/Q1335050","display_name":"Embodied cognition","level":2,"score":0.4002000093460083},{"id":"https://openalex.org/C108882727","wikidata":"https://www.wikidata.org/wiki/Q2991685","display_name":"Solid modeling","level":2,"score":0.35749998688697815},{"id":"https://openalex.org/C2780980858","wikidata":"https://www.wikidata.org/wiki/Q110022","display_name":"Dual (grammatical number)","level":2,"score":0.3400999903678894},{"id":"https://openalex.org/C61224824","wikidata":"https://www.wikidata.org/wiki/Q2260434","display_name":"Mixture model","level":2,"score":0.33570000529289246},{"id":"https://openalex.org/C2777897806","wikidata":"https://www.wikidata.org/wiki/Q568742","display_name":"3D modeling","level":2,"score":0.3230000138282776},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.3010999858379364},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.28790000081062317},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.2816999852657318},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.27300000190734863},{"id":"https://openalex.org/C2780226545","wikidata":"https://www.wikidata.org/wiki/Q6888030","display_name":"Modality (human\u2013computer interaction)","level":2,"score":0.2720000147819519},{"id":"https://openalex.org/C193581530","wikidata":"https://www.wikidata.org/wiki/Q683778","display_name":"Structured light","level":2,"score":0.26350000500679016},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.2590000033378601},{"id":"https://openalex.org/C121684516","wikidata":"https://www.wikidata.org/wiki/Q7600677","display_name":"Computer graphics (images)","level":1,"score":0.25780001282691956},{"id":"https://openalex.org/C2776151529","wikidata":"https://www.wikidata.org/wiki/Q3045304","display_name":"Object detection","level":3,"score":0.25609999895095825}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/lra.2025.3623037","is_oa":false,"landing_page_url":"https://doi.org/10.1109/lra.2025.3623037","pdf_url":null,"source":{"id":"https://openalex.org/S4210169774","display_name":"IEEE Robotics and Automation Letters","issn_l":"2377-3766","issn":["2377-3766"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Robotics and Automation Letters","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":30,"referenced_works":["https://openalex.org/W1956340063","https://openalex.org/W2101105183","https://openalex.org/W2471962767","https://openalex.org/W2506483933","https://openalex.org/W2519683295","https://openalex.org/W2930283066","https://openalex.org/W2950697717","https://openalex.org/W2966683369","https://openalex.org/W2970641574","https://openalex.org/W3095974555","https://openalex.org/W3168640669","https://openalex.org/W3182910454","https://openalex.org/W4226376247","https://openalex.org/W4386065742","https://openalex.org/W4386076628","https://openalex.org/W4389667233","https://openalex.org/W4390872495","https://openalex.org/W4390873101","https://openalex.org/W4390873312","https://openalex.org/W4401024570","https://openalex.org/W4402667891","https://openalex.org/W4402716423","https://openalex.org/W4402727108","https://openalex.org/W4402753560","https://openalex.org/W4402961736","https://openalex.org/W4404024963","https://openalex.org/W4405785674","https://openalex.org/W4409262625","https://openalex.org/W4413145624","https://openalex.org/W4414079054"],"related_works":[],"abstract_inverted_index":{"As":[0],"multimodal":[1],"language":[2,75],"models":[3],"advance,":[4],"their":[5],"application":[6],"to":[7],"3D":[8,19,47,50,70,125,140],"scene":[9,59,71,113],"understanding":[10],"is":[11],"a":[12,45,92],"fast-growing":[13],"frontier,":[14],"driving":[15],"the":[16,69,86,118],"development":[17],"of":[18,138],"Vision-Language":[20],"Models":[21],"(VLMs).":[22],"Current":[23],"methods":[24],"show":[25],"strong":[26,133],"dependence":[27],"on":[28],"object":[29],"detectors,":[30],"introducing":[31],"processing":[32],"bottlenecks":[33],"and":[34,57,104,111],"limitations":[35],"in":[36,146],"taxonomic":[37],"flexibility.":[38],"To":[39,84],"address":[40],"these":[41],"limitations,":[42],"we":[43,90,116],"propose":[44],"scene-centric":[46],"VLM":[48,141],"for":[49],"Gaussian":[51,78,120],"splat":[52],"scenes":[53],"that":[54,95],"employs":[55],"language-":[56],"task-aware":[58,109],"representations.":[60],"Our":[61],"approach":[62],"directly":[63],"embeds":[64],"rich":[65],"linguistic":[66],"features":[67],"into":[68,98],"representation":[72],"by":[73],"associating":[74],"with":[76],"each":[77],"primitive,":[79],"achieving":[80],"early":[81],"modality":[82],"alignment.":[83],"process":[85],"resulting":[87],"dense":[88],"representations,":[89],"introduce":[91],"dual":[93],"sparsifier":[94],"distills":[96],"them":[97],"compact,":[99],"task-relevant":[100],"tokens":[101],"via":[102],"task-guided":[103],"location-guided":[105],"pathways,":[106],"producing":[107],"sparse,":[108],"global":[110],"local":[112],"tokens.":[114],"Notably,":[115],"present":[117],"first":[119],"splatting-based":[121],"VLM,":[122],"leveraging":[123],"photorealistic":[124],"representations":[126],"derived":[127],"from":[128],"standard":[129],"RGB":[130],"images,":[131],"demonstrating":[132],"generalization:":[134],"it":[135],"improves":[136],"performance":[137],"prior":[139],"(LL3DA":[142],"[8])":[143],"five":[144],"folds,":[145],"out-of-the-domain":[147],"settings.":[148]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-18T00:00:00"}
