{"id":"https://openalex.org/W4413925620","doi":"https://doi.org/10.1109/icra55743.2025.11128818","title":"Logic-RAG: Augmenting Large Multimodal Models with Visual-Spatial Knowledge for Road Scene Understanding","display_name":"Logic-RAG: Augmenting Large Multimodal Models with Visual-Spatial Knowledge for Road Scene Understanding","publication_year":2025,"publication_date":"2025-05-19","ids":{"openalex":"https://openalex.org/W4413925620","doi":"https://doi.org/10.1109/icra55743.2025.11128818"},"language":"en","primary_location":{"id":"doi:10.1109/icra55743.2025.11128818","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icra55743.2025.11128818","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Conference on Robotics and Automation (ICRA)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5032309808","display_name":"Imran Kabir","orcid":null},"institutions":[{"id":"https://openalex.org/I130769515","display_name":"Pennsylvania State University","ror":"https://ror.org/04p491231","country_code":"US","type":"education","lineage":["https://openalex.org/I130769515"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Imran Kabir","raw_affiliation_strings":["College of Information Sciences and Technology, Pennsylvania State University,State College,PA,USA,16801"],"affiliations":[{"raw_affiliation_string":"College of Information Sciences and Technology, Pennsylvania State University,State College,PA,USA,16801","institution_ids":["https://openalex.org/I130769515"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5000312028","display_name":"Md Alimoor Reza","orcid":"https://orcid.org/0000-0001-7692-817X"},"institutions":[{"id":"https://openalex.org/I87213936","display_name":"Drake University","ror":"https://ror.org/001skmk61","country_code":"US","type":"education","lineage":["https://openalex.org/I87213936"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Md Alimoor Reza","raw_affiliation_strings":["Drake University,Department of Mathematics and Computer Science,Des Moines,IA,USA,50311"],"affiliations":[{"raw_affiliation_string":"Drake University,Department of Mathematics and Computer Science,Des Moines,IA,USA,50311","institution_ids":["https://openalex.org/I87213936"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5005834738","display_name":"Syed Masum Billah","orcid":"https://orcid.org/0000-0001-5063-3808"},"institutions":[{"id":"https://openalex.org/I130769515","display_name":"Pennsylvania State University","ror":"https://ror.org/04p491231","country_code":"US","type":"education","lineage":["https://openalex.org/I130769515"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Syed Billah","raw_affiliation_strings":["College of Information Sciences and Technology, Pennsylvania State University,State College,PA,USA,16801"],"affiliations":[{"raw_affiliation_string":"College of Information Sciences and Technology, Pennsylvania State University,State College,PA,USA,16801","institution_ids":["https://openalex.org/I130769515"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5032309808"],"corresponding_institution_ids":["https://openalex.org/I130769515"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.35006423,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"10585","last_page":"10591"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T13282","display_name":"Automated Road and Building Extraction","score":0.9696999788284302,"subfield":{"id":"https://openalex.org/subfields/2212","display_name":"Ocean Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T13282","display_name":"Automated Road and Building Extraction","score":0.9696999788284302,"subfield":{"id":"https://openalex.org/subfields/2212","display_name":"Ocean Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11606","display_name":"Infrastructure Maintenance and Monitoring","score":0.9610999822616577,"subfield":{"id":"https://openalex.org/subfields/2205","display_name":"Civil and Structural Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9571999907493591,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7082125544548035},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4609419107437134},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.4088551998138428},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.35729682445526123}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7082125544548035},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4609419107437134},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.4088551998138428},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.35729682445526123}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icra55743.2025.11128818","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icra55743.2025.11128818","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Conference on Robotics and Automation (ICRA)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G7312718072","display_name":null,"funder_award_id":"2326406","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":19,"referenced_works":["https://openalex.org/W1528063097","https://openalex.org/W2146881125","https://openalex.org/W2150066425","https://openalex.org/W2775701113","https://openalex.org/W3014641072","https://openalex.org/W3035086574","https://openalex.org/W3174522583","https://openalex.org/W3210065386","https://openalex.org/W4206760982","https://openalex.org/W4312230726","https://openalex.org/W4312261477","https://openalex.org/W4312815172","https://openalex.org/W4319300976","https://openalex.org/W4381802186","https://openalex.org/W4389519818","https://openalex.org/W4389520792","https://openalex.org/W4401386967","https://openalex.org/W4401414574","https://openalex.org/W4413328432"],"related_works":["https://openalex.org/W2772917594","https://openalex.org/W2036807459","https://openalex.org/W2058170566","https://openalex.org/W2755342338","https://openalex.org/W2166024367","https://openalex.org/W3116076068","https://openalex.org/W2229312674","https://openalex.org/W2951359407","https://openalex.org/W2079911747","https://openalex.org/W1969923398"],"abstract_inverted_index":{"Large":[0],"multimodal":[1],"models":[2,100],"(LMMs)":[3],"are":[4,193],"increasingly":[5],"integrated":[6],"into":[7],"autonomous":[8,96,187],"driving":[9,44,83,97,107,114,188],"systems":[10],"for":[11,23,94,186],"user":[12,27],"interaction.":[13],"However,":[14],"their":[15,121],"limitations":[16],"in":[17,43,56,170,184],"fine-grained":[18],"spatial":[19,41,181],"reasoning":[20,182],"pose":[21],"challenges":[22],"system":[24],"interpretability":[25],"and":[26,67,81,109,126,162,173,191],"trust.":[28],"We":[29,72],"introduce":[30],"Logic-RAG,":[31],"a":[32,48,61,64,68],"novel":[33],"Retrieval-Augmented":[34],"Generation":[35],"(RAG)":[36],"framework":[37],"that":[38,133],"improves":[39],"LMMs'":[40],"understanding":[42],"scenarios.":[45],"Logic-RAG":[46,74,119,143,149,178],"constructs":[47],"dynamic":[49],"knowledge":[50,169],"base":[51],"(KB)":[52],"about":[53],"object-object":[54],"relationships":[55],"first-order":[57],"logic":[58],"(FOL)":[59],"using":[60,78,86],"perception":[62],"module,":[63],"query-to-logic":[65],"embedder,":[66],"logical":[69,136],"inference":[70],"engine.":[71],"evaluated":[73],"on":[75,105,112],"visual-spatial":[76],"queries":[77],"both":[79,171],"synthetic":[80,106],"real-world":[82,113],"videos.":[84],"When":[85],"popular":[87],"LMMs":[88,185],"(GPT-4V,":[89],"Claude":[90],"3.5)":[91],"as":[92],"proxies":[93],"an":[95],"system,":[98],"these":[99],"achieved":[101],"only":[102],"55%":[103],"accuracy":[104,146],"scenes":[108],"under":[110],"75%":[111],"scenes.":[115],"Augmenting":[116],"them":[117],"with":[118,159],"increased":[120],"accuracies":[122],"to":[123,166],"over":[124],"80%":[125],"90%,":[127],"respectively.":[128],"An":[129],"ablation":[130],"study":[131],"showed":[132],"even":[134],"without":[135],"inference,":[137],"the":[138],"fact-based":[139],"context":[140],"constructed":[141],"by":[142,147],"alone":[144],"improved":[145,160],"15%.":[148],"is":[150],"extensible:":[151],"it":[152],"allows":[153],"seamless":[154],"replacement":[155],"of":[156],"individual":[157],"components":[158],"versions":[161],"enables":[163],"domain":[164],"experts":[165],"compose":[167],"new":[168],"FOL":[172],"natural":[174],"language.":[175],"In":[176],"sum,":[177],"addresses":[179],"critical":[180],"deficiencies":[183],"applications.":[189],"Code":[190],"data":[192],"available":[194],"at:":[195],"https://github.com/Imran2205/LogicRAG.":[196]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
