{"id":"https://openalex.org/W7125949966","doi":"https://doi.org/10.1109/tip.2026.3652003","title":"ThinkMatter: Panoramic-Aware Instructional Semantics for Monocular Vision-and-Language Navigation","display_name":"ThinkMatter: Panoramic-Aware Instructional Semantics for Monocular Vision-and-Language Navigation","publication_year":2026,"publication_date":"2026-01-01","ids":{"openalex":"https://openalex.org/W7125949966","doi":"https://doi.org/10.1109/tip.2026.3652003","pmid":"https://pubmed.ncbi.nlm.nih.gov/41605153"},"language":"en","primary_location":{"id":"doi:10.1109/tip.2026.3652003","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tip.2026.3652003","pdf_url":null,"source":{"id":"https://openalex.org/S4210173141","display_name":"IEEE Transactions on Image Processing","issn_l":"1057-7149","issn":["1057-7149","1941-0042"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Image Processing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://ink.library.smu.edu.sg/sis_research/10905","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5124070348","display_name":"Guangzhao Dai","orcid":null},"institutions":[{"id":"https://openalex.org/I36399199","display_name":"Nanjing University of Science and Technology","ror":"https://ror.org/00xp9wg62","country_code":"CN","type":"education","lineage":["https://openalex.org/I36399199"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Guangzhao Dai","raw_affiliation_strings":["School of Computer Science and Engineering, Nanjing University of Science and Technology, Nanjing, China"],"raw_orcid":"https://orcid.org/0000-0003-4111-9334","affiliations":[{"raw_affiliation_string":"School of Computer Science and Engineering, Nanjing University of Science and Technology, Nanjing, China","institution_ids":["https://openalex.org/I36399199"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100400167","display_name":"Shuo Wang","orcid":"https://orcid.org/0000-0002-6720-1646"},"institutions":[{"id":"https://openalex.org/I78988378","display_name":"Renmin University of China","ror":"https://ror.org/041pakw92","country_code":"CN","type":"education","lineage":["https://openalex.org/I78988378"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shuo Wang","raw_affiliation_strings":["School of Information, Renmin University of China, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0002-6720-1646","affiliations":[{"raw_affiliation_string":"School of Information, Renmin University of China, Beijing, China","institution_ids":["https://openalex.org/I78988378"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Hao Zhao","orcid":"https://orcid.org/0000-0001-7903-581X"},"institutions":[{"id":"https://openalex.org/I99065089","display_name":"Tsinghua University","ror":"https://ror.org/03cve4549","country_code":"CN","type":"education","lineage":["https://openalex.org/I99065089"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hao Zhao","raw_affiliation_strings":["Institute for AI Industry Research, Tsinghua University, Beijing, China"],"raw_orcid":"https://orcid.org/0000-0001-7903-581X","affiliations":[{"raw_affiliation_string":"Institute for AI Industry Research, Tsinghua University, Beijing, China","institution_ids":["https://openalex.org/I99065089"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5124128501","display_name":"Bin Zhu","orcid":null},"institutions":[{"id":"https://openalex.org/I79891267","display_name":"Singapore Management University","ror":"https://ror.org/050qmg959","country_code":"SG","type":"education","lineage":["https://openalex.org/I79891267"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Bin Zhu","raw_affiliation_strings":["School of Computing and Information Systems, Singapore Management University (SMU), 81 Victoria Street, Singapore"],"raw_orcid":"https://orcid.org/0000-0002-9213-2611","affiliations":[{"raw_affiliation_string":"School of Computing and Information Systems, Singapore Management University (SMU), 81 Victoria Street, Singapore","institution_ids":["https://openalex.org/I79891267"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101633158","display_name":"Qianru Sun","orcid":"https://orcid.org/0000-0003-2689-317X"},"institutions":[{"id":"https://openalex.org/I79891267","display_name":"Singapore Management University","ror":"https://ror.org/050qmg959","country_code":"SG","type":"education","lineage":["https://openalex.org/I79891267"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Qianru Sun","raw_affiliation_strings":["School of Computing and Information Systems, Singapore Management University (SMU), 81 Victoria Street, Singapore"],"raw_orcid":"https://orcid.org/0000-0003-2689-317X","affiliations":[{"raw_affiliation_string":"School of Computing and Information Systems, Singapore Management University (SMU), 81 Victoria Street, Singapore","institution_ids":["https://openalex.org/I79891267"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5124127291","display_name":"Xiangbo Shu","orcid":null},"institutions":[{"id":"https://openalex.org/I36399199","display_name":"Nanjing University of Science and Technology","ror":"https://ror.org/00xp9wg62","country_code":"CN","type":"education","lineage":["https://openalex.org/I36399199"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiangbo Shu","raw_affiliation_strings":["School of Computer Science and Engineering, Nanjing University of Science and Technology, Nanjing, China"],"raw_orcid":"https://orcid.org/0000-0003-4902-4663","affiliations":[{"raw_affiliation_string":"School of Computer Science and Engineering, Nanjing University of Science and Technology, Nanjing, China","institution_ids":["https://openalex.org/I36399199"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5124070348"],"corresponding_institution_ids":["https://openalex.org/I36399199"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.16260561,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"35","issue":null,"first_page":"1937","last_page":"1950"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9387000203132629,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9387000203132629,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10191","display_name":"Robotics and Sensor-Based Localization","score":0.0210999995470047,"subfield":{"id":"https://openalex.org/subfields/2202","display_name":"Aerospace Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10586","display_name":"Robotic Path Planning Algorithms","score":0.007400000002235174,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/monocular","display_name":"Monocular","score":0.8313999772071838},{"id":"https://openalex.org/keywords/semantics","display_name":"Semantics (computer science)","score":0.6873999834060669},{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.652999997138977},{"id":"https://openalex.org/keywords/embodied-cognition","display_name":"Embodied cognition","score":0.40529999136924744},{"id":"https://openalex.org/keywords/visualization","display_name":"Visualization","score":0.398499995470047},{"id":"https://openalex.org/keywords/perception","display_name":"Perception","score":0.3686999976634979}],"concepts":[{"id":"https://openalex.org/C65909025","wikidata":"https://www.wikidata.org/wiki/Q1945033","display_name":"Monocular","level":2,"score":0.8313999772071838},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7716000080108643},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.6873999834060669},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.652999997138977},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6524999737739563},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.6244999766349792},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.4092000126838684},{"id":"https://openalex.org/C100609095","wikidata":"https://www.wikidata.org/wiki/Q1335050","display_name":"Embodied cognition","level":2,"score":0.40529999136924744},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.398499995470047},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.3686999976634979},{"id":"https://openalex.org/C9652623","wikidata":"https://www.wikidata.org/wiki/Q190109","display_name":"Field (mathematics)","level":2,"score":0.3668999969959259},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.35670000314712524},{"id":"https://openalex.org/C158829959","wikidata":"https://www.wikidata.org/wiki/Q1640606","display_name":"Monocular vision","level":2,"score":0.3499000072479248},{"id":"https://openalex.org/C34413123","wikidata":"https://www.wikidata.org/wiki/Q170978","display_name":"Robotics","level":3,"score":0.31450000405311584},{"id":"https://openalex.org/C2775955345","wikidata":"https://www.wikidata.org/wiki/Q7449071","display_name":"Semantic mapping","level":2,"score":0.2556000053882599},{"id":"https://openalex.org/C121684516","wikidata":"https://www.wikidata.org/wiki/Q7600677","display_name":"Computer graphics (images)","level":1,"score":0.2542000114917755}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1109/tip.2026.3652003","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tip.2026.3652003","pdf_url":null,"source":{"id":"https://openalex.org/S4210173141","display_name":"IEEE Transactions on Image Processing","issn_l":"1057-7149","issn":["1057-7149","1941-0042"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Image Processing","raw_type":"journal-article"},{"id":"pmid:41605153","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/41605153","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE transactions on image processing : a publication of the IEEE Signal Processing Society","raw_type":null},{"id":"pmh:oai:ink.library.smu.edu.sg:sis_research-11907","is_oa":true,"landing_page_url":"https://ink.library.smu.edu.sg/sis_research/10905","pdf_url":null,"source":{"id":"https://openalex.org/S4306401925","display_name":"Singapore Management University Institutional Knowledge (InK) (Singapore Management University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I79891267","host_organization_name":"Singapore Management University","host_organization_lineage":["https://openalex.org/I79891267"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"https://doi.org/10.1109/TIP.2026.3652003","raw_type":"Journal Article"}],"best_oa_location":{"id":"pmh:oai:ink.library.smu.edu.sg:sis_research-11907","is_oa":true,"landing_page_url":"https://ink.library.smu.edu.sg/sis_research/10905","pdf_url":null,"source":{"id":"https://openalex.org/S4306401925","display_name":"Singapore Management University Institutional Knowledge (InK) (Singapore Management University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I79891267","host_organization_name":"Singapore Management University","host_organization_lineage":["https://openalex.org/I79891267"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"https://doi.org/10.1109/TIP.2026.3652003","raw_type":"Journal Article"},"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.6818509697914124}],"awards":[{"id":"https://openalex.org/G1861350651","display_name":null,"funder_award_id":"62427808","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G6626320309","display_name":null,"funder_award_id":"U25A20442","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":41,"referenced_works":["https://openalex.org/W1901129140","https://openalex.org/W2927812634","https://openalex.org/W2962974533","https://openalex.org/W2963800628","https://openalex.org/W2964339842","https://openalex.org/W3009928773","https://openalex.org/W3091259027","https://openalex.org/W3096831136","https://openalex.org/W3100923070","https://openalex.org/W3109085430","https://openalex.org/W3202040256","https://openalex.org/W3203338521","https://openalex.org/W3206064780","https://openalex.org/W4200150166","https://openalex.org/W4214879935","https://openalex.org/W4226052928","https://openalex.org/W4312253995","https://openalex.org/W4312434279","https://openalex.org/W4312544224","https://openalex.org/W4312996039","https://openalex.org/W4383108296","https://openalex.org/W4385318467","https://openalex.org/W4386065508","https://openalex.org/W4386071616","https://openalex.org/W4390871755","https://openalex.org/W4390871940","https://openalex.org/W4390872665","https://openalex.org/W4391506026","https://openalex.org/W4394627367","https://openalex.org/W4399485142","https://openalex.org/W4401417232","https://openalex.org/W4402353981","https://openalex.org/W4402716032","https://openalex.org/W4402773335","https://openalex.org/W4404199606","https://openalex.org/W4405844902","https://openalex.org/W4409560874","https://openalex.org/W4410949566","https://openalex.org/W4411086358","https://openalex.org/W4414050622","https://openalex.org/W4416748461"],"related_works":[],"abstract_inverted_index":{"Vision-and-Language":[0],"Navigation":[1],"in":[2,36,176],"continuous":[3],"environments":[4,181],"(VLN-CE)":[5],"requires":[6],"an":[7],"embodied":[8],"robot":[9],"to":[10,92,116],"navigate":[11],"the":[12,16,41,108,111,130,133,137,142,149,171,177,183],"target":[13],"destination":[14],"following":[15],"natural":[17],"language":[18,153],"instruction.":[19,172],"Most":[20],"existing":[21],"methods":[22,34],"use":[23],"panoramic":[24,45,114],"RGB-D":[25,46],"cameras":[26,60],"for":[27,70,84,191],"360\u00b0":[28],"observation":[29,126],"of":[30,40,44,61,64,136,145,152,185],"environments.":[31],"However,":[32],"these":[33],"struggle":[35],"real-world":[37,180,192],"applications":[38],"because":[39],"higher":[42],"cost":[43],"cameras.":[47],"This":[48],"paper":[49],"studies":[50],"a":[51,81,188],"low-cost":[52],"and":[53,73,100,179],"practical":[54],"VLN-CE":[55],"setting,":[56],"e.g.,":[57],"using":[58],"monocular":[59,85,90,158],"limited":[62],"field":[63],"view,":[65],"which":[66,140],"means":[67],"\"Look":[68],"Less\"":[69],"visual":[71],"observations":[72],"environment":[74,162],"semantics.":[75,104],"In":[76],"this":[77],"paper,":[78],"we":[79,88,106],"propose":[80],"ThinkMatter":[82],"framework":[83],"VLN-CE,":[86],"where":[87],"motivate":[89],"robots":[91,159],"\"Think":[93],"More\"":[94],"by":[95,110,132],"1)":[96],"generating":[97],"novel":[98,118],"views":[99,119],"2)":[101],"integrating":[102],"instruction":[103],"Specifically,":[105],"achieve":[107,129],"former":[109],"proposed":[112,134],"3DGS-based":[113],"generation":[115],"render":[117],"at":[120],"each":[121],"step,":[122],"based":[123],"on":[124],"past":[125],"collections.":[127],"We":[128],"latter":[131],"enhancement":[135],"occupancy-instruction":[138],"semantics,":[139],"integrates":[141],"spatial":[143],"semantics":[144,151],"occupancy":[146],"maps":[147],"with":[148,160,170],"textual":[150],"instructions.":[154],"These":[155],"operations":[156],"promote":[157],"wider":[161],"perceptions":[163],"as":[164,166],"well":[165],"transparent":[167],"semantic":[168],"connections":[169],"Both":[173],"extensive":[174],"experiments":[175],"simulators":[178],"demonstrate":[182],"effectiveness":[184],"ThinkMatter,":[186],"providing":[187],"promising":[189],"practice":[190],"navigation.":[193]},"counts_by_year":[],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2026-01-29T00:00:00"}
