{"id":"https://openalex.org/W4413017604","doi":"https://doi.org/10.1109/tcsvt.2025.3596386","title":"NavComposer: Composing Language Instructions for Navigation Trajectories Through Action-Scene-Object Modularization","display_name":"NavComposer: Composing Language Instructions for Navigation Trajectories Through Action-Scene-Object Modularization","publication_year":2025,"publication_date":"2025-08-06","ids":{"openalex":"https://openalex.org/W4413017604","doi":"https://doi.org/10.1109/tcsvt.2025.3596386"},"language":"en","primary_location":{"id":"doi:10.1109/tcsvt.2025.3596386","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcsvt.2025.3596386","pdf_url":null,"source":{"id":"https://openalex.org/S115173108","display_name":"IEEE Transactions on Circuits and Systems for Video Technology","issn_l":"1051-8215","issn":["1051-8215","1558-2205"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Circuits and Systems for Video Technology","raw_type":"journal-article"},"type":"article","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2507.10894","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5089600727","display_name":"Zongtao He","orcid":"https://orcid.org/0000-0002-2166-4450"},"institutions":[{"id":"https://openalex.org/I116953780","display_name":"Tongji University","ror":"https://ror.org/03rc6as71","country_code":"CN","type":"education","lineage":["https://openalex.org/I116953780"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Zongtao He","raw_affiliation_strings":["Department of Control Science and Engineering, Tongji University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Department of Control Science and Engineering, Tongji University, Shanghai, China","institution_ids":["https://openalex.org/I116953780"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5080359598","display_name":"Liuyi Wang","orcid":"https://orcid.org/0000-0003-1368-0300"},"institutions":[{"id":"https://openalex.org/I116953780","display_name":"Tongji University","ror":"https://ror.org/03rc6as71","country_code":"CN","type":"education","lineage":["https://openalex.org/I116953780"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Liuyi Wang","raw_affiliation_strings":["Department of Control Science and Engineering, Tongji University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Department of Control Science and Engineering, Tongji University, Shanghai, China","institution_ids":["https://openalex.org/I116953780"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Lu Chen","orcid":"https://orcid.org/0009-0005-2257-6814"},"institutions":[{"id":"https://openalex.org/I116953780","display_name":"Tongji University","ror":"https://ror.org/03rc6as71","country_code":"CN","type":"education","lineage":["https://openalex.org/I116953780"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Lu Chen","raw_affiliation_strings":["Department of Control Science and Engineering, Tongji University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Department of Control Science and Engineering, Tongji University, Shanghai, China","institution_ids":["https://openalex.org/I116953780"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5037285766","display_name":"Chengju Liu","orcid":"https://orcid.org/0000-0001-7543-0855"},"institutions":[{"id":"https://openalex.org/I116953780","display_name":"Tongji University","ror":"https://ror.org/03rc6as71","country_code":"CN","type":"education","lineage":["https://openalex.org/I116953780"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chengju Liu","raw_affiliation_strings":["Department of Control Science and Engineering, Tongji University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Department of Control Science and Engineering, Tongji University, Shanghai, China","institution_ids":["https://openalex.org/I116953780"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5073789459","display_name":"Qijun Chen","orcid":"https://orcid.org/0000-0001-5644-1188"},"institutions":[{"id":"https://openalex.org/I116953780","display_name":"Tongji University","ror":"https://ror.org/03rc6as71","country_code":"CN","type":"education","lineage":["https://openalex.org/I116953780"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qijun Chen","raw_affiliation_strings":["Department of Control Science and Engineering, Tongji University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Department of Control Science and Engineering, Tongji University, Shanghai, China","institution_ids":["https://openalex.org/I116953780"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5089600727"],"corresponding_institution_ids":["https://openalex.org/I116953780"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.09975493,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"36","issue":"1","first_page":"913","last_page":"929"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.989300012588501,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.989300012588501,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11450","display_name":"Model-Driven Software Engineering Techniques","score":0.9767000079154968,"subfield":{"id":"https://openalex.org/subfields/1712","display_name":"Software"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.9749000072479248,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8128781914710999},{"id":"https://openalex.org/keywords/modular-programming","display_name":"Modular programming","score":0.6824408769607544},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.6410101056098938},{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.5696164965629578},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.5251721739768982},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4953650236129761},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.4521801471710205},{"id":"https://openalex.org/keywords/computer-graphics","display_name":"Computer graphics (images)","score":0.42013275623321533},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.4099878668785095},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.3734080195426941}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8128781914710999},{"id":"https://openalex.org/C88482812","wikidata":"https://www.wikidata.org/wiki/Q6453666","display_name":"Modular programming","level":2,"score":0.6824408769607544},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.6410101056098938},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.5696164965629578},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.5251721739768982},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4953650236129761},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.4521801471710205},{"id":"https://openalex.org/C121684516","wikidata":"https://www.wikidata.org/wiki/Q7600677","display_name":"Computer graphics (images)","level":1,"score":0.42013275623321533},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4099878668785095},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.3734080195426941},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/tcsvt.2025.3596386","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcsvt.2025.3596386","pdf_url":null,"source":{"id":"https://openalex.org/S115173108","display_name":"IEEE Transactions on Circuits and Systems for Video Technology","issn_l":"1051-8215","issn":["1051-8215","1558-2205"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Circuits and Systems for Video Technology","raw_type":"journal-article"},{"id":"pmh:oai:arXiv.org:2507.10894","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2507.10894","pdf_url":"https://arxiv.org/pdf/2507.10894","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2507.10894","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2507.10894","pdf_url":"https://arxiv.org/pdf/2507.10894","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G5736016220","display_name":null,"funder_award_id":"62233013","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G716395483","display_name":null,"funder_award_id":"62473295","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G843004458","display_name":null,"funder_award_id":"624B2105","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2095911326","https://openalex.org/W2393702438","https://openalex.org/W2391644119","https://openalex.org/W2060456459","https://openalex.org/W2368574244","https://openalex.org/W2372613066","https://openalex.org/W2354835639","https://openalex.org/W2164104454","https://openalex.org/W3038096277","https://openalex.org/W2604548540"],"abstract_inverted_index":{"Language-guided":[0],"navigation":[1,50,105,122,161],"is":[2],"a":[3,43,98,115,136],"cornerstone":[4],"of":[5,76,83,92,139,144,181],"embodied":[6],"AI,":[7],"enabling":[8],"agents":[9],"to":[10,103],"interpret":[11],"language":[12,68],"instructions":[13,20,123],"and":[14,61,63,90,131,157,168,175],"navigate":[15],"complex":[16],"environments.":[17],"However,":[18],"expert-provided":[19],"are":[21],"limited":[22],"in":[23,97],"quantity,":[24],"while":[25,79],"synthesized":[26],"annotations":[27],"often":[28],"lack":[29],"quality,":[30,141],"making":[31],"them":[32,65],"insufficient":[33],"for":[34,46,178],"large-scale":[35],"research.":[36,170],"To":[37],"address":[38],"this,":[39],"we":[40,112],"propose":[41],"NavComposer,":[42,111],"novel":[44],"framework":[45],"automatically":[47],"generating":[48],"high-quality":[49],"instructions.":[51,69,93],"NavComposer":[52],"explicitly":[53],"decomposes":[54],"semantic":[55,84,129],"entities":[56,85],"such":[57],"as":[58],"actions,":[59],"scenes,":[60],"objects,":[62],"recomposes":[64],"into":[66],"natural":[67],"Its":[70],"modular":[71],"architecture":[72],"allows":[73],"flexible":[74],"integration":[75],"state-of-the-art":[77],"techniques,":[78],"the":[80,88,179],"explicit":[81],"use":[82],"enhances":[86],"both":[87],"richness":[89],"accuracy":[91],"Moreover,":[94],"it":[95],"operates":[96],"data-agnostic":[99],"manner,":[100],"supporting":[101],"adaptation":[102],"diverse":[104],"trajectories":[106],"without":[107],"domain-specific":[108],"training.":[109],"Complementing":[110],"introduce":[113],"NavInstrCritic,":[114],"comprehensive":[116],"annotation-free":[117],"evaluation":[118,138,158],"system":[119],"that":[120,147],"assesses":[121],"on":[124,150],"three":[125],"dimensions:":[126],"contrastive":[127],"matching,":[128],"consistency,":[130],"linguistic":[132],"diversity.":[133],"NavInstrCritic":[134],"provides":[135],"holistic":[137],"instruction":[140,155],"addressing":[142],"limitations":[143],"traditional":[145],"metrics":[146],"rely":[148],"heavily":[149],"expert":[151],"annotations.":[152],"By":[153],"decoupling":[154],"generation":[156],"from":[159],"specific":[160],"agents,":[162],"our":[163,182],"method":[164],"enables":[165],"more":[166],"scalable":[167],"generalizable":[169],"Extensive":[171],"experiments":[172],"provide":[173],"direct":[174],"practical":[176],"evidence":[177],"effectiveness":[180],"method.":[183]},"counts_by_year":[],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-10-10T00:00:00"}
