{"id":"https://openalex.org/W4403791663","doi":"https://doi.org/10.1145/3664647.3681150","title":"Narrowing the Gap between Vision and Action in Navigation","display_name":"Narrowing the Gap between Vision and Action in Navigation","publication_year":2024,"publication_date":"2024-10-26","ids":{"openalex":"https://openalex.org/W4403791663","doi":"https://doi.org/10.1145/3664647.3681150"},"language":"en","primary_location":{"id":"doi:10.1145/3664647.3681150","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3664647.3681150","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3664647.3681150?download=true","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 32nd ACM International Conference on Multimedia","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3664647.3681150?download=true","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5109603230","display_name":"Yue Zhang","orcid":"https://orcid.org/0000-0003-2153-6536"},"institutions":[{"id":"https://openalex.org/I87216513","display_name":"Michigan State University","ror":"https://ror.org/05hs6h993","country_code":"US","type":"education","lineage":["https://openalex.org/I87216513"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Yue Zhang","raw_affiliation_strings":["Michigan State University, East Lansing, USA"],"raw_orcid":"https://orcid.org/0000-0003-2153-6536","affiliations":[{"raw_affiliation_string":"Michigan State University, East Lansing, USA","institution_ids":["https://openalex.org/I87216513"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5087682739","display_name":"Parisa Kordjamshidi","orcid":"https://orcid.org/0000-0002-4606-1824"},"institutions":[{"id":"https://openalex.org/I87216513","display_name":"Michigan State University","ror":"https://ror.org/05hs6h993","country_code":"US","type":"education","lineage":["https://openalex.org/I87216513"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Parisa Kordjamshidi","raw_affiliation_strings":["Michigan State University, East Lansing, USA"],"raw_orcid":"https://orcid.org/0000-0002-4606-1824","affiliations":[{"raw_affiliation_string":"Michigan State University, East Lansing, USA","institution_ids":["https://openalex.org/I87216513"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5109603230"],"corresponding_institution_ids":["https://openalex.org/I87216513"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.20278117,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"856","last_page":"865"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12624","display_name":"Maritime and Coastal Archaeology","score":0.8450000286102295,"subfield":{"id":"https://openalex.org/subfields/1204","display_name":"Archeology"},"field":{"id":"https://openalex.org/fields/12","display_name":"Arts and Humanities"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T12624","display_name":"Maritime and Coastal Archaeology","score":0.8450000286102295,"subfield":{"id":"https://openalex.org/subfields/1204","display_name":"Archeology"},"field":{"id":"https://openalex.org/fields/12","display_name":"Arts and Humanities"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11622","display_name":"Maritime Navigation and Safety","score":0.7670999765396118,"subfield":{"id":"https://openalex.org/subfields/2212","display_name":"Ocean Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13215","display_name":"Global Maritime and Colonial Histories","score":0.6908000111579895,"subfield":{"id":"https://openalex.org/subfields/3314","display_name":"Anthropology"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.6290380358695984},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.5761048793792725},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5489188432693481},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.43169790506362915},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.33311158418655396},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.08352389931678772}],"concepts":[{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.6290380358695984},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.5761048793792725},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5489188432693481},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.43169790506362915},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.33311158418655396},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.08352389931678772},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3664647.3681150","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3664647.3681150","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3664647.3681150?download=true","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 32nd ACM International Conference on Multimedia","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3664647.3681150","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3664647.3681150","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3664647.3681150?download=true","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 32nd ACM International Conference on Multimedia","raw_type":"proceedings-article"},"sustainable_development_goals":[{"display_name":"Climate action","id":"https://metadata.un.org/sdg/13","score":0.5199999809265137}],"awards":[{"id":"https://openalex.org/G8876996369","display_name":null,"funder_award_id":"N00014","funder_id":"https://openalex.org/F4320337345","funder_display_name":"Office of Naval Research"}],"funders":[{"id":"https://openalex.org/F4320337345","display_name":"Office of Naval Research","ror":"https://ror.org/00rk2pe57"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4403791663.pdf","grobid_xml":"https://content.openalex.org/works/W4403791663.grobid-xml"},"referenced_works_count":25,"referenced_works":["https://openalex.org/W2117539524","https://openalex.org/W2194775991","https://openalex.org/W2951973805","https://openalex.org/W2953127211","https://openalex.org/W2963800628","https://openalex.org/W2964339842","https://openalex.org/W2979727876","https://openalex.org/W3009928773","https://openalex.org/W3034500398","https://openalex.org/W3035232877","https://openalex.org/W3109085430","https://openalex.org/W3174887918","https://openalex.org/W3192009892","https://openalex.org/W3206064780","https://openalex.org/W4226052928","https://openalex.org/W4226058394","https://openalex.org/W4285137176","https://openalex.org/W4285284019","https://openalex.org/W4306705228","https://openalex.org/W4312253995","https://openalex.org/W4312434279","https://openalex.org/W4312544224","https://openalex.org/W4312996039","https://openalex.org/W4390872665","https://openalex.org/W4390873167"],"related_works":["https://openalex.org/W2772917594","https://openalex.org/W2036807459","https://openalex.org/W2058170566","https://openalex.org/W2755342338","https://openalex.org/W2166024367","https://openalex.org/W3116076068","https://openalex.org/W2229312674","https://openalex.org/W2951359407","https://openalex.org/W2079911747","https://openalex.org/W1969923398"],"abstract_inverted_index":{"The":[0],"existing":[1,99],"methods":[2],"for":[3],"Vision":[4],"and":[5,32,63,105,145,170,200],"Language":[6],"Navigation":[7],"in":[8,95,115],"the":[9,20,24,45,52,71,90,98,117,139,147,152,158,180,194],"Continuous":[10],"Environment":[11],"(VLN-CE)":[12],"commonly":[13],"incorporate":[14],"a":[15,28,128],"waypoint":[16,100,160],"predictor":[17,161],"to":[18,38,84,109,143,151,193],"discretize":[19,70],"environment.":[21],"This":[22],"simplifies":[23],"navigation":[25,34,189],"actions":[26],"into":[27],"view":[29,79,150],"selection":[30],"task":[31],"improves":[33],"performance":[35,190],"significantly":[36],"compared":[37,192],"direct":[39],"training":[40],"using":[41],"low-level":[42,91,129,153,201],"actions.":[43,65,120,183,202],"However,":[44],"VLN-CE":[46,67],"agents":[47,68],"are":[48,57,74],"still":[49],"far":[50],"from":[51],"real":[53],"robots":[54],"since":[55],"there":[56],"gaps":[58],"between":[59],"their":[60,106],"visual":[61,72,149,164],"perception":[62],"executed":[64],"First,":[66],"that":[69],"environment":[73],"primarily":[75],"trained":[76,133],"with":[77,134],"high-level":[78,135,199],"selection,":[80],"which":[81,111],"causes":[82],"them":[83],"ignore":[85],"crucial":[86],"spatial":[87],"reasoning":[88],"within":[89],"action":[92,130,136],"movements.":[93],"Second,":[94],"these":[96,123],"models,":[97],"predictors":[101],"neglect":[102],"object":[103],"semantics":[104],"attributes":[107],"related":[108],"passibility,":[110],"can":[112,187],"be":[113],"informative":[114],"indicating":[116],"feasibility":[118,181],"of":[119,182],"To":[121],"address":[122],"two":[124],"issues,":[125],"we":[126,156],"introduce":[127],"decoder":[131],"jointly":[132],"prediction,":[137],"enabling":[138],"current":[140,159],"VLN":[141],"agent":[142,186],"learn":[144],"ground":[146],"selected":[148],"controls.":[154],"Moreover,":[155],"enhance":[157],"by":[162],"utilizing":[163],"representations":[165],"containing":[166],"rich":[167],"semantic":[168],"information":[169],"explicitly":[171],"masking":[172],"obstacles":[173],"based":[174],"on":[175,197],"humans'":[176],"prior":[177],"knowledge":[178],"about":[179],"Empirically,":[184],"our":[185],"improve":[188],"metrics":[191],"strong":[195],"baselines":[196],"both":[198]},"counts_by_year":[],"updated_date":"2026-03-11T06:11:40.159057","created_date":"2025-10-10T00:00:00"}
