{"id":"https://openalex.org/W4390484004","doi":"https://doi.org/10.1145/3595916.3626450","title":"Vision-Language Navigation for Quadcopters with Conditional Transformer and Prompt-based Text Rephraser","display_name":"Vision-Language Navigation for Quadcopters with Conditional Transformer and Prompt-based Text Rephraser","publication_year":2023,"publication_date":"2023-12-06","ids":{"openalex":"https://openalex.org/W4390484004","doi":"https://doi.org/10.1145/3595916.3626450"},"language":"en","primary_location":{"id":"doi:10.1145/3595916.3626450","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3595916.3626450","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3595916.3626450","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Multimedia Asia 2023","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3595916.3626450","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5103144304","display_name":"Zhe Chen","orcid":"https://orcid.org/0000-0001-9108-9028"},"institutions":[{"id":"https://openalex.org/I66906201","display_name":"University of Yamanashi","ror":"https://ror.org/059x21724","country_code":"JP","type":"education","lineage":["https://openalex.org/I66906201"]},{"id":"https://openalex.org/I50760025","display_name":"Hangzhou Dianzi University","ror":"https://ror.org/0576gt767","country_code":"CN","type":"education","lineage":["https://openalex.org/I50760025"]}],"countries":["CN","JP"],"is_corresponding":true,"raw_author_name":"Zhe Chen","raw_affiliation_strings":["Hangzhou Dianzi University, China and University of Yamanashi, Japan"],"affiliations":[{"raw_affiliation_string":"Hangzhou Dianzi University, China and University of Yamanashi, Japan","institution_ids":["https://openalex.org/I50760025","https://openalex.org/I66906201"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5067208985","display_name":"Jiyi Li","orcid":"https://orcid.org/0000-0003-4997-3850"},"institutions":[{"id":"https://openalex.org/I66906201","display_name":"University of Yamanashi","ror":"https://ror.org/059x21724","country_code":"JP","type":"education","lineage":["https://openalex.org/I66906201"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Jiyi Li","raw_affiliation_strings":["University of Yamanashi, Japan"],"affiliations":[{"raw_affiliation_string":"University of Yamanashi, Japan","institution_ids":["https://openalex.org/I66906201"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5079605672","display_name":"Fumiyo Fukumoto","orcid":"https://orcid.org/0000-0001-7858-6206"},"institutions":[{"id":"https://openalex.org/I66906201","display_name":"University of Yamanashi","ror":"https://ror.org/059x21724","country_code":"JP","type":"education","lineage":["https://openalex.org/I66906201"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Fumiyo Fukumoto","raw_affiliation_strings":["University of Yamanashi, Japan"],"affiliations":[{"raw_affiliation_string":"University of Yamanashi, Japan","institution_ids":["https://openalex.org/I66906201"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5056163974","display_name":"Peng Liu","orcid":"https://orcid.org/0000-0002-3403-2604"},"institutions":[{"id":"https://openalex.org/I50760025","display_name":"Hangzhou Dianzi University","ror":"https://ror.org/0576gt767","country_code":"CN","type":"education","lineage":["https://openalex.org/I50760025"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Peng Liu","raw_affiliation_strings":["Hangzhou Dianzi University, China"],"affiliations":[{"raw_affiliation_string":"Hangzhou Dianzi University, China","institution_ids":["https://openalex.org/I50760025"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5003025940","display_name":"Yoshimi Suzuki","orcid":"https://orcid.org/0000-0001-5466-7351"},"institutions":[{"id":"https://openalex.org/I66906201","display_name":"University of Yamanashi","ror":"https://ror.org/059x21724","country_code":"JP","type":"education","lineage":["https://openalex.org/I66906201"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Yoshimi Suzuki","raw_affiliation_strings":["University of Yamanashi, Japan"],"affiliations":[{"raw_affiliation_string":"University of Yamanashi, Japan","institution_ids":["https://openalex.org/I66906201"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5103144304"],"corresponding_institution_ids":["https://openalex.org/I50760025","https://openalex.org/I66906201"],"apc_list":null,"apc_paid":null,"fwci":0.2427,"has_fulltext":true,"cited_by_count":2,"citation_normalized_percentile":{"value":0.55498347,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"7"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9976999759674072,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6700303554534912},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.47768956422805786},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.460342675447464},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.4307926893234253},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.1525595486164093},{"id":"https://openalex.org/keywords/electrical-engineering","display_name":"Electrical engineering","score":0.11292764544487}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6700303554534912},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.47768956422805786},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.460342675447464},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.4307926893234253},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.1525595486164093},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.11292764544487},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3595916.3626450","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3595916.3626450","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3595916.3626450","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Multimedia Asia 2023","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3595916.3626450","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3595916.3626450","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3595916.3626450","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Multimedia Asia 2023","raw_type":"proceedings-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education","score":0.8100000023841858}],"awards":[{"id":"https://openalex.org/G1069223013","display_name":null,"funder_award_id":"JSPS KAKENHI","funder_id":"https://openalex.org/F4320334764","funder_display_name":"Japan Society for the Promotion of Science"},{"id":"https://openalex.org/G3459562248","display_name":null,"funder_award_id":"Grant","funder_id":"https://openalex.org/F4320334764","funder_display_name":"Japan Society for the Promotion of Science"},{"id":"https://openalex.org/G4636223006","display_name":null,"funder_award_id":"JSPS KAK","funder_id":"https://openalex.org/F4320334764","funder_display_name":"Japan Society for the Promotion of Science"},{"id":"https://openalex.org/G8430481527","display_name":null,"funder_award_id":"Number","funder_id":"https://openalex.org/F4320334764","funder_display_name":"Japan Society for the Promotion of Science"}],"funders":[{"id":"https://openalex.org/F4320334764","display_name":"Japan Society for the Promotion of Science","ror":"https://ror.org/00hhkn466"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4390484004.pdf","grobid_xml":"https://content.openalex.org/works/W4390484004.grobid-xml"},"referenced_works_count":11,"referenced_works":["https://openalex.org/W1901129140","https://openalex.org/W2064675550","https://openalex.org/W2194775991","https://openalex.org/W2615547864","https://openalex.org/W2963800628","https://openalex.org/W2979727876","https://openalex.org/W3005971801","https://openalex.org/W3172675210","https://openalex.org/W3205676116","https://openalex.org/W4312933868","https://openalex.org/W4384268367"],"related_works":["https://openalex.org/W2058170566","https://openalex.org/W2755342338","https://openalex.org/W2772917594","https://openalex.org/W2166024367","https://openalex.org/W3116076068","https://openalex.org/W2229312674","https://openalex.org/W2951359407","https://openalex.org/W2079911747","https://openalex.org/W1969923398","https://openalex.org/W2775347418"],"abstract_inverted_index":{"Controlling":[0],"drones":[1,21],"with":[2,22,126],"natural":[3,48],"language":[4,81],"instructions":[5,36,72,99,155],"is":[6],"an":[7,113],"important":[8],"topic":[9],"in":[10],"Vision-and-Language":[11],"Navigation":[12],"(VLN).":[13],"However,":[14],"previous":[15],"models":[16],"can":[17],"not":[18],"effectively":[19,64],"guide":[20],"the":[23,33,38,43,66,70,78,91,102,133,151,157,163,165,175],"integration":[24],"of":[25,30,80],"multimodal":[26,67],"features,":[27,68],"as":[28],"few":[29],"them":[31],"exploit":[32],"correlations":[34],"between":[35,178],"and":[37,41,73,153,180],"environmental":[39],"contexts":[40],"consider":[42],"model\u2019s":[44],"capacity":[45],"to":[46,63,89],"understand":[47],"languages.":[49],"Therefore,":[50],"we":[51,83,106,143],"propose":[52,107],"a":[53,60,108,127,145],"novel":[54],"language-enhanced":[55],"cross-modal":[56],"model":[57,131],"that":[58,93],"has":[59],"conditional":[61],"Transformer":[62],"integrate":[65],"i.e.,":[69],"textual":[71,98],"visual":[74],"contexts.":[75],"To":[76,140],"enhance":[77],"ability":[79],"representation,":[82],"also":[84],"employ":[85],"SentenceBERT.":[86],"In":[87],"addition,":[88],"address":[90],"issue":[92],"users":[94],"could":[95],"provide":[96],"various":[97],"even":[100],"for":[101,118],"same":[103],"navigation":[104],"task,":[105],"prompt-based":[109],"approach":[110],"by":[111,138,149,170,184],"introducing":[112],"LLM-based":[114],"intermediary":[115],"component":[116],"(LLMIR)":[117],"rephrasing":[119],"users\u2019":[120],"instructions.":[121],"We":[122],"evaluate":[123,141],"our":[124],"approaches":[125],"quadcopter":[128],"simulator.":[129],"Our":[130],"improves":[132,169],"absolute":[134],"task":[135,166],"completion":[136,167],"rate":[137,168],"1.39%.":[139],"LLMIR,":[142],"create":[144],"new":[146,179],"test":[147,159,182],"set":[148,183],"extracting":[150],"essential":[152],"minimal":[154],"from":[156],"original":[158,181],"set.":[160],"By":[161],"using":[162],"LLM,":[164],"1.51%.":[171],"And":[172],"it":[173],"narrows":[174],"performance":[176],"gap":[177],"34.83%.":[185]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":1}],"updated_date":"2026-03-22T08:09:32.410652","created_date":"2025-10-10T00:00:00"}
