{"id":"https://openalex.org/W3166807227","doi":"https://doi.org/10.1109/icme51207.2021.9428422","title":"Depth-Guided AdaIN and Shift Attention Network for Vision-And-Language Navigation","display_name":"Depth-Guided AdaIN and Shift Attention Network for Vision-And-Language Navigation","publication_year":2021,"publication_date":"2021-06-09","ids":{"openalex":"https://openalex.org/W3166807227","doi":"https://doi.org/10.1109/icme51207.2021.9428422","mag":"3166807227"},"language":"en","primary_location":{"id":"doi:10.1109/icme51207.2021.9428422","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icme51207.2021.9428422","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 IEEE International Conference on Multimedia and Expo (ICME)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101792110","display_name":"Qiang Sun","orcid":"https://orcid.org/0000-0002-2877-8960"},"institutions":[{"id":"https://openalex.org/I24943067","display_name":"Fudan University","ror":"https://ror.org/013q1eq08","country_code":"CN","type":"education","lineage":["https://openalex.org/I24943067"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Qiang Sun","raw_affiliation_strings":["Academy for Engineering &#x0026; Technology, Fudan University,China"],"affiliations":[{"raw_affiliation_string":"Academy for Engineering &#x0026; Technology, Fudan University,China","institution_ids":["https://openalex.org/I24943067"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5027114660","display_name":"Yifeng Zhuang","orcid":null},"institutions":[{"id":"https://openalex.org/I24943067","display_name":"Fudan University","ror":"https://ror.org/013q1eq08","country_code":"CN","type":"education","lineage":["https://openalex.org/I24943067"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yifeng Zhuang","raw_affiliation_strings":["School of Computer Science, Fudan University,China","School of Computer Science, Fudan University, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science, Fudan University,China","institution_ids":["https://openalex.org/I24943067"]},{"raw_affiliation_string":"School of Computer Science, Fudan University, China","institution_ids":["https://openalex.org/I24943067"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113976720","display_name":"Zhengqing Chen","orcid":null},"institutions":[{"id":"https://openalex.org/I24943067","display_name":"Fudan University","ror":"https://ror.org/013q1eq08","country_code":"CN","type":"education","lineage":["https://openalex.org/I24943067"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhengqing Chen","raw_affiliation_strings":["School of Computer Science, Fudan University,China","School of Computer Science, Fudan University, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science, Fudan University,China","institution_ids":["https://openalex.org/I24943067"]},{"raw_affiliation_string":"School of Computer Science, Fudan University, China","institution_ids":["https://openalex.org/I24943067"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5084959430","display_name":"Yanwei Fu","orcid":"https://orcid.org/0000-0002-6595-6893"},"institutions":[{"id":"https://openalex.org/I24943067","display_name":"Fudan University","ror":"https://ror.org/013q1eq08","country_code":"CN","type":"education","lineage":["https://openalex.org/I24943067"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yanwei Fu","raw_affiliation_strings":["School of Data Science, Fudan University, MOE Frontiers Center for Brain Science, and Shanghai Key Lab of Intelligent Information Processing,China","School of Data Science, Fudan University, MOE Frontiers Center for Brain Science, and Shanghai Key Lab of Intelligent Information Processing, China"],"affiliations":[{"raw_affiliation_string":"School of Data Science, Fudan University, MOE Frontiers Center for Brain Science, and Shanghai Key Lab of Intelligent Information Processing,China","institution_ids":["https://openalex.org/I24943067"]},{"raw_affiliation_string":"School of Data Science, Fudan University, MOE Frontiers Center for Brain Science, and Shanghai Key Lab of Intelligent Information Processing, China","institution_ids":["https://openalex.org/I24943067"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5003418019","display_name":"Xiangyang Xue","orcid":"https://orcid.org/0000-0002-4897-9209"},"institutions":[{"id":"https://openalex.org/I24943067","display_name":"Fudan University","ror":"https://ror.org/013q1eq08","country_code":"CN","type":"education","lineage":["https://openalex.org/I24943067"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiangyang Xue","raw_affiliation_strings":["School of Computer Science, Fudan University,China","School of Computer Science, Fudan University, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science, Fudan University,China","institution_ids":["https://openalex.org/I24943067"]},{"raw_affiliation_string":"School of Computer Science, Fudan University, China","institution_ids":["https://openalex.org/I24943067"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5101792110"],"corresponding_institution_ids":["https://openalex.org/I24943067"],"apc_list":null,"apc_paid":null,"fwci":0.5764,"has_fulltext":false,"cited_by_count":6,"citation_normalized_percentile":{"value":0.67614379,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9972000122070312,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8309069871902466},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.6518169045448303},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6089508533477783},{"id":"https://openalex.org/keywords/normalization","display_name":"Normalization (sociology)","score":0.5908022522926331},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.48305875062942505},{"id":"https://openalex.org/keywords/rgb-color-model","display_name":"RGB color model","score":0.41461044549942017},{"id":"https://openalex.org/keywords/task-analysis","display_name":"Task analysis","score":0.4138849675655365},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.41077637672424316},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.3330834209918976}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8309069871902466},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.6518169045448303},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6089508533477783},{"id":"https://openalex.org/C136886441","wikidata":"https://www.wikidata.org/wiki/Q926129","display_name":"Normalization (sociology)","level":2,"score":0.5908022522926331},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.48305875062942505},{"id":"https://openalex.org/C82990744","wikidata":"https://www.wikidata.org/wiki/Q166194","display_name":"RGB color model","level":2,"score":0.41461044549942017},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.4138849675655365},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.41077637672424316},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.3330834209918976},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C19165224","wikidata":"https://www.wikidata.org/wiki/Q23404","display_name":"Anthropology","level":1,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icme51207.2021.9428422","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icme51207.2021.9428422","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 IEEE International Conference on Multimedia and Expo (ICME)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":28,"referenced_works":["https://openalex.org/W2502312327","https://openalex.org/W2603777577","https://openalex.org/W2805984364","https://openalex.org/W2884565639","https://openalex.org/W2909303996","https://openalex.org/W2926977875","https://openalex.org/W2954284167","https://openalex.org/W2962744691","https://openalex.org/W2963800628","https://openalex.org/W2963846044","https://openalex.org/W2964339842","https://openalex.org/W2964935470","https://openalex.org/W2966715458","https://openalex.org/W2974759213","https://openalex.org/W2981799368","https://openalex.org/W2990152177","https://openalex.org/W3034500398","https://openalex.org/W3035232877","https://openalex.org/W4303633609","https://openalex.org/W4385245566","https://openalex.org/W6684821475","https://openalex.org/W6724804524","https://openalex.org/W6739901393","https://openalex.org/W6751885507","https://openalex.org/W6753516098","https://openalex.org/W6757724268","https://openalex.org/W6765279218","https://openalex.org/W6766904570"],"related_works":["https://openalex.org/W1564680838","https://openalex.org/W2060591604","https://openalex.org/W2003125260","https://openalex.org/W2166791242","https://openalex.org/W3012440055","https://openalex.org/W1934413089","https://openalex.org/W2585162246","https://openalex.org/W1992291644","https://openalex.org/W2098419343","https://openalex.org/W2051178964"],"abstract_inverted_index":{"Visual":[0],"Language":[1],"Navigation":[2],"(VLN)":[3],"is":[4],"the":[5,12,17,25,36,39,44,47,58,94,111,115,119,124,148,154,165,170,177,183],"grand":[6],"goal":[7],"of":[8,153,179],"AI,":[9],"which":[10],"enables":[11],"agent":[13,26],"to":[14,28,117,123,146,163],"act":[15],"by":[16,35],"language":[18],"instructions":[19,37],"from":[20],"humans.":[21],"In":[22],"VLN":[23],"task,":[24,88],"learns":[27],"search":[29],"for":[30,93],"a":[31,53,77,83,89,139,159],"specific":[32],"region":[33],"described":[34],"in":[38,46,75,107,151,169],"training":[40],"environments,":[41],"and":[42,60,71,85,103,157],"performs":[43],"navigation":[45,79,95],"unseen":[48,61],"environments.":[49,62],"Normally,":[50],"there":[51],"exists":[52],"large":[54],"domain":[55],"gap":[56],"be-tween":[57],"seen":[59],"Numerous":[63],"works":[64],"have":[65,175],"been":[66,105],"put":[67],"on":[68,138,182],"data":[69],"augmentation":[70],"designing":[72],"new":[73],"loss":[74],"such":[76],"multi-task":[78],"setting.":[80],"However,":[81],"as":[82],"spatial":[84,121],"temporal":[86],"searching":[87],"valuable":[90],"signal":[91],"source":[92],"\u2013":[96],"depth":[97,155],"has":[98],"not":[99],"yet":[100],"fully":[101],"explored":[102],"thus":[104],"ignored":[106],"previous":[108],"efforts.":[109],"Typically,":[110],"current":[112],"models":[113],"lack":[114],"ability":[116],"capture":[118],"relative":[120,166],"directions":[122],"grounding":[125],"view.":[126],"To":[127],"address":[128],"these":[129],"issues,":[130],"we":[131],"propose":[132],"an":[133],"environment":[134],"adaptive":[135],"method":[136,181],"based":[137],"Depth-guided":[140],"Adaptive":[141],"Instance":[142],"Normalization":[143],"(DG-AdaIN)":[144],"module":[145,162],"adjust":[147],"RGB":[149],"features":[150],"term":[152],"features,":[156],"develop":[158],"shift":[160],"attention":[161,171],"model":[164],"direct":[167],"information":[168],"map.":[172],"Extensive":[173],"experiments":[174],"validated":[176],"efficacy":[178],"our":[180],"benchmark":[184],"dataset.":[185]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":3},{"year":2021,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
