{"id":"https://openalex.org/W4401417236","doi":"https://doi.org/10.1109/icra57147.2024.10610499","title":"Bridging Zero-shot Object Navigation and Foundation Models through Pixel-Guided Navigation Skill","display_name":"Bridging Zero-shot Object Navigation and Foundation Models through Pixel-Guided Navigation Skill","publication_year":2024,"publication_date":"2024-05-13","ids":{"openalex":"https://openalex.org/W4401417236","doi":"https://doi.org/10.1109/icra57147.2024.10610499"},"language":"en","primary_location":{"id":"doi:10.1109/icra57147.2024.10610499","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icra57147.2024.10610499","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE International Conference on Robotics and Automation (ICRA)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5059258252","display_name":"Wenzhe Cai","orcid":"https://orcid.org/0000-0003-4610-3454"},"institutions":[{"id":"https://openalex.org/I4210090971","display_name":"Southeast University","ror":"https://ror.org/00cf0ab87","country_code":"BD","type":"education","lineage":["https://openalex.org/I4210090971"]}],"countries":["BD"],"is_corresponding":true,"raw_author_name":"Wenzhe Cai","raw_affiliation_strings":["Southeast University,School of Automation"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Southeast University,School of Automation","institution_ids":["https://openalex.org/I4210090971"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5067080265","display_name":"Siyuan Huang","orcid":"https://orcid.org/0000-0003-1524-7148"},"institutions":[{"id":"https://openalex.org/I4210122302","display_name":"ShangHai JiAi Genetics & IVF Institute","ror":"https://ror.org/02rgbry52","country_code":"CN","type":"healthcare","lineage":["https://openalex.org/I4210122302"]},{"id":"https://openalex.org/I4391012619","display_name":"Shanghai Artificial Intelligence Laboratory","ror":"https://ror.org/03wkvpx79","country_code":null,"type":"facility","lineage":["https://openalex.org/I4391012619"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Siyuan Huang","raw_affiliation_strings":["Shanghai AI Laboratory"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Shanghai AI Laboratory","institution_ids":["https://openalex.org/I4210122302","https://openalex.org/I4391012619"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101632944","display_name":"Guangran Cheng","orcid":"https://orcid.org/0000-0003-0865-532X"},"institutions":[{"id":"https://openalex.org/I4210090971","display_name":"Southeast University","ror":"https://ror.org/00cf0ab87","country_code":"BD","type":"education","lineage":["https://openalex.org/I4210090971"]}],"countries":["BD"],"is_corresponding":false,"raw_author_name":"Guangran Cheng","raw_affiliation_strings":["Southeast University,School of Automation"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Southeast University,School of Automation","institution_ids":["https://openalex.org/I4210090971"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5049786228","display_name":"Yuxing Long","orcid":"https://orcid.org/0009-0006-3246-459X"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]},{"id":"https://openalex.org/I111483173","display_name":"King University","ror":"https://ror.org/01evb6z23","country_code":"US","type":"education","lineage":["https://openalex.org/I111483173"]}],"countries":["CN","US"],"is_corresponding":false,"raw_author_name":"Yuxing Long","raw_affiliation_strings":["Peking University and National Key Laboratory for Multimedia Information Processing,Hyperlane Lab, CFCS, School of CS"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Peking University and National Key Laboratory for Multimedia Information Processing,Hyperlane Lab, CFCS, School of CS","institution_ids":["https://openalex.org/I111483173","https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5085917066","display_name":"Peng Gao","orcid":"https://orcid.org/0000-0002-5176-628X"},"institutions":[{"id":"https://openalex.org/I4391012619","display_name":"Shanghai Artificial Intelligence Laboratory","ror":"https://ror.org/03wkvpx79","country_code":null,"type":"facility","lineage":["https://openalex.org/I4391012619"]},{"id":"https://openalex.org/I4210122302","display_name":"ShangHai JiAi Genetics & IVF Institute","ror":"https://ror.org/02rgbry52","country_code":"CN","type":"healthcare","lineage":["https://openalex.org/I4210122302"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Peng Gao","raw_affiliation_strings":["Shanghai AI Laboratory"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Shanghai AI Laboratory","institution_ids":["https://openalex.org/I4210122302","https://openalex.org/I4391012619"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5019248683","display_name":"Changyin Sun","orcid":"https://orcid.org/0000-0001-9269-334X"},"institutions":[{"id":"https://openalex.org/I4210090971","display_name":"Southeast University","ror":"https://ror.org/00cf0ab87","country_code":"BD","type":"education","lineage":["https://openalex.org/I4210090971"]}],"countries":["BD"],"is_corresponding":false,"raw_author_name":"Changyin Sun","raw_affiliation_strings":["Southeast University,School of Automation"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Southeast University,School of Automation","institution_ids":["https://openalex.org/I4210090971"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100389347","display_name":"Hao Dong","orcid":"https://orcid.org/0000-0002-0132-0239"},"institutions":[{"id":"https://openalex.org/I111483173","display_name":"King University","ror":"https://ror.org/01evb6z23","country_code":"US","type":"education","lineage":["https://openalex.org/I111483173"]},{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN","US"],"is_corresponding":false,"raw_author_name":"Hao Dong","raw_affiliation_strings":["Peking University and National Key Laboratory for Multimedia Information Processing,Hyperlane Lab, CFCS, School of CS"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Peking University and National Key Laboratory for Multimedia Information Processing,Hyperlane Lab, CFCS, School of CS","institution_ids":["https://openalex.org/I111483173","https://openalex.org/I20231570"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5059258252"],"corresponding_institution_ids":["https://openalex.org/I4210090971"],"apc_list":null,"apc_paid":null,"fwci":8.5053,"has_fulltext":false,"cited_by_count":36,"citation_normalized_percentile":{"value":0.9847372,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":94,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"5228","last_page":"5234"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9934999942779541,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10586","display_name":"Robotic Path Planning Algorithms","score":0.9904999732971191,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/bridging","display_name":"Bridging (networking)","score":0.8187028169631958},{"id":"https://openalex.org/keywords/pixel","display_name":"Pixel","score":0.7078864574432373},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.6359891295433044},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5971513986587524},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5415603518486023},{"id":"https://openalex.org/keywords/zero","display_name":"Zero (linguistics)","score":0.510316789150238},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.4385419189929962},{"id":"https://openalex.org/keywords/shot","display_name":"Shot (pellet)","score":0.41659754514694214},{"id":"https://openalex.org/keywords/computer-graphics","display_name":"Computer graphics (images)","score":0.3944428563117981},{"id":"https://openalex.org/keywords/computer-security","display_name":"Computer security","score":0.07436883449554443}],"concepts":[{"id":"https://openalex.org/C174348530","wikidata":"https://www.wikidata.org/wiki/Q188635","display_name":"Bridging (networking)","level":2,"score":0.8187028169631958},{"id":"https://openalex.org/C160633673","wikidata":"https://www.wikidata.org/wiki/Q355198","display_name":"Pixel","level":2,"score":0.7078864574432373},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.6359891295433044},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5971513986587524},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5415603518486023},{"id":"https://openalex.org/C2780813799","wikidata":"https://www.wikidata.org/wiki/Q3274237","display_name":"Zero (linguistics)","level":2,"score":0.510316789150238},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.4385419189929962},{"id":"https://openalex.org/C2778344882","wikidata":"https://www.wikidata.org/wiki/Q278938","display_name":"Shot (pellet)","level":2,"score":0.41659754514694214},{"id":"https://openalex.org/C121684516","wikidata":"https://www.wikidata.org/wiki/Q7600677","display_name":"Computer graphics (images)","level":1,"score":0.3944428563117981},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.07436883449554443},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C178790620","wikidata":"https://www.wikidata.org/wiki/Q11351","display_name":"Organic chemistry","level":1,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icra57147.2024.10610499","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icra57147.2024.10610499","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE International Conference on Robotics and Automation (ICRA)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":48,"referenced_works":["https://openalex.org/W398859631","https://openalex.org/W569478347","https://openalex.org/W3009928773","https://openalex.org/W3023742835","https://openalex.org/W3040041096","https://openalex.org/W3211462570","https://openalex.org/W4283640928","https://openalex.org/W4286973470","https://openalex.org/W4287112297","https://openalex.org/W4292779060","https://openalex.org/W4312647074","https://openalex.org/W4312707839","https://openalex.org/W4322718191","https://openalex.org/W4376312115","https://openalex.org/W4377164392","https://openalex.org/W4383108296","https://openalex.org/W4383108457","https://openalex.org/W4383108519","https://openalex.org/W4383108895","https://openalex.org/W4385416186","https://openalex.org/W4385430679","https://openalex.org/W4386075839","https://openalex.org/W4386083015","https://openalex.org/W4389666115","https://openalex.org/W4390204337","https://openalex.org/W4390874575","https://openalex.org/W4393154152","https://openalex.org/W4402917081","https://openalex.org/W4404612908","https://openalex.org/W6778883912","https://openalex.org/W6780443396","https://openalex.org/W6791353385","https://openalex.org/W6797065961","https://openalex.org/W6800673378","https://openalex.org/W6809509765","https://openalex.org/W6810940169","https://openalex.org/W6811433417","https://openalex.org/W6839632867","https://openalex.org/W6839650396","https://openalex.org/W6849793301","https://openalex.org/W6850503672","https://openalex.org/W6850625674","https://openalex.org/W6850787431","https://openalex.org/W6851950068","https://openalex.org/W6852796095","https://openalex.org/W6853116092","https://openalex.org/W6853520483","https://openalex.org/W6855642159"],"related_works":["https://openalex.org/W2074502265","https://openalex.org/W4214877189","https://openalex.org/W2773965352","https://openalex.org/W4388870064","https://openalex.org/W2381179799","https://openalex.org/W2210139803","https://openalex.org/W4235186151","https://openalex.org/W2980279061","https://openalex.org/W2334685461","https://openalex.org/W2054685365"],"abstract_inverted_index":{"Zero-shot":[0],"object":[1,97,156],"navigation":[2,62,85,112,192],"is":[3,88,123],"a":[4,69,110,124],"challenging":[5],"task":[6,11],"for":[7,30,90],"home-assistance":[8],"robots.":[9,135],"This":[10],"emphasizes":[12],"visual":[13],"grounding,":[14],"commonsense":[15,166],"inference":[16],"and":[17,47,82,100,170,183],"locomotion":[18,32],"abilities,":[19],"where":[20],"the":[21,31,56,76,79,83,104,131,138,141,149,165,174,187],"first":[22],"two":[23],"are":[24,196],"inherent":[25],"in":[26,148],"foundation":[27,59,80,92],"models.":[28],"But":[29],"part,":[33],"most":[34],"works":[35],"still":[36],"depend":[37],"on":[38],"map-based":[39],"planning":[40],"approaches.":[41],"The":[42],"gap":[43,77],"between":[44,78,168],"RGB":[45],"space":[46,49],"map":[48],"makes":[50],"it":[51],"difficult":[52],"to":[53,61,94,163,172],"directly":[54],"transfer":[55],"knowledge":[57,167],"from":[58],"models":[60,81,93],"tasks.":[63],"In":[64],"this":[65],"work,":[66],"we":[67,158],"propose":[68],"Pixel-guided":[70],"Navigation":[71],"skill":[72],"(PixNav),":[73],"which":[74,143],"bridges":[75],"embodied":[84],"task.":[86,152],"It":[87],"straightforward":[89],"recent":[91],"indicate":[95],"an":[96,160],"by":[98],"pixels,":[99],"with":[101],"pixels":[102],"as":[103],"goal":[105],"specification,":[106],"our":[107,121,190,199],"method":[108],"becomes":[109],"versatile":[111],"policy":[113,127],"towards":[114],"all":[115],"different":[116],"kinds":[117],"of":[118,133,140,189],"objects.":[119],"Besides,":[120],"PixNav":[122,142],"pure":[125],"RGB-based":[126],"that":[128],"can":[129],"reduce":[130],"cost":[132],"homeassistance":[134],"Experiments":[136],"demonstrate":[137],"robustness":[139],"achieves":[144],"80+%":[145],"success":[146],"rate":[147],"local":[150],"path-planning":[151],"To":[153],"perform":[154],"long-horizon":[155],"navigation,":[157],"design":[159],"LLM-based":[161],"planner":[162],"utilize":[164],"objects":[169],"rooms":[171],"select":[173],"best":[175],"waypoint.":[176],"Evaluations":[177],"across":[178],"both":[179],"photorealistic":[180],"indoor":[181],"simulators":[182],"real-world":[184],"environments":[185],"validate":[186],"effectiveness":[188],"proposed":[191],"strategy.":[193],"More":[194],"details":[195],"accessible":[197],"via":[198],"project":[200],"website":[201],"https://sites.google.com/view/pixnav/.":[202]},"counts_by_year":[{"year":2026,"cited_by_count":9},{"year":2025,"cited_by_count":25},{"year":2024,"cited_by_count":2}],"updated_date":"2026-05-02T08:42:23.175194","created_date":"2025-10-10T00:00:00"}
