{"id":"https://openalex.org/W3091379283","doi":"https://doi.org/10.1109/icra40945.2020.9197199","title":"Learning Navigation Costs from Demonstration in Partially Observable Environments","display_name":"Learning Navigation Costs from Demonstration in Partially Observable Environments","publication_year":2020,"publication_date":"2020-05-01","ids":{"openalex":"https://openalex.org/W3091379283","doi":"https://doi.org/10.1109/icra40945.2020.9197199","mag":"3091379283"},"language":"en","primary_location":{"id":"doi:10.1109/icra40945.2020.9197199","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icra40945.2020.9197199","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2020 IEEE International Conference on Robotics and Automation (ICRA)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100417408","display_name":"Tianyu Wang","orcid":"https://orcid.org/0000-0001-6206-8359"},"institutions":[{"id":"https://openalex.org/I36258959","display_name":"University of California, San Diego","ror":"https://ror.org/0168r3w48","country_code":"US","type":"education","lineage":["https://openalex.org/I36258959"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Tianyu Wang","raw_affiliation_strings":["Department of Electrical and Computer Engineering, University of California, San Diego, La Jolla, CA, USA"],"affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering, University of California, San Diego, La Jolla, CA, USA","institution_ids":["https://openalex.org/I36258959"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5033777391","display_name":"Vikas Dhiman","orcid":"https://orcid.org/0000-0003-0078-3677"},"institutions":[{"id":"https://openalex.org/I36258959","display_name":"University of California, San Diego","ror":"https://ror.org/0168r3w48","country_code":"US","type":"education","lineage":["https://openalex.org/I36258959"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Vikas Dhiman","raw_affiliation_strings":["Department of Electrical and Computer Engineering, University of California, San Diego, La Jolla, CA, USA"],"affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering, University of California, San Diego, La Jolla, CA, USA","institution_ids":["https://openalex.org/I36258959"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5066400889","display_name":"Nikolay Atanasov","orcid":"https://orcid.org/0000-0003-0272-7580"},"institutions":[{"id":"https://openalex.org/I36258959","display_name":"University of California, San Diego","ror":"https://ror.org/0168r3w48","country_code":"US","type":"education","lineage":["https://openalex.org/I36258959"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Nikolay Atanasov","raw_affiliation_strings":["Department of Electrical and Computer Engineering, University of California, San Diego, La Jolla, CA, USA"],"affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering, University of California, San Diego, La Jolla, CA, USA","institution_ids":["https://openalex.org/I36258959"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5100417408"],"corresponding_institution_ids":["https://openalex.org/I36258959"],"apc_list":null,"apc_paid":null,"fwci":0.5302,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.73178966,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":95},"biblio":{"volume":"12","issue":null,"first_page":"4434","last_page":"4440"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10586","display_name":"Robotic Path Planning Algorithms","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6877686381340027},{"id":"https://openalex.org/keywords/trajectory","display_name":"Trajectory","score":0.5883346796035767},{"id":"https://openalex.org/keywords/probabilistic-logic","display_name":"Probabilistic logic","score":0.5708514451980591},{"id":"https://openalex.org/keywords/observability","display_name":"Observability","score":0.5430483222007751},{"id":"https://openalex.org/keywords/observable","display_name":"Observable","score":0.5411045551300049},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.5333725214004517},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.5168653130531311},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.5089240074157715},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4727514684200287},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.45854735374450684},{"id":"https://openalex.org/keywords/function","display_name":"Function (biology)","score":0.4541359543800354},{"id":"https://openalex.org/keywords/compass","display_name":"Compass","score":0.4446392059326172},{"id":"https://openalex.org/keywords/motion-planning","display_name":"Motion planning","score":0.4291326403617859},{"id":"https://openalex.org/keywords/dynamic-programming","display_name":"Dynamic programming","score":0.42275387048721313},{"id":"https://openalex.org/keywords/state-space","display_name":"State space","score":0.41465649008750916},{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.38838693499565125},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.22298139333724976},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.14234161376953125}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6877686381340027},{"id":"https://openalex.org/C13662910","wikidata":"https://www.wikidata.org/wiki/Q193139","display_name":"Trajectory","level":2,"score":0.5883346796035767},{"id":"https://openalex.org/C49937458","wikidata":"https://www.wikidata.org/wiki/Q2599292","display_name":"Probabilistic logic","level":2,"score":0.5708514451980591},{"id":"https://openalex.org/C36299963","wikidata":"https://www.wikidata.org/wiki/Q1369844","display_name":"Observability","level":2,"score":0.5430483222007751},{"id":"https://openalex.org/C32848918","wikidata":"https://www.wikidata.org/wiki/Q845789","display_name":"Observable","level":2,"score":0.5411045551300049},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.5333725214004517},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.5168653130531311},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.5089240074157715},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4727514684200287},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.45854735374450684},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.4541359543800354},{"id":"https://openalex.org/C2778361833","wikidata":"https://www.wikidata.org/wiki/Q34735","display_name":"Compass","level":2,"score":0.4446392059326172},{"id":"https://openalex.org/C81074085","wikidata":"https://www.wikidata.org/wiki/Q366872","display_name":"Motion planning","level":3,"score":0.4291326403617859},{"id":"https://openalex.org/C37404715","wikidata":"https://www.wikidata.org/wiki/Q380679","display_name":"Dynamic programming","level":2,"score":0.42275387048721313},{"id":"https://openalex.org/C72434380","wikidata":"https://www.wikidata.org/wiki/Q230930","display_name":"State space","level":2,"score":0.41465649008750916},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.38838693499565125},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.22298139333724976},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.14234161376953125},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0},{"id":"https://openalex.org/C28826006","wikidata":"https://www.wikidata.org/wiki/Q33521","display_name":"Applied mathematics","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C1276947","wikidata":"https://www.wikidata.org/wiki/Q333","display_name":"Astronomy","level":1,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C78458016","wikidata":"https://www.wikidata.org/wiki/Q840400","display_name":"Evolutionary biology","level":1,"score":0.0},{"id":"https://openalex.org/C58640448","wikidata":"https://www.wikidata.org/wiki/Q42515","display_name":"Cartography","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icra40945.2020.9197199","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icra40945.2020.9197199","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2020 IEEE International Conference on Robotics and Automation (ICRA)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":43,"referenced_works":["https://openalex.org/W131069610","https://openalex.org/W1522301498","https://openalex.org/W1591675293","https://openalex.org/W1594849649","https://openalex.org/W1971086298","https://openalex.org/W1986014385","https://openalex.org/W2011418219","https://openalex.org/W2015263936","https://openalex.org/W2061562262","https://openalex.org/W2098774185","https://openalex.org/W2099430963","https://openalex.org/W2116442740","https://openalex.org/W2133844819","https://openalex.org/W2161076907","https://openalex.org/W2169498096","https://openalex.org/W2593841437","https://openalex.org/W2732671178","https://openalex.org/W2899771611","https://openalex.org/W2962879844","https://openalex.org/W2962893898","https://openalex.org/W2962951365","https://openalex.org/W2963881378","https://openalex.org/W2963980401","https://openalex.org/W2964077562","https://openalex.org/W2964121744","https://openalex.org/W2964332541","https://openalex.org/W3021208093","https://openalex.org/W3027397596","https://openalex.org/W4211092921","https://openalex.org/W4297573592","https://openalex.org/W4297943099","https://openalex.org/W4388475847","https://openalex.org/W6605295560","https://openalex.org/W6631190155","https://openalex.org/W6634004297","https://openalex.org/W6635261211","https://openalex.org/W6674884181","https://openalex.org/W6683620479","https://openalex.org/W6692405165","https://openalex.org/W6735030908","https://openalex.org/W6740792189","https://openalex.org/W6743790455","https://openalex.org/W6756040250"],"related_works":["https://openalex.org/W2046459260","https://openalex.org/W2967463586","https://openalex.org/W2765830098","https://openalex.org/W1971989957","https://openalex.org/W3157641275","https://openalex.org/W2517338020","https://openalex.org/W4312300846","https://openalex.org/W2053863919","https://openalex.org/W2067790096","https://openalex.org/W3086381881"],"abstract_inverted_index":{"This":[0],"paper":[1],"focuses":[2],"on":[3,36,63,133],"inverse":[4],"reinforcement":[5],"learning":[6],"(IRL)":[7],"to":[8,23],"enable":[9],"safe":[10],"and":[11,39,67,88,182],"efficient":[12,150],"autonomous":[13],"navigation":[14,31,173],"in":[15,120,171],"unknown":[16],"partially":[17,122],"observable":[18,123],"environments.":[19],"The":[20,76],"objective":[21],"is":[22,99,118],"infer":[24],"a":[25,48,56,68,89,134,143],"cost":[26,49,69,95],"function":[27,50,108],"that":[28,116,161],"explains":[29],"expert-demonstrated":[30],"behavior":[32],"while":[33,175],"relying":[34],"only":[35,141],"the":[37,44,64,73,83,94,106,110,138,165,178],"observations":[38],"state-control":[40],"trajectory":[41],"used":[42],"by":[43,81,102],"expert.":[45],"We":[46,114],"develop":[47],"representation":[51,77],"composed":[52],"of":[53,137,145,167,180],"two":[54],"parts:":[55],"probabilistic":[57],"occupancy":[58,74],"encoder,":[59,70],"with":[60],"recurrent":[61],"dependence":[62],"observation":[65],"sequence,":[66],"defined":[71],"over":[72,109,142],"features.":[75],"parameters":[78],"are":[79,128],"optimized":[80],"differentiating":[82],"error":[84],"between":[85],"demonstrated":[86],"controls":[87],"control":[90],"policy":[91],"computed":[92,101],"from":[93],"encoder.":[96],"Such":[97],"differentiation":[98],"typically":[100],"dynamic":[103],"programming":[104],"through":[105],"value":[107],"whole":[111],"state":[112],"space.":[113],"observe":[115],"this":[117],"inefficient":[119],"large":[121],"environments":[124],"because":[125],"most":[126],"states":[127,147],"unexplored.":[129],"Instead,":[130],"we":[131],"rely":[132],"closed-form":[135],"subgradient":[136],"cost-to-go":[139],"obtained":[140],"subset":[144],"promising":[146],"via":[148],"an":[149],"motion-planning":[151],"algorithm":[152],"such":[153],"as":[154],"A*":[155],"or":[156],"RRT.":[157],"Our":[158],"experiments":[159],"show":[160],"our":[162],"model":[163],"exceeds":[164],"accuracy":[166],"baseline":[168],"IRL":[169],"algorithms":[170],"robot":[172],"tasks,":[174],"substantially":[176],"improving":[177],"efficiency":[179],"training":[181],"test-time":[183],"inference.":[184]},"counts_by_year":[{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
