{"id":"https://openalex.org/W2759463506","doi":"https://doi.org/10.1109/icra.2018.8460655","title":"Self-Supervised Deep Reinforcement Learning with Generalized Computation Graphs for Robot Navigation","display_name":"Self-Supervised Deep Reinforcement Learning with Generalized Computation Graphs for Robot Navigation","publication_year":2018,"publication_date":"2018-05-01","ids":{"openalex":"https://openalex.org/W2759463506","doi":"https://doi.org/10.1109/icra.2018.8460655","mag":"2759463506"},"language":"en","primary_location":{"id":"doi:10.1109/icra.2018.8460655","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icra.2018.8460655","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2018 IEEE International Conference on Robotics and Automation (ICRA)","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/1709.10489","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5064156720","display_name":"Gregory Kahn","orcid":"https://orcid.org/0000-0003-1771-6147"},"institutions":[{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Gregory Kahn","raw_affiliation_strings":["Berkeley AI Research (BAIR), University of California, Berkeley","University of California, Berkeley AI Research (BAIR), Berkeley"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Berkeley AI Research (BAIR), University of California, Berkeley","institution_ids":["https://openalex.org/I95457486"]},{"raw_affiliation_string":"University of California, Berkeley AI Research (BAIR), Berkeley","institution_ids":["https://openalex.org/I95457486"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5027887123","display_name":"Adam Villaflor","orcid":null},"institutions":[{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Adam Villaflor","raw_affiliation_strings":["Berkeley AI Research (BAIR), University of California, Berkeley","University of California, Berkeley AI Research (BAIR), Berkeley"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Berkeley AI Research (BAIR), University of California, Berkeley","institution_ids":["https://openalex.org/I95457486"]},{"raw_affiliation_string":"University of California, Berkeley AI Research (BAIR), Berkeley","institution_ids":["https://openalex.org/I95457486"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5048526274","display_name":"Bosen Ding","orcid":null},"institutions":[{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Bosen Ding","raw_affiliation_strings":["Berkeley AI Research (BAIR), University of California, Berkeley","University of California, Berkeley AI Research (BAIR), Berkeley"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Berkeley AI Research (BAIR), University of California, Berkeley","institution_ids":["https://openalex.org/I95457486"]},{"raw_affiliation_string":"University of California, Berkeley AI Research (BAIR), Berkeley","institution_ids":["https://openalex.org/I95457486"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5049349154","display_name":"Pieter Abbeel","orcid":null},"institutions":[{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Pieter Abbeel","raw_affiliation_strings":["Berkeley AI Research (BAIR), University of California, Berkeley","University of California, Berkeley AI Research (BAIR), Berkeley"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Berkeley AI Research (BAIR), University of California, Berkeley","institution_ids":["https://openalex.org/I95457486"]},{"raw_affiliation_string":"University of California, Berkeley AI Research (BAIR), Berkeley","institution_ids":["https://openalex.org/I95457486"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5026322200","display_name":"Sergey Levine","orcid":"https://orcid.org/0000-0001-6764-2743"},"institutions":[{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Sergey Levine","raw_affiliation_strings":["Berkeley AI Research (BAIR), University of California, Berkeley","University of California, Berkeley AI Research (BAIR), Berkeley"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Berkeley AI Research (BAIR), University of California, Berkeley","institution_ids":["https://openalex.org/I95457486"]},{"raw_affiliation_string":"University of California, Berkeley AI Research (BAIR), Berkeley","institution_ids":["https://openalex.org/I95457486"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":2.1964,"has_fulltext":true,"cited_by_count":24,"citation_normalized_percentile":{"value":0.90300671,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"5129","last_page":"5136"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10191","display_name":"Robotics and Sensor-Based Localization","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/2202","display_name":"Aerospace Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10586","display_name":"Robotic Path Planning Algorithms","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7538073062896729},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7006452083587646},{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.6945624947547913},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6390656232833862},{"id":"https://openalex.org/keywords/computation","display_name":"Computation","score":0.6378530859947205},{"id":"https://openalex.org/keywords/software-deployment","display_name":"Software deployment","score":0.5976349115371704},{"id":"https://openalex.org/keywords/graph","display_name":"Graph","score":0.5327313542366028},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.5035397410392761},{"id":"https://openalex.org/keywords/variety","display_name":"Variety (cybernetics)","score":0.44617751240730286},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.4408940076828003},{"id":"https://openalex.org/keywords/sample","display_name":"Sample (material)","score":0.43921029567718506},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.2448592483997345},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.12156164646148682},{"id":"https://openalex.org/keywords/software-engineering","display_name":"Software engineering","score":0.07729315757751465}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7538073062896729},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7006452083587646},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.6945624947547913},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6390656232833862},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.6378530859947205},{"id":"https://openalex.org/C105339364","wikidata":"https://www.wikidata.org/wiki/Q2297740","display_name":"Software deployment","level":2,"score":0.5976349115371704},{"id":"https://openalex.org/C132525143","wikidata":"https://www.wikidata.org/wiki/Q141488","display_name":"Graph","level":2,"score":0.5327313542366028},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5035397410392761},{"id":"https://openalex.org/C136197465","wikidata":"https://www.wikidata.org/wiki/Q1729295","display_name":"Variety (cybernetics)","level":2,"score":0.44617751240730286},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.4408940076828003},{"id":"https://openalex.org/C198531522","wikidata":"https://www.wikidata.org/wiki/Q485146","display_name":"Sample (material)","level":2,"score":0.43921029567718506},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.2448592483997345},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.12156164646148682},{"id":"https://openalex.org/C115903868","wikidata":"https://www.wikidata.org/wiki/Q80993","display_name":"Software engineering","level":1,"score":0.07729315757751465},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C43617362","wikidata":"https://www.wikidata.org/wiki/Q170050","display_name":"Chromatography","level":1,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1109/icra.2018.8460655","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icra.2018.8460655","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2018 IEEE International Conference on Robotics and Automation (ICRA)","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:1709.10489","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1709.10489","pdf_url":"https://arxiv.org/pdf/1709.10489","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"mag:2759463506","is_oa":true,"landing_page_url":"https://arxiv.org/pdf/1709.10489","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"arXiv (Cornell University)","raw_type":null},{"id":"doi:10.48550/arxiv.1709.10489","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.1709.10489","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:1709.10489","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1709.10489","pdf_url":"https://arxiv.org/pdf/1709.10489","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320309480","display_name":"Nvidia","ror":"https://ror.org/03jdj4y14"},{"id":"https://openalex.org/F4320338281","display_name":"Army Research Office","ror":"https://ror.org/05epdh915"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2759463506.pdf","grobid_xml":"https://content.openalex.org/works/W2759463506.grobid-xml"},"referenced_works_count":43,"referenced_works":["https://openalex.org/W51508254","https://openalex.org/W1502364872","https://openalex.org/W1506241136","https://openalex.org/W1979266466","https://openalex.org/W1999050977","https://openalex.org/W2024139303","https://openalex.org/W2069239883","https://openalex.org/W2118429180","https://openalex.org/W2118688707","https://openalex.org/W2121806728","https://openalex.org/W2121863487","https://openalex.org/W2132400125","https://openalex.org/W2156256170","https://openalex.org/W2167224731","https://openalex.org/W2336416123","https://openalex.org/W2342840547","https://openalex.org/W2406067508","https://openalex.org/W2469894155","https://openalex.org/W2555488107","https://openalex.org/W2586067474","https://openalex.org/W2951660448","https://openalex.org/W2953248129","https://openalex.org/W2962957005","https://openalex.org/W2963424321","https://openalex.org/W4232280717","https://openalex.org/W6602057636","https://openalex.org/W6630394583","https://openalex.org/W6637967152","https://openalex.org/W6645240944","https://openalex.org/W6650313687","https://openalex.org/W6676861555","https://openalex.org/W6677477928","https://openalex.org/W6678097026","https://openalex.org/W6683300800","https://openalex.org/W6684338915","https://openalex.org/W6684921986","https://openalex.org/W6704559304","https://openalex.org/W6719700797","https://openalex.org/W6728397222","https://openalex.org/W6730027589","https://openalex.org/W6733118196","https://openalex.org/W6740836278","https://openalex.org/W6741515245"],"related_works":["https://openalex.org/W2963544079","https://openalex.org/W3104933725","https://openalex.org/W3095339682","https://openalex.org/W3046387304","https://openalex.org/W2770679144","https://openalex.org/W94382907","https://openalex.org/W2969017387","https://openalex.org/W3102366189","https://openalex.org/W3085134088","https://openalex.org/W2038822802","https://openalex.org/W569084886","https://openalex.org/W2534269850","https://openalex.org/W3044158561","https://openalex.org/W2967452881","https://openalex.org/W13141533","https://openalex.org/W1494702199","https://openalex.org/W2607101700","https://openalex.org/W2968968404","https://openalex.org/W2952972066","https://openalex.org/W3131623996"],"abstract_inverted_index":{"Enabling":[0],"robots":[1],"to":[2,36,75,81,90,124,178],"autonomously":[3],"navigate":[4,37,179],"complex":[5,92,182],"environments":[6],"is":[7,135],"essential":[8],"for":[9],"real-world":[10,79,170],"deployment.":[11],"Prior":[12],"methods":[13,63,107],"approach":[14,153,167],"this":[15,122],"problem":[16],"by":[17],"having":[18],"the":[19,26,39,66,70,78,88,143,196],"robot":[20,67],"maintain":[21],"an":[22],"internal":[23,40],"map":[24],"of":[25,49,146,189,195],"world,":[27],"and":[28,33,54,108,117,134,150,156,173,198],"then":[29,120],"use":[30],"a":[31,47,99,126,169,181,186],"localization":[32],"planning":[34],"method":[35],"through":[38,180],"map.":[41],"However,":[42],"these":[43],"approaches":[44],"often":[45],"include":[46],"variety":[48],"assumptions,":[50],"are":[51,73],"computationally":[52],"intensive,":[53],"do":[55],"not":[56],"learn":[57,91,177],"from":[58,131],"failures.":[59],"In":[60],"contrast,":[61],"learning-based":[62],"improve":[64],"as":[65],"acts":[68],"in":[69,77],"environment,":[71],"but":[72],"difficult":[74],"deploy":[76],"due":[80],"their":[82],"high":[83],"sample":[84,136],"complexity.":[85],"To":[86],"address":[87],"need":[89],"policies":[93],"with":[94,111,185],"few":[95,187],"samples,":[96],"we":[97],"propose":[98],"generalized":[100],"computation":[101],"graph":[102,123],"that":[103,129],"subsumes":[104],"value-based":[105],"model-free":[106,116],"model-based":[109],"methods,":[110],"specific":[112],"instantiations":[113],"interpolating":[114],"between":[115],"model-based.":[118],"We":[119,163],"instantiate":[121],"form":[125],"navigation":[127,148],"model":[128],"learns":[130],"raw":[132],"images":[133],"efficient.":[137],"Our":[138],"simulated":[139],"car":[140,172],"experiments":[141,197],"explore":[142],"design":[144],"decisions":[145],"our":[147,152,166],"model,":[149],"show":[151,174],"outperforms":[154],"single-step":[155],"<i":[157],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[158],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">N</i>":[159],"-step":[160],"double":[161],"Q-learning.":[162],"also":[164],"evaluate":[165],"on":[168],"RC":[171],"it":[175],"can":[176,200],"indoor":[183],"environment":[184],"hours":[188],"fully":[190],"autonomous,":[191],"self-supervised":[192],"training.":[193],"Videos":[194],"code":[199],"be":[201],"found":[202],"at":[203],"github.com/gkahn13/gcg.":[204]},"counts_by_year":[{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":3},{"year":2021,"cited_by_count":3},{"year":2020,"cited_by_count":5},{"year":2019,"cited_by_count":3},{"year":2018,"cited_by_count":2},{"year":2017,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
