{"id":"https://openalex.org/W2775019037","doi":"https://doi.org/10.1109/iros.2017.8205960","title":"Deep dynamic policy programming for robot control with raw images","display_name":"Deep dynamic policy programming for robot control with raw images","publication_year":2017,"publication_date":"2017-09-01","ids":{"openalex":"https://openalex.org/W2775019037","doi":"https://doi.org/10.1109/iros.2017.8205960","mag":"2775019037"},"language":"en","primary_location":{"id":"doi:10.1109/iros.2017.8205960","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iros.2017.8205960","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2017 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5056162412","display_name":"Yoshihisa Tsurumine","orcid":"https://orcid.org/0000-0002-1514-1135"},"institutions":[{"id":"https://openalex.org/I75917431","display_name":"Nara Institute of Science and Technology","ror":"https://ror.org/05bhada84","country_code":"JP","type":"education","lineage":["https://openalex.org/I75917431"]}],"countries":["JP"],"is_corresponding":true,"raw_author_name":"Yoshihisa Tsurumine","raw_affiliation_strings":["Graduate School of Information Science, Nara Institute of Science and Technology (NAIST), Nara, Japan"],"affiliations":[{"raw_affiliation_string":"Graduate School of Information Science, Nara Institute of Science and Technology (NAIST), Nara, Japan","institution_ids":["https://openalex.org/I75917431"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5011048472","display_name":"Yunduan Cui","orcid":"https://orcid.org/0000-0001-5539-4260"},"institutions":[{"id":"https://openalex.org/I75917431","display_name":"Nara Institute of Science and Technology","ror":"https://ror.org/05bhada84","country_code":"JP","type":"education","lineage":["https://openalex.org/I75917431"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Yunduan Cui","raw_affiliation_strings":["Graduate School of Information Science, Nara Institute of Science and Technology (NAIST), Nara, Japan"],"affiliations":[{"raw_affiliation_string":"Graduate School of Information Science, Nara Institute of Science and Technology (NAIST), Nara, Japan","institution_ids":["https://openalex.org/I75917431"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5031054137","display_name":"Eiji Uchibe","orcid":"https://orcid.org/0000-0001-7908-0258"},"institutions":[{"id":"https://openalex.org/I4210104143","display_name":"Advanced Telecommunications Research Institute International","ror":"https://ror.org/01pe1d703","country_code":"JP","type":"facility","lineage":["https://openalex.org/I4210104143"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Eiji Uchibe","raw_affiliation_strings":["Advanced Telecommunications Research Institute International (ATR), Kyoto, Japan"],"affiliations":[{"raw_affiliation_string":"Advanced Telecommunications Research Institute International (ATR), Kyoto, Japan","institution_ids":["https://openalex.org/I4210104143"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5042074952","display_name":"Takamitsu Matsubara","orcid":"https://orcid.org/0000-0003-3545-4814"},"institutions":[{"id":"https://openalex.org/I75917431","display_name":"Nara Institute of Science and Technology","ror":"https://ror.org/05bhada84","country_code":"JP","type":"education","lineage":["https://openalex.org/I75917431"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Takamitsu Matsubara","raw_affiliation_strings":["Graduate School of Information Science, Nara Institute of Science and Technology (NAIST), Nara, Japan"],"affiliations":[{"raw_affiliation_string":"Graduate School of Information Science, Nara Institute of Science and Technology (NAIST), Nara, Japan","institution_ids":["https://openalex.org/I75917431"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5056162412"],"corresponding_institution_ids":["https://openalex.org/I75917431"],"apc_list":null,"apc_paid":null,"fwci":2.3403,"has_fulltext":false,"cited_by_count":14,"citation_normalized_percentile":{"value":0.91416447,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"1545","last_page":"1550"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.9916999936103821,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12794","display_name":"Adaptive Dynamic Programming Control","score":0.9797999858856201,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8499075770378113},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.737008273601532},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6930490732192993},{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.6640198230743408},{"id":"https://openalex.org/keywords/humanoid-robot","display_name":"Humanoid robot","score":0.6397169232368469},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.6289474964141846},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5902594327926636},{"id":"https://openalex.org/keywords/dependency","display_name":"Dependency (UML)","score":0.513020932674408},{"id":"https://openalex.org/keywords/robot-control","display_name":"Robot control","score":0.44619259238243103},{"id":"https://openalex.org/keywords/control","display_name":"Control (management)","score":0.4392302930355072},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.38274893164634705},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.17498818039894104},{"id":"https://openalex.org/keywords/mobile-robot","display_name":"Mobile robot","score":0.17343127727508545}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8499075770378113},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.737008273601532},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6930490732192993},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.6640198230743408},{"id":"https://openalex.org/C60692881","wikidata":"https://www.wikidata.org/wiki/Q584529","display_name":"Humanoid robot","level":3,"score":0.6397169232368469},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.6289474964141846},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5902594327926636},{"id":"https://openalex.org/C19768560","wikidata":"https://www.wikidata.org/wiki/Q320727","display_name":"Dependency (UML)","level":2,"score":0.513020932674408},{"id":"https://openalex.org/C65401140","wikidata":"https://www.wikidata.org/wiki/Q7353385","display_name":"Robot control","level":4,"score":0.44619259238243103},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.4392302930355072},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.38274893164634705},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.17498818039894104},{"id":"https://openalex.org/C19966478","wikidata":"https://www.wikidata.org/wiki/Q4810574","display_name":"Mobile robot","level":3,"score":0.17343127727508545},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/iros.2017.8205960","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iros.2017.8205960","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2017 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions","score":0.5199999809265137}],"awards":[],"funders":[{"id":"https://openalex.org/F4320328988","display_name":"Instituto de Telecomunica\u00e7\u00f5es","ror":"https://ror.org/02ht4fk33"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":40,"referenced_works":["https://openalex.org/W32403112","https://openalex.org/W1976207411","https://openalex.org/W1977655452","https://openalex.org/W2097117768","https://openalex.org/W2121863487","https://openalex.org/W2124175081","https://openalex.org/W2130179125","https://openalex.org/W2143612262","https://openalex.org/W2145060720","https://openalex.org/W2145339207","https://openalex.org/W2147768505","https://openalex.org/W2155968351","https://openalex.org/W2163605009","https://openalex.org/W2173248099","https://openalex.org/W2173564293","https://openalex.org/W2271840356","https://openalex.org/W2554120691","https://openalex.org/W2554984891","https://openalex.org/W2569146624","https://openalex.org/W2571136349","https://openalex.org/W2586680856","https://openalex.org/W2623331213","https://openalex.org/W2746553466","https://openalex.org/W2950492145","https://openalex.org/W2951799221","https://openalex.org/W2962901215","https://openalex.org/W2963095800","https://openalex.org/W2963864421","https://openalex.org/W2964043796","https://openalex.org/W4214717370","https://openalex.org/W4285719527","https://openalex.org/W6678114464","https://openalex.org/W6679486095","https://openalex.org/W6681439324","https://openalex.org/W6682330108","https://openalex.org/W6684191040","https://openalex.org/W6685444567","https://openalex.org/W6692846177","https://openalex.org/W6729507393","https://openalex.org/W6730111887"],"related_works":["https://openalex.org/W2543019745","https://openalex.org/W2531662632","https://openalex.org/W4247750500","https://openalex.org/W2540452882","https://openalex.org/W2356070666","https://openalex.org/W2778262232","https://openalex.org/W2151682110","https://openalex.org/W2910269320","https://openalex.org/W2152492056","https://openalex.org/W2296151496"],"abstract_inverted_index":{"Deep":[0,64,110,141],"reinforcement":[1,86],"learning":[2,47,87,138],"has":[3],"drawn":[4],"much":[5],"attention":[6],"in":[7,46,98],"robot":[8,56,103,117,132],"control":[9,16,105],"since":[10],"it":[11,49],"enables":[12],"agents":[13],"to":[14,51,109,144],"learn":[15,145],"policies":[17],"from":[18],"very":[19],"high":[20],"dimensional":[21],"states":[22],"such":[23],"as":[24],"raw":[25],"images.":[26],"On":[27],"the":[28,34,71,83,92,102,123,146],"other":[29],"hand,":[30],"its":[31,44],"dependency":[32],"upon":[33],"availability":[35],"of":[36,40,78,91,101,125,137],"a":[37,99,115,126,129,134],"significant":[38],"quantity":[39],"training":[41],"samples":[42],"and":[43,74],"fragility":[45],"makes":[48],"difficult":[50],"apply":[52],"for":[53],"real":[54,116],"world":[55],"tasks.":[57],"To":[58],"alleviate":[59],"these":[60],"issues":[61],"we":[62],"propose":[63],"Dynamic":[65],"Policy":[66],"Programming":[67],"(DDPP),":[68],"which":[69],"combines":[70],"sample":[72],"efficiency":[73],"smooth":[75],"policy":[76,80],"updates":[77],"dynamic":[79],"programming":[81],"with":[82,107,128],"contemporary":[84],"deep":[85],"framework.":[88],"The":[89],"effectiveness":[90],"proposed":[93],"method":[94],"is":[95],"first":[96],"demonstrated":[97],"simulation":[100],"arm":[104],"problem,":[106],"comparison":[108],"Q-Networks.":[111],"As":[112],"validation":[113],"on":[114],"system,":[118],"DDPP":[119],"also":[120],"successfully":[121],"learned":[122],"flipping":[124],"handkerchief":[127],"NEXTAGE":[130],"humanoid":[131],"using":[133],"reduced":[135],"number":[136],"samples,":[139],"whereas":[140],"Q-Networks":[142],"failed":[143],"task.":[147]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":1},{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":4},{"year":2018,"cited_by_count":6},{"year":2017,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
