{"id":"https://openalex.org/W2908391464","doi":"https://doi.org/10.1109/icarcv.2018.8581310","title":"Learning to Run with Potential-Based Reward Shaping and Demonstrations from Video Data","display_name":"Learning to Run with Potential-Based Reward Shaping and Demonstrations from Video Data","publication_year":2018,"publication_date":"2018-11-01","ids":{"openalex":"https://openalex.org/W2908391464","doi":"https://doi.org/10.1109/icarcv.2018.8581310","mag":"2908391464"},"language":"en","primary_location":{"id":"doi:10.1109/icarcv.2018.8581310","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icarcv.2018.8581310","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2018 15th International Conference on Control, Automation, Robotics and Vision (ICARCV)","raw_type":"proceedings-article"},"type":"article","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2012.08824","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5073631643","display_name":"Aleksandra Malysheva","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Aleksandra Malysheva","raw_affiliation_strings":["JetBrains Research, St Petersburg, Russia"],"affiliations":[{"raw_affiliation_string":"JetBrains Research, St Petersburg, Russia","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5009587907","display_name":"Daniel Kudenko\u22c6","orcid":"https://orcid.org/0000-0003-3359-3255"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Daniel Kudenko","raw_affiliation_strings":["JetBrains Research, St Petersburg, Russia"],"affiliations":[{"raw_affiliation_string":"JetBrains Research, St Petersburg, Russia","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5048095728","display_name":"Aleksei Shpilman","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Aleksei Shpilman","raw_affiliation_strings":["JetBrains Research, St Petersburg, Russia"],"affiliations":[{"raw_affiliation_string":"JetBrains Research, St Petersburg, Russia","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5073631643"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.111,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.48283394,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":"98","issue":null,"first_page":"286","last_page":"291"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10879","display_name":"Robotic Locomotion and Control","score":0.994700014591217,"subfield":{"id":"https://openalex.org/subfields/2204","display_name":"Biomedical Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10879","display_name":"Robotic Locomotion and Control","score":0.994700014591217,"subfield":{"id":"https://openalex.org/subfields/2204","display_name":"Biomedical Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9918000102043152,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9704999923706055,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8393415212631226},{"id":"https://openalex.org/keywords/humanoid-robot","display_name":"Humanoid robot","score":0.7964882850646973},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7643693089485168},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5825532078742981},{"id":"https://openalex.org/keywords/baseline","display_name":"Baseline (sea)","score":0.5536622405052185},{"id":"https://openalex.org/keywords/competition","display_name":"Competition (biology)","score":0.5029653906822205},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.49808454513549805},{"id":"https://openalex.org/keywords/scratch","display_name":"Scratch","score":0.43415015935897827},{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.3874087631702423},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.37422752380371094},{"id":"https://openalex.org/keywords/simulation","display_name":"Simulation","score":0.34331637620925903}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8393415212631226},{"id":"https://openalex.org/C60692881","wikidata":"https://www.wikidata.org/wiki/Q584529","display_name":"Humanoid robot","level":3,"score":0.7964882850646973},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7643693089485168},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5825532078742981},{"id":"https://openalex.org/C12725497","wikidata":"https://www.wikidata.org/wiki/Q810247","display_name":"Baseline (sea)","level":2,"score":0.5536622405052185},{"id":"https://openalex.org/C91306197","wikidata":"https://www.wikidata.org/wiki/Q45767","display_name":"Competition (biology)","level":2,"score":0.5029653906822205},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.49808454513549805},{"id":"https://openalex.org/C2781235140","wikidata":"https://www.wikidata.org/wiki/Q275131","display_name":"Scratch","level":2,"score":0.43415015935897827},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.3874087631702423},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.37422752380371094},{"id":"https://openalex.org/C44154836","wikidata":"https://www.wikidata.org/wiki/Q45045","display_name":"Simulation","level":1,"score":0.34331637620925903},{"id":"https://openalex.org/C18903297","wikidata":"https://www.wikidata.org/wiki/Q7150","display_name":"Ecology","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C127313418","wikidata":"https://www.wikidata.org/wiki/Q1069","display_name":"Geology","level":0,"score":0.0},{"id":"https://openalex.org/C111368507","wikidata":"https://www.wikidata.org/wiki/Q43518","display_name":"Oceanography","level":1,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/icarcv.2018.8581310","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icarcv.2018.8581310","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2018 15th International Conference on Control, Automation, Robotics and Vision (ICARCV)","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2012.08824","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2012.08824","pdf_url":"https://arxiv.org/pdf/2012.08824","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2012.08824","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2012.08824","pdf_url":"https://arxiv.org/pdf/2012.08824","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":27,"referenced_works":["https://openalex.org/W1499408472","https://openalex.org/W1569296262","https://openalex.org/W1777239053","https://openalex.org/W2020677283","https://openalex.org/W2098432798","https://openalex.org/W2113325037","https://openalex.org/W2119567691","https://openalex.org/W2130750514","https://openalex.org/W2173248099","https://openalex.org/W2397581010","https://openalex.org/W2491675558","https://openalex.org/W2557283755","https://openalex.org/W2735025040","https://openalex.org/W2785694322","https://openalex.org/W2795755662","https://openalex.org/W2795776994","https://openalex.org/W2796290181","https://openalex.org/W2913167884","https://openalex.org/W2963802910","https://openalex.org/W2963864421","https://openalex.org/W2963876278","https://openalex.org/W3103379718","https://openalex.org/W3104515094","https://openalex.org/W4298023569","https://openalex.org/W6638088447","https://openalex.org/W6712392314","https://openalex.org/W6722970529"],"related_works":["https://openalex.org/W2475116013","https://openalex.org/W2770018148","https://openalex.org/W2358308169","https://openalex.org/W2385135707","https://openalex.org/W2140315382","https://openalex.org/W2059109728","https://openalex.org/W322691623","https://openalex.org/W2494989134","https://openalex.org/W2509444723","https://openalex.org/W2004958254"],"abstract_inverted_index":{"Learning":[0],"to":[1,22,34,43,60,67,95,104,128,152,186],"produce":[2,68,110],"efficient":[3],"movement":[4],"behaviour":[5,233],"for":[6,133],"humanoid":[7,41,101],"robots":[8],"from":[9,83,90,174,179,247],"scratch":[10],"is":[11],"a":[12,36,40,46,56,111,130,192],"hard":[13],"problem,":[14],"as":[15,191,213,215],"has":[16],"been":[17],"illustrated":[18],"by":[19],"the":[20,97,100,107,118,143,149,156,163,175,180,202,216,237,244,248],"\u201cLearning":[21],"run\u201d":[23],"competition":[24,32,182],"at":[25,124],"NIPS":[26,181],"2017.":[27],"The":[28],"goal":[29],"of":[30,39,85,99,120,221,243],"this":[31,77,146],"was":[33],"train":[35],"two-legged":[37],"model":[38],"body":[42,122],"run":[44],"in":[45,155,162,168,206,218,234],"simulated":[47],"race":[48],"course":[49],"with":[50,183,236],"maximum":[51],"speed.":[52],"All":[53],"submissions":[54],"took":[55],"tabula":[57],"rasa":[58],"approach":[59,147,228],"reinforcement":[61],"learning":[62,102,108],"(RL)":[63],"and":[64,109],"were":[65],"able":[66],"relatively":[69],"fast,":[70],"but":[71],"not":[72,141],"optimal":[73,144],"running":[74,87,232,245],"behaviour.":[75],"In":[76],"paper,":[78],"we":[79,115,170],"demonstrate":[80,196],"how":[81,197],"data":[82],"videos":[84],"human":[86,157],"(e.g.":[88],"taken":[89],"YouTube)":[91],"can":[92,229],"be":[93],"used":[94],"shape":[96],"reward":[98,135,199],"agent":[103,151,190,209,246],"speed":[105],"up":[106],"better":[112],"result.":[113],"Specifically,":[114],"are":[116,160],"using":[117],"positions":[119],"key":[121],"parts":[123],"regular":[125],"time":[126],"intervals":[127],"define":[129],"potential":[131],"function":[132],"potential-based":[134],"shaping":[136,200],"(PBRS).":[137],"Since":[138],"PBRS":[139],"does":[140],"change":[142],"policy,":[145],"allows":[148],"RL":[150,208],"overcome":[153,230],"sub-optimalities":[154],"movements":[158],"that":[159,210,226,242],"shown":[161],"videos.":[164],"We":[165,194,223],"present":[166],"experiments":[167],"which":[169],"combine":[171],"selected":[172],"techniques":[173],"top":[176],"ten":[177],"approaches":[178],"further":[184],"optimizations":[185],"create":[187],"an":[188,207],"high-performing":[189],"baseline.":[193],"then":[195],"video-based":[198],"improves":[201],"performance":[203],"further,":[204],"resulting":[205],"runs":[211],"twice":[212],"fast":[214],"baseline":[217],"12":[219],"hours":[220],"training.":[222],"furthermore":[224],"show":[225],"our":[227],"sub-optimal":[231],"videos,":[235],"learned":[238],"policy":[239],"significantly":[240],"outperforming":[241],"video.":[249]},"counts_by_year":[{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":1}],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2025-10-10T00:00:00"}
