{"id":"https://openalex.org/W3131822805","doi":"https://doi.org/10.1109/iros45743.2020.9341477","title":"TTR-Based Reward for Reinforcement Learning with Implicit Model Priors","display_name":"TTR-Based Reward for Reinforcement Learning with Implicit Model Priors","publication_year":2020,"publication_date":"2020-10-24","ids":{"openalex":"https://openalex.org/W3131822805","doi":"https://doi.org/10.1109/iros45743.2020.9341477","mag":"3131822805"},"language":"en","primary_location":{"id":"doi:10.1109/iros45743.2020.9341477","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iros45743.2020.9341477","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2020 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5045948472","display_name":"Xubo Lyu","orcid":"https://orcid.org/0000-0003-2110-9075"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Xubo Lyu","raw_affiliation_strings":["Simon Fraser University,School of Computing Science,CA,V5A1S6"],"affiliations":[{"raw_affiliation_string":"Simon Fraser University,School of Computing Science,CA,V5A1S6","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100387244","display_name":"Mo Chen","orcid":"https://orcid.org/0000-0001-8506-3665"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mo Chen","raw_affiliation_strings":["Simon Fraser University,School of Computing Science,CA,V5A1S6"],"affiliations":[{"raw_affiliation_string":"Simon Fraser University,School of Computing Science,CA,V5A1S6","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5045948472"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.3977,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.70531152,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"5484","last_page":"5489"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10791","display_name":"Advanced Control Systems Optimization","score":0.9832000136375427,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12794","display_name":"Adaptive Dynamic Programming Control","score":0.9786999821662903,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8889971971511841},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7827683687210083},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.5517793893814087},{"id":"https://openalex.org/keywords/inefficiency","display_name":"Inefficiency","score":0.5070571899414062},{"id":"https://openalex.org/keywords/function","display_name":"Function (biology)","score":0.5032939314842224},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4899260401725769},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.4842652678489685},{"id":"https://openalex.org/keywords/exploit","display_name":"Exploit","score":0.45673561096191406},{"id":"https://openalex.org/keywords/temporal-difference-learning","display_name":"Temporal difference learning","score":0.44590887427330017},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3293110728263855}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8889971971511841},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7827683687210083},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.5517793893814087},{"id":"https://openalex.org/C2778869765","wikidata":"https://www.wikidata.org/wiki/Q6028363","display_name":"Inefficiency","level":2,"score":0.5070571899414062},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.5032939314842224},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4899260401725769},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.4842652678489685},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.45673561096191406},{"id":"https://openalex.org/C196340769","wikidata":"https://www.wikidata.org/wiki/Q7698910","display_name":"Temporal difference learning","level":3,"score":0.44590887427330017},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3293110728263855},{"id":"https://openalex.org/C78458016","wikidata":"https://www.wikidata.org/wiki/Q840400","display_name":"Evolutionary biology","level":1,"score":0.0},{"id":"https://openalex.org/C175444787","wikidata":"https://www.wikidata.org/wiki/Q39072","display_name":"Microeconomics","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/iros45743.2020.9341477","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iros45743.2020.9341477","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2020 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320334593","display_name":"Natural Sciences and Engineering Research Council of Canada","ror":"https://ror.org/01h531d29"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":41,"referenced_works":["https://openalex.org/W1487586009","https://openalex.org/W1757796397","https://openalex.org/W1771410628","https://openalex.org/W1969160376","https://openalex.org/W2001331101","https://openalex.org/W2009447937","https://openalex.org/W2054585537","https://openalex.org/W2068581168","https://openalex.org/W2085889587","https://openalex.org/W2112047499","https://openalex.org/W2140135625","https://openalex.org/W2142401694","https://openalex.org/W2167340365","https://openalex.org/W2173248099","https://openalex.org/W2298907094","https://openalex.org/W2580909119","https://openalex.org/W2604960773","https://openalex.org/W2736601468","https://openalex.org/W2737215407","https://openalex.org/W2737223130","https://openalex.org/W2803281228","https://openalex.org/W2925234320","https://openalex.org/W2950614095","https://openalex.org/W2962888568","https://openalex.org/W2962896691","https://openalex.org/W2963311874","https://openalex.org/W2963606896","https://openalex.org/W2963864421","https://openalex.org/W2974778612","https://openalex.org/W3105818906","https://openalex.org/W3131822805","https://openalex.org/W4298857966","https://openalex.org/W6637967152","https://openalex.org/W6638018090","https://openalex.org/W6680657880","https://openalex.org/W6684921986","https://openalex.org/W6735939104","https://openalex.org/W6741002519","https://openalex.org/W6741302124","https://openalex.org/W6752089545","https://openalex.org/W6761139348"],"related_works":["https://openalex.org/W4400868993","https://openalex.org/W2145363145","https://openalex.org/W2341346307","https://openalex.org/W2154399718","https://openalex.org/W4321463377","https://openalex.org/W4384574988","https://openalex.org/W2768629321","https://openalex.org/W2130711276","https://openalex.org/W4308828368","https://openalex.org/W1528400370"],"abstract_inverted_index":{"Model-free":[0],"reinforcement":[1],"learning":[2,9,30,203],"(RL)":[3],"is":[4,25,46,89,117,141,183],"a":[5,98],"powerful":[6],"approach":[7,164,198],"for":[8,144,152],"control":[10,37],"policies":[11],"directly":[12],"from":[13,126],"high-dimensional":[14,54,147],"state":[15,128],"and":[16,56,65,168,182,205,211,218],"observation.":[17],"However,":[18],"it":[19,151],"tends":[20],"to":[21,51,78,101,124,129],"be":[22,166],"data-inefficient,":[23],"which":[24],"especially":[26],"costly":[27],"in":[28,110,215],"robotic":[29,202],"tasks.":[31],"On":[32],"the":[33,43,105,120,130,138,178,192],"other":[34,187],"hand,":[35],"optimal":[36,66,75],"does":[38],"not":[39],"require":[40],"data":[41,80,216],"if":[42],"system":[44,94,134,155],"model":[45,95],"known,":[47],"but":[48],"cannot":[49],"scale":[50],"models":[52,156],"with":[53,146,185],"states":[55],"observations.":[57],"To":[58],"exploit":[59],"benefits":[60],"of":[61,85],"both":[62],"model-free":[63,86,173,208],"RL":[64,106,174,193,209],"control,":[67],"we":[68,149],"propose":[69],"time-to-reach-based":[70],"(TTR-based)":[71],"reward":[72],"shaping,":[73],"an":[74],"control-inspired":[76],"technique":[77],"alleviate":[79],"inefficiency":[81],"while":[82],"retaining":[83],"advantages":[84],"RL.":[87],"This":[88],"achieved":[90],"by":[91],"summarizing":[92],"key":[93,160],"information":[96],"using":[97],"TTR":[99,115,139],"function":[100,116,140],"greatly":[102],"speed":[103],"up":[104],"process,":[107],"as":[108,119],"shown":[109],"our":[111,197],"simulation":[112],"results.":[113],"The":[114],"defined":[118],"minimum":[121],"time":[122],"required":[123],"move":[125],"any":[127,172,186],"goal":[131],"under":[132],"assumed":[133],"dynamics":[135],"constraints.":[136],"Since":[137],"computationally":[142],"intractable":[143],"systems":[145],"states,":[148],"compute":[150],"approximate,":[153],"lower-dimensional":[154],"that":[157,189],"still":[158],"captures":[159],"dynamic":[161],"behaviors.":[162],"Our":[163],"can":[165],"flexibly":[167],"easily":[169],"incorporated":[170],"into":[171],"algorithm":[175,180],"without":[176],"altering":[177],"original":[179],"structure,":[181],"compatible":[184],"techniques":[188],"may":[190],"facilitate":[191],"process.":[194],"We":[195],"evaluate":[196],"on":[199],"two":[200],"representative":[201],"tasks":[204],"three":[206],"well-known":[207],"algorithms,":[210],"show":[212],"significant":[213],"improvements":[214],"efficiency":[217],"performance.":[219]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
