{"id":"https://openalex.org/W2923252998","doi":"https://doi.org/10.24963/ijcai.2019/382","title":"Autoregressive Policies for Continuous Control Deep Reinforcement Learning","display_name":"Autoregressive Policies for Continuous Control Deep Reinforcement Learning","publication_year":2019,"publication_date":"2019-07-28","ids":{"openalex":"https://openalex.org/W2923252998","doi":"https://doi.org/10.24963/ijcai.2019/382","mag":"2923252998"},"language":"en","primary_location":{"id":"doi:10.24963/ijcai.2019/382","is_oa":true,"landing_page_url":"https://doi.org/10.24963/ijcai.2019/382","pdf_url":"https://www.ijcai.org/proceedings/2019/0382.pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Twenty-Eighth International Joint Conference on Artificial Intelligence","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.ijcai.org/proceedings/2019/0382.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5058918505","display_name":"Dmytro Korenkevych","orcid":"https://orcid.org/0009-0007-5748-9571"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Dmytro Korenkevych","raw_affiliation_strings":["Kindred AI"],"affiliations":[{"raw_affiliation_string":"Kindred AI","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102815213","display_name":"A. Rupam Mahmood","orcid":"https://orcid.org/0000-0002-2640-1844"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"A. Rupam Mahmood","raw_affiliation_strings":["Kindred AI"],"affiliations":[{"raw_affiliation_string":"Kindred AI","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103860806","display_name":"Gautham Vasan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gautham Vasan","raw_affiliation_strings":["Kindred AI"],"affiliations":[{"raw_affiliation_string":"Kindred AI","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5057264590","display_name":"James Bergstra","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"James Bergstra","raw_affiliation_strings":["Kindred AI"],"affiliations":[{"raw_affiliation_string":"Kindred AI","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5058918505"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.8806,"has_fulltext":false,"cited_by_count":13,"citation_normalized_percentile":{"value":0.89232539,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"2754","last_page":"2762"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10848","display_name":"Advanced Multi-Objective Optimization Algorithms","score":0.9843000173568726,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11195","display_name":"Simulation Techniques and Applications","score":0.9779999852180481,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/autoregressive-model","display_name":"Autoregressive model","score":0.8115085959434509},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7469910383224487},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7040747404098511},{"id":"https://openalex.org/keywords/gaussian-process","display_name":"Gaussian process","score":0.5952628254890442},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.5353785753250122},{"id":"https://openalex.org/keywords/gaussian","display_name":"Gaussian","score":0.5150103569030762},{"id":"https://openalex.org/keywords/sample","display_name":"Sample (material)","score":0.5013861656188965},{"id":"https://openalex.org/keywords/coherence","display_name":"Coherence (philosophical gambling strategy)","score":0.466065913438797},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4623386263847351},{"id":"https://openalex.org/keywords/interface","display_name":"Interface (matter)","score":0.45042547583580017},{"id":"https://openalex.org/keywords/control","display_name":"Control (management)","score":0.4414823055267334},{"id":"https://openalex.org/keywords/stochastic-process","display_name":"Stochastic process","score":0.42142951488494873},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.386076956987381},{"id":"https://openalex.org/keywords/econometrics","display_name":"Econometrics","score":0.19981315732002258},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.14578840136528015},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.07872304320335388}],"concepts":[{"id":"https://openalex.org/C159877910","wikidata":"https://www.wikidata.org/wiki/Q2202883","display_name":"Autoregressive model","level":2,"score":0.8115085959434509},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7469910383224487},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7040747404098511},{"id":"https://openalex.org/C61326573","wikidata":"https://www.wikidata.org/wiki/Q1496376","display_name":"Gaussian process","level":3,"score":0.5952628254890442},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.5353785753250122},{"id":"https://openalex.org/C163716315","wikidata":"https://www.wikidata.org/wiki/Q901177","display_name":"Gaussian","level":2,"score":0.5150103569030762},{"id":"https://openalex.org/C198531522","wikidata":"https://www.wikidata.org/wiki/Q485146","display_name":"Sample (material)","level":2,"score":0.5013861656188965},{"id":"https://openalex.org/C2781181686","wikidata":"https://www.wikidata.org/wiki/Q4226068","display_name":"Coherence (philosophical gambling strategy)","level":2,"score":0.466065913438797},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4623386263847351},{"id":"https://openalex.org/C113843644","wikidata":"https://www.wikidata.org/wiki/Q901882","display_name":"Interface (matter)","level":4,"score":0.45042547583580017},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.4414823055267334},{"id":"https://openalex.org/C8272713","wikidata":"https://www.wikidata.org/wiki/Q176737","display_name":"Stochastic process","level":2,"score":0.42142951488494873},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.386076956987381},{"id":"https://openalex.org/C149782125","wikidata":"https://www.wikidata.org/wiki/Q160039","display_name":"Econometrics","level":1,"score":0.19981315732002258},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.14578840136528015},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.07872304320335388},{"id":"https://openalex.org/C157915830","wikidata":"https://www.wikidata.org/wiki/Q2928001","display_name":"Bubble","level":2,"score":0.0},{"id":"https://openalex.org/C43617362","wikidata":"https://www.wikidata.org/wiki/Q170050","display_name":"Chromatography","level":1,"score":0.0},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C129307140","wikidata":"https://www.wikidata.org/wiki/Q6795880","display_name":"Maximum bubble pressure method","level":3,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.24963/ijcai.2019/382","is_oa":true,"landing_page_url":"https://doi.org/10.24963/ijcai.2019/382","pdf_url":"https://www.ijcai.org/proceedings/2019/0382.pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Twenty-Eighth International Joint Conference on Artificial Intelligence","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:1903.11524","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1903.11524","pdf_url":"https://arxiv.org/pdf/1903.11524","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"mag:2923252998","is_oa":true,"landing_page_url":"https://arxiv.org/pdf/1903.11524v1","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"arXiv (Cornell University)","raw_type":null},{"id":"doi:10.48550/arxiv.1903.11524","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.1903.11524","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.24963/ijcai.2019/382","is_oa":true,"landing_page_url":"https://doi.org/10.24963/ijcai.2019/382","pdf_url":"https://www.ijcai.org/proceedings/2019/0382.pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Twenty-Eighth International Joint Conference on Artificial Intelligence","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2923252998.pdf","grobid_xml":"https://content.openalex.org/works/W2923252998.grobid-xml"},"referenced_works_count":15,"referenced_works":["https://openalex.org/W1777239053","https://openalex.org/W2119567691","https://openalex.org/W2121863487","https://openalex.org/W2173248099","https://openalex.org/W2614839826","https://openalex.org/W2615790994","https://openalex.org/W2623491082","https://openalex.org/W2736601468","https://openalex.org/W2749807327","https://openalex.org/W2810785043","https://openalex.org/W2885550588","https://openalex.org/W2962755674","https://openalex.org/W2963639957","https://openalex.org/W2964043796","https://openalex.org/W2964227158"],"related_works":["https://openalex.org/W2966219705","https://openalex.org/W2736601468","https://openalex.org/W2787938642","https://openalex.org/W1771410628","https://openalex.org/W2781726626","https://openalex.org/W3037033418","https://openalex.org/W3198607174","https://openalex.org/W2795908317","https://openalex.org/W3151079898","https://openalex.org/W3113841101","https://openalex.org/W2796496570","https://openalex.org/W3204858865","https://openalex.org/W2895958971","https://openalex.org/W2892267807","https://openalex.org/W2951056918","https://openalex.org/W2121103318","https://openalex.org/W2063266914","https://openalex.org/W2946299167","https://openalex.org/W2288576358","https://openalex.org/W2902494507"],"abstract_inverted_index":{"Reinforcement":[0],"learning":[1,85,162],"algorithms":[2],"rely":[3],"on":[4],"exploration":[5,33,62,101,173,188],"to":[6,44,77,99],"discover":[7],"new":[8],"behaviors,":[9],"which":[10],"is":[11,132],"typically":[12],"achieved":[13],"by":[14],"following":[15],"a":[16,25,78,91],"stochastic":[17,97],"policy.":[18],"In":[19,52],"continuous":[20,86,103],"control":[21,87,104],"tasks,":[22],"policies":[23,55],"with":[24,121,158],"Gaussian":[26,32,54],"distribution":[27,131],"have":[28],"been":[29],"widely":[30],"adopted.":[31],"however":[34],"does":[35],"not":[36,57],"result":[37,58],"in":[38,49,59,84,102,171,177],"smooth":[39,187],"trajectories":[40,189],"that":[41,108,141,167,190],"generally":[42],"correspond":[43],"safe":[45,192],"and":[46,66,127,174,180],"rewarding":[47],"behaviors":[48],"practical":[50],"tasks.":[51,88],"addition,":[53],"do":[56],"an":[60,64,137],"effective":[61],"of":[63,93,125,194],"environment":[65],"become":[67],"increasingly":[68],"inefficient":[69],"as":[70],"the":[71,128,146,159],"action":[72],"rate":[73],"increases.":[74],"This":[75],"contributes":[76],"low":[79],"sample":[80,175],"efficiency":[81,176],"often":[82],"observed":[83],"We":[89,106,135,150],"introduce":[90],"family":[92],"stationary":[94,130],"autoregressive":[95,138],"(AR)":[96],"processes":[98,110,144],"facilitate":[100],"domains.":[105],"show":[107,151],"proposed":[109],"possess":[111],"two":[112],"desirable":[113],"features:":[114],"subsequent":[115],"process":[116,129],"observations":[117],"are":[118],"temporally":[119],"coherent":[120],"continuously":[122],"adjustable":[123],"degree":[124],"coherence,":[126],"standard":[133,147],"normal.":[134],"derive":[136],"policy":[139],"(ARP)":[140],"implements":[142],"such":[143],"maintaining":[145],"agent-environment":[148],"interface.":[149],"how":[152],"ARPs":[153,169],"can":[154],"be":[155],"easily":[156],"used":[157],"existing":[160],"off-the-shelf":[161],"algorithms.":[163],"Empirically":[164],"we":[165],"demonstrate":[166],"using":[168],"results":[170],"improved":[172],"both":[178],"simulated":[179],"real":[181],"world":[182],"domains,":[183],"and,":[184],"furthermore,":[185],"provides":[186],"enable":[191],"operation":[193],"robotic":[195],"hardware.":[196]},"counts_by_year":[{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":5},{"year":2020,"cited_by_count":6},{"year":2019,"cited_by_count":1}],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2025-10-10T00:00:00"}
