{"id":"https://openalex.org/W4308243982","doi":"https://doi.org/10.48550/arxiv.2211.01724","title":"Learning Control by Iterative Inversion","display_name":"Learning Control by Iterative Inversion","publication_year":2022,"publication_date":"2022-11-03","ids":{"openalex":"https://openalex.org/W4308243982","doi":"https://doi.org/10.48550/arxiv.2211.01724"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2211.01724","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2211.01724","pdf_url":"https://arxiv.org/pdf/2211.01724","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2211.01724","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5059321255","display_name":"Gal Leibovich","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Leibovich, Gal","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5110808832","display_name":"Guy Jacob","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jacob, Guy","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5075263262","display_name":"Or Avner","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Avner, Or","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5023367765","display_name":"Gal Novik","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Novik, Gal","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5020699288","display_name":"Aviv Tamar","orcid":"https://orcid.org/0000-0002-1972-854X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tamar, Aviv","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9909999966621399,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9909999966621399,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12072","display_name":"Machine Learning and Algorithms","score":0.9803000092506409,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.975600004196167,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/iterative-learning-control","display_name":"Iterative learning control","score":0.8240752816200256},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.7364398241043091},{"id":"https://openalex.org/keywords/inversion","display_name":"Inversion (geology)","score":0.7039170861244202},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6665629148483276},{"id":"https://openalex.org/keywords/iterative-method","display_name":"Iterative method","score":0.5359818339347839},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.47046196460723877},{"id":"https://openalex.org/keywords/random-noise","display_name":"Random noise","score":0.4502637982368469},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.4311451315879822},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.37253373861312866},{"id":"https://openalex.org/keywords/control","display_name":"Control (management)","score":0.27662718296051025},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.06235837936401367}],"concepts":[{"id":"https://openalex.org/C117619785","wikidata":"https://www.wikidata.org/wiki/Q6094414","display_name":"Iterative learning control","level":3,"score":0.8240752816200256},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.7364398241043091},{"id":"https://openalex.org/C1893757","wikidata":"https://www.wikidata.org/wiki/Q3653001","display_name":"Inversion (geology)","level":3,"score":0.7039170861244202},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6665629148483276},{"id":"https://openalex.org/C159694833","wikidata":"https://www.wikidata.org/wiki/Q2321565","display_name":"Iterative method","level":2,"score":0.5359818339347839},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.47046196460723877},{"id":"https://openalex.org/C2986577269","wikidata":"https://www.wikidata.org/wiki/Q11306265","display_name":"Random noise","level":2,"score":0.4502637982368469},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.4311451315879822},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.37253373861312866},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.27662718296051025},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.06235837936401367},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0},{"id":"https://openalex.org/C109007969","wikidata":"https://www.wikidata.org/wiki/Q749565","display_name":"Structural basin","level":2,"score":0.0},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2211.01724","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2211.01724","pdf_url":"https://arxiv.org/pdf/2211.01724","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"doi:10.48550/arxiv.2211.01724","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2211.01724","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article-journal"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2211.01724","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2211.01724","pdf_url":"https://arxiv.org/pdf/2211.01724","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4386994694","https://openalex.org/W4388738109","https://openalex.org/W2362901947","https://openalex.org/W2362086884","https://openalex.org/W1606071314","https://openalex.org/W2350210972","https://openalex.org/W2369126164","https://openalex.org/W1482785882","https://openalex.org/W2376218423","https://openalex.org/W2761624296"],"abstract_inverted_index":{"We":[0,67],"propose":[1],"$\\textit{iterative":[2],"inversion}$":[3],"--":[4],"an":[5,9,45,155],"algorithm":[6],"for":[7],"learning":[8,58],"inverse":[10],"function":[11],"without":[12],"input-output":[13],"pairs,":[14],"but":[15],"only":[16,117],"with":[17,136],"samples":[18],"from":[19],"the":[20,27,38,42,57,65,102],"desired":[21,39,82],"output":[22],"distribution":[23],"and":[24,41,49,92,116,132,140],"access":[25],"to":[26,71,97,126,163],"forward":[28],"function.":[29,66],"The":[30],"key":[31],"challenge":[32],"is":[33,76],"a":[34,77,137,141],"$\\textit{distribution":[35],"shift}$":[36],"between":[37],"outputs":[40,43],"of":[44,79,81,88],"initial":[46],"random":[47,107],"guess,":[48],"we":[50,144,153],"prove":[51],"that":[52],"iterative":[53,69],"inversion":[54,70],"can":[55,122],"steer":[56],"correctly,":[59],"under":[60],"rather":[61],"strict":[62],"conditions":[63],"on":[64,149,158],"apply":[68],"learn":[72],"control.":[73],"Our":[74,110],"input":[75],"set":[78],"demonstrations":[80],"behavior,":[83],"given":[84],"as":[85],"video":[86],"embeddings":[87],"trajectories":[89,99],"(without":[90],"actions),":[91],"our":[93],"method":[94],"iteratively":[95],"learns":[96],"imitate":[98],"generated":[100],"by":[101,106],"current":[103],"policy,":[104,143],"perturbed":[105],"exploration":[108],"noise.":[109],"approach":[111],"does":[112],"not":[113],"require":[114],"rewards,":[115],"employs":[118],"supervised":[119],"learning,":[120],"which":[121],"be":[123],"easily":[124],"scaled":[125],"use":[127],"state-of-the-art":[128],"trajectory":[129],"embedding":[130],"techniques":[131],"policy":[133],"representations.":[134],"Indeed,":[135],"VQ-VAE":[138],"embedding,":[139],"transformer-based":[142],"demonstrate":[145],"non-trivial":[146],"continuous":[147],"control":[148],"several":[150],"tasks.":[151],"Further,":[152],"report":[154],"improved":[156],"performance":[157],"imitating":[159],"diverse":[160],"behaviors":[161],"compared":[162],"reward":[164],"based":[165],"methods.":[166]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
