{"id":"https://openalex.org/W4288055820","doi":"https://doi.org/10.48550/arxiv.2207.11313","title":"Graph-Structured Policy Learning for Multi-Goal Manipulation Tasks","display_name":"Graph-Structured Policy Learning for Multi-Goal Manipulation Tasks","publication_year":2022,"publication_date":"2022-07-22","ids":{"openalex":"https://openalex.org/W4288055820","doi":"https://doi.org/10.48550/arxiv.2207.11313"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2207.11313","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2207.11313","pdf_url":"https://arxiv.org/pdf/2207.11313","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2207.11313","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5060590650","display_name":"David Klee","orcid":"https://orcid.org/0000-0003-1234-9755"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Klee, David","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5050593664","display_name":"Ond\u0159ej B\u00ed\u017ea","orcid":"https://orcid.org/0000-0003-3390-8050"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Biza, Ondrej","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5103163468","display_name":"Robert Platt","orcid":"https://orcid.org/0009-0001-2033-3473"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Platt, Robert","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5060590650"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.9901999831199646,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.9901999831199646,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.989799976348877,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9771999716758728,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.8368486762046814},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7619330883026123},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.6814032793045044},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.5719298124313354},{"id":"https://openalex.org/keywords/policy-learning","display_name":"Policy learning","score":0.5675974488258362},{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.562345027923584},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.562182605266571},{"id":"https://openalex.org/keywords/graph","display_name":"Graph","score":0.5431737899780273},{"id":"https://openalex.org/keywords/coding","display_name":"Coding (social sciences)","score":0.5376575589179993},{"id":"https://openalex.org/keywords/feature-learning","display_name":"Feature learning","score":0.512178897857666},{"id":"https://openalex.org/keywords/block","display_name":"Block (permutation group theory)","score":0.4950923025608063},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.4676278233528137},{"id":"https://openalex.org/keywords/transfer-of-learning","display_name":"Transfer of learning","score":0.4375385642051697},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.43475842475891113},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.33398669958114624},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.33262109756469727}],"concepts":[{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.8368486762046814},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7619330883026123},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.6814032793045044},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.5719298124313354},{"id":"https://openalex.org/C2779436431","wikidata":"https://www.wikidata.org/wiki/Q30672407","display_name":"Policy learning","level":2,"score":0.5675974488258362},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.562345027923584},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.562182605266571},{"id":"https://openalex.org/C132525143","wikidata":"https://www.wikidata.org/wiki/Q141488","display_name":"Graph","level":2,"score":0.5431737899780273},{"id":"https://openalex.org/C179518139","wikidata":"https://www.wikidata.org/wiki/Q5140297","display_name":"Coding (social sciences)","level":2,"score":0.5376575589179993},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.512178897857666},{"id":"https://openalex.org/C2777210771","wikidata":"https://www.wikidata.org/wiki/Q4927124","display_name":"Block (permutation group theory)","level":2,"score":0.4950923025608063},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.4676278233528137},{"id":"https://openalex.org/C150899416","wikidata":"https://www.wikidata.org/wiki/Q1820378","display_name":"Transfer of learning","level":2,"score":0.4375385642051697},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.43475842475891113},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.33398669958114624},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.33262109756469727},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2207.11313","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2207.11313","pdf_url":"https://arxiv.org/pdf/2207.11313","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"doi:10.48550/arxiv.2207.11313","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2207.11313","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2207.11313","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2207.11313","pdf_url":"https://arxiv.org/pdf/2207.11313","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/11","display_name":"Sustainable cities and communities","score":0.6000000238418579}],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4288055820.pdf","grobid_xml":"https://content.openalex.org/works/W4288055820.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4306904969","https://openalex.org/W2138720691","https://openalex.org/W4362501864","https://openalex.org/W4380318855","https://openalex.org/W2031695474","https://openalex.org/W2024136090","https://openalex.org/W2586732548","https://openalex.org/W2768698792","https://openalex.org/W2440023763","https://openalex.org/W2962474440"],"abstract_inverted_index":{"Multi-goal":[0],"policy":[1,126],"learning":[2,59],"for":[3],"robotic":[4],"manipulation":[5],"is":[6],"challenging.":[7],"Prior":[8],"successes":[9],"have":[10],"used":[11],"state-based":[12],"representations":[13],"of":[14,33,43],"the":[15,34,71,90,125],"objects":[16],"or":[17],"provided":[18],"demonstration":[19],"data":[20],"to":[21,40,117],"facilitate":[22],"learning.":[23],"In":[24],"this":[25],"paper,":[26],"by":[27,68],"hand-coding":[28],"a":[29,49,94,108,131],"high-level":[30],"discrete":[31,91],"representation":[32],"domain,":[35],"we":[36,123],"show":[37],"that":[38,88,101],"policies":[39,63],"reach":[41],"dozens":[42],"goals":[44],"can":[45,104],"be":[46],"learned":[47,127],"with":[48,119],"single":[50],"network":[51],"using":[52],"Q-learning":[53],"from":[54],"pixels.":[55],"The":[56],"agent":[57],"focuses":[58],"on":[60,93,130],"simpler,":[61],"local":[62],"which":[64],"are":[65],"sequenced":[66],"together":[67],"planning":[69],"in":[70,128],"abstract":[72],"space.":[73],"We":[74,99],"compare":[75],"our":[76,102],"method":[77,103],"against":[78],"standard":[79],"multi-goal":[80],"RL":[81],"baselines,":[82],"as":[83,85],"well":[84],"other":[86],"methods":[87],"leverage":[89],"representation,":[92],"challenging":[95],"block":[96,111],"construction":[97],"domain.":[98],"find":[100],"build":[105],"more":[106],"than":[107],"hundred":[109],"different":[110],"structures,":[112],"and":[113],"demonstrate":[114],"forward":[115],"transfer":[116],"structures":[118],"novel":[120],"objects.":[121],"Lastly,":[122],"deploy":[124],"simulation":[129],"real":[132],"robot.":[133]},"counts_by_year":[],"updated_date":"2026-02-09T09:26:11.010843","created_date":"2025-10-10T00:00:00"}
