{"id":"https://openalex.org/W4401415884","doi":"https://doi.org/10.1109/lra.2024.3440731","title":"Efficient Reinforcement Learning of Task Planners for Robotic Palletization Through Iterative Action Masking Learning","display_name":"Efficient Reinforcement Learning of Task Planners for Robotic Palletization Through Iterative Action Masking Learning","publication_year":2024,"publication_date":"2024-08-08","ids":{"openalex":"https://openalex.org/W4401415884","doi":"https://doi.org/10.1109/lra.2024.3440731"},"language":"en","primary_location":{"id":"doi:10.1109/lra.2024.3440731","is_oa":false,"landing_page_url":"https://doi.org/10.1109/lra.2024.3440731","pdf_url":null,"source":{"id":"https://openalex.org/S4210169774","display_name":"IEEE Robotics and Automation Letters","issn_l":"2377-3766","issn":["2377-3766"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Robotics and Automation Letters","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101965827","display_name":"Zheng Wu","orcid":"https://orcid.org/0000-0003-3248-4153"},"institutions":[{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Zheng Wu","raw_affiliation_strings":["Department of Mechanical Engineering, University of California, Berkeley, CA, USA"],"raw_orcid":"https://orcid.org/0000-0003-3248-4153","affiliations":[{"raw_affiliation_string":"Department of Mechanical Engineering, University of California, Berkeley, CA, USA","institution_ids":["https://openalex.org/I95457486"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101587408","display_name":"Yichuan Li","orcid":"https://orcid.org/0000-0001-8473-5378"},"institutions":[{"id":"https://openalex.org/I177725633","display_name":"Chinese University of Hong Kong","ror":"https://ror.org/00t33hh48","country_code":"HK","type":"education","lineage":["https://openalex.org/I177725633"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Yichuan Li","raw_affiliation_strings":["T Stone Robotics Institute, Department of Mechanical and Automation Engineering, Chinese University of Hong Kong, Hong Kong"],"raw_orcid":"https://orcid.org/0000-0001-8473-5378","affiliations":[{"raw_affiliation_string":"T Stone Robotics Institute, Department of Mechanical and Automation Engineering, Chinese University of Hong Kong, Hong Kong","institution_ids":["https://openalex.org/I177725633"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101770873","display_name":"Wei Zhan","orcid":"https://orcid.org/0000-0002-1474-1200"},"institutions":[{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Wei Zhan","raw_affiliation_strings":["Department of Mechanical Engineering, University of California, Berkeley, CA, USA"],"raw_orcid":"https://orcid.org/0000-0002-1474-1200","affiliations":[{"raw_affiliation_string":"Department of Mechanical Engineering, University of California, Berkeley, CA, USA","institution_ids":["https://openalex.org/I95457486"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5040156274","display_name":"Changliu Liu","orcid":"https://orcid.org/0000-0002-3767-5517"},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Changliu Liu","raw_affiliation_strings":["Robotics Institute, Carnegie Mellon University, Pittsburgh, PA, USA"],"raw_orcid":"https://orcid.org/0000-0002-3767-5517","affiliations":[{"raw_affiliation_string":"Robotics Institute, Carnegie Mellon University, Pittsburgh, PA, USA","institution_ids":["https://openalex.org/I74973139"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100707660","display_name":"Yunhui Liu","orcid":"https://orcid.org/0000-0002-3625-6679"},"institutions":[{"id":"https://openalex.org/I177725633","display_name":"Chinese University of Hong Kong","ror":"https://ror.org/00t33hh48","country_code":"HK","type":"education","lineage":["https://openalex.org/I177725633"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Yun-Hui Liu","raw_affiliation_strings":["T Stone Robotics Institute, Department of Mechanical and Automation Engineering, Chinese University of Hong Kong, Hong Kong"],"raw_orcid":"https://orcid.org/0000-0002-3625-6679","affiliations":[{"raw_affiliation_string":"T Stone Robotics Institute, Department of Mechanical and Automation Engineering, Chinese University of Hong Kong, Hong Kong","institution_ids":["https://openalex.org/I177725633"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5064077634","display_name":"Masayoshi Tomizuka","orcid":"https://orcid.org/0000-0003-0206-6639"},"institutions":[{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Masayoshi Tomizuka","raw_affiliation_strings":["Department of Mechanical Engineering, University of California, Berkeley, CA, USA"],"raw_orcid":"https://orcid.org/0000-0003-0206-6639","affiliations":[{"raw_affiliation_string":"Department of Mechanical Engineering, University of California, Berkeley, CA, USA","institution_ids":["https://openalex.org/I95457486"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":2.8491,"has_fulltext":false,"cited_by_count":13,"citation_normalized_percentile":{"value":0.91102689,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":"9","issue":"11","first_page":"9303","last_page":"9310"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10510","display_name":"Stroke Rehabilitation and Recovery","score":0.9884999990463257,"subfield":{"id":"https://openalex.org/subfields/2742","display_name":"Rehabilitation"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},"topics":[{"id":"https://openalex.org/T10510","display_name":"Stroke Rehabilitation and Recovery","score":0.9884999990463257,"subfield":{"id":"https://openalex.org/subfields/2742","display_name":"Rehabilitation"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.977400004863739,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10784","display_name":"Muscle activation and electromyography studies","score":0.9652000069618225,"subfield":{"id":"https://openalex.org/subfields/2204","display_name":"Biomedical Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7728888988494873},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.7272761464118958},{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.648493766784668},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5905239582061768},{"id":"https://openalex.org/keywords/masking","display_name":"Masking (illustration)","score":0.5861700773239136},{"id":"https://openalex.org/keywords/error-driven-learning","display_name":"Error-driven learning","score":0.47241559624671936},{"id":"https://openalex.org/keywords/action-learning","display_name":"Action learning","score":0.4655761122703552},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.43601080775260925},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.42554858326911926},{"id":"https://openalex.org/keywords/iterative-learning-control","display_name":"Iterative learning control","score":0.41335228085517883},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.24117004871368408},{"id":"https://openalex.org/keywords/control","display_name":"Control (management)","score":0.16077259182929993},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.156248539686203},{"id":"https://openalex.org/keywords/mathematics-education","display_name":"Mathematics education","score":0.11430943012237549},{"id":"https://openalex.org/keywords/cooperative-learning","display_name":"Cooperative learning","score":0.09715771675109863},{"id":"https://openalex.org/keywords/teaching-method","display_name":"Teaching method","score":0.07061177492141724},{"id":"https://openalex.org/keywords/art","display_name":"Art","score":0.04504525661468506}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7728888988494873},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.7272761464118958},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.648493766784668},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5905239582061768},{"id":"https://openalex.org/C2777402240","wikidata":"https://www.wikidata.org/wiki/Q6783436","display_name":"Masking (illustration)","level":2,"score":0.5861700773239136},{"id":"https://openalex.org/C47932503","wikidata":"https://www.wikidata.org/wiki/Q5395689","display_name":"Error-driven learning","level":3,"score":0.47241559624671936},{"id":"https://openalex.org/C183759332","wikidata":"https://www.wikidata.org/wiki/Q343680","display_name":"Action learning","level":4,"score":0.4655761122703552},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.43601080775260925},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.42554858326911926},{"id":"https://openalex.org/C117619785","wikidata":"https://www.wikidata.org/wiki/Q6094414","display_name":"Iterative learning control","level":3,"score":0.41335228085517883},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.24117004871368408},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.16077259182929993},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.156248539686203},{"id":"https://openalex.org/C145420912","wikidata":"https://www.wikidata.org/wiki/Q853077","display_name":"Mathematics education","level":1,"score":0.11430943012237549},{"id":"https://openalex.org/C51672120","wikidata":"https://www.wikidata.org/wiki/Q303446","display_name":"Cooperative learning","level":3,"score":0.09715771675109863},{"id":"https://openalex.org/C88610354","wikidata":"https://www.wikidata.org/wiki/Q1813494","display_name":"Teaching method","level":2,"score":0.07061177492141724},{"id":"https://openalex.org/C142362112","wikidata":"https://www.wikidata.org/wiki/Q735","display_name":"Art","level":0,"score":0.04504525661468506},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C153349607","wikidata":"https://www.wikidata.org/wiki/Q36649","display_name":"Visual arts","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/lra.2024.3440731","is_oa":false,"landing_page_url":"https://doi.org/10.1109/lra.2024.3440731","pdf_url":null,"source":{"id":"https://openalex.org/S4210169774","display_name":"IEEE Robotics and Automation Letters","issn_l":"2377-3766","issn":["2377-3766"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Robotics and Automation Letters","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":30,"referenced_works":["https://openalex.org/W1901129140","https://openalex.org/W1903029394","https://openalex.org/W1920942232","https://openalex.org/W1923697677","https://openalex.org/W2004065455","https://openalex.org/W2066875820","https://openalex.org/W2101057470","https://openalex.org/W2158782408","https://openalex.org/W2158804684","https://openalex.org/W2598380049","https://openalex.org/W2736601468","https://openalex.org/W2749807327","https://openalex.org/W2967842325","https://openalex.org/W2996037775","https://openalex.org/W2996896271","https://openalex.org/W3082613747","https://openalex.org/W3087791475","https://openalex.org/W3107106036","https://openalex.org/W3175494061","https://openalex.org/W3181633176","https://openalex.org/W4226102240","https://openalex.org/W4394672593","https://openalex.org/W6640174482","https://openalex.org/W6640295612","https://openalex.org/W6741002519","https://openalex.org/W6743756900","https://openalex.org/W6772005887","https://openalex.org/W6797024940","https://openalex.org/W6798790070","https://openalex.org/W6864350279"],"related_works":["https://openalex.org/W2371091044","https://openalex.org/W2171010636","https://openalex.org/W87513465","https://openalex.org/W2391666574","https://openalex.org/W2786230833","https://openalex.org/W3203256658","https://openalex.org/W2352650970","https://openalex.org/W1544514152","https://openalex.org/W1493952344","https://openalex.org/W4312372616"],"abstract_inverted_index":{"The":[0,111],"development":[1],"of":[2,11,29,46,68,85,104,117,127],"robotic":[3,39,109],"systems":[4],"for":[5,37],"palletization":[6],"in":[7,20,33,108,123,130],"logistics":[8,136],"scenarios":[9],"is":[10,52],"paramount":[12],"importance,":[13],"addressing":[14],"critical":[15],"efficiency":[16],"and":[17,75,102,132],"precision":[18],"demands":[19],"supply":[21],"chain":[22],"management.":[23],"This":[24],"paper":[25],"investigates":[26],"the":[27,43,77,83,86,94,100,105,115,125],"application":[28],"Reinforcement":[30],"Learning":[31],"(RL)":[32],"enhancing":[34],"task":[35,106],"planning":[36,107],"such":[38],"systems.":[40],"Confronted":[41],"with":[42],"substantial":[44],"challenge":[45],"a":[47,53,65],"vast":[48],"action":[49,78,87],"space,":[50,88],"which":[51],"significant":[54],"impediment":[55],"to":[56,72],"efficiently":[57],"apply":[58],"out-of-the-shelf":[59],"RL":[60,128],"methods,":[61],"our":[62,89],"study":[63],"introduces":[64],"novel":[66],"method":[67],"utilizing":[69],"supervised":[70],"learning":[71,95],"iteratively":[73],"prune":[74],"manage":[76],"space":[79],"effectively.":[80],"By":[81],"reducing":[82],"complexity":[84],"approach":[90],"not":[91],"only":[92],"accelerates":[93],"phase":[96],"but":[97],"also":[98],"ensures":[99],"effectiveness":[101],"reliability":[103],"palletization.":[110,137],"experiemental":[112],"results":[113],"underscore":[114],"efficacy":[116],"this":[118],"method,":[119],"highlighting":[120],"its":[121],"potential":[122],"improving":[124],"performance":[126],"applications":[129],"complex":[131],"high-dimensional":[133],"environments":[134],"like":[135]},"counts_by_year":[{"year":2026,"cited_by_count":4},{"year":2025,"cited_by_count":8},{"year":2024,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
