{"id":"https://openalex.org/W4307975685","doi":"https://doi.org/10.1109/tnnls.2022.3213606","title":"Composing Synergistic Macro Actions for Reinforcement Learning Agents","display_name":"Composing Synergistic Macro Actions for Reinforcement Learning Agents","publication_year":2022,"publication_date":"2022-11-04","ids":{"openalex":"https://openalex.org/W4307975685","doi":"https://doi.org/10.1109/tnnls.2022.3213606","pmid":"https://pubmed.ncbi.nlm.nih.gov/36318571"},"language":"en","primary_location":{"id":"doi:10.1109/tnnls.2022.3213606","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tnnls.2022.3213606","pdf_url":null,"source":{"id":"https://openalex.org/S4210175523","display_name":"IEEE Transactions on Neural Networks and Learning Systems","issn_l":"2162-237X","issn":["2162-237X","2162-2388"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Neural Networks and Learning Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100384019","display_name":"Yu-Ming Chen","orcid":"https://orcid.org/0000-0003-0969-6270"},"institutions":[{"id":"https://openalex.org/I4210120917","display_name":"Taiwan Semiconductor Manufacturing Company (Taiwan)","ror":"https://ror.org/02wx79d08","country_code":"TW","type":"company","lineage":["https://openalex.org/I4210120917"]}],"countries":["TW"],"is_corresponding":true,"raw_author_name":"Yu-Ming Chen","raw_affiliation_strings":["Taiwan Semiconductor Manufacturing Company (TSMC), Hsinchu, Taiwan"],"raw_orcid":"https://orcid.org/0000-0003-0969-6270","affiliations":[{"raw_affiliation_string":"Taiwan Semiconductor Manufacturing Company (TSMC), Hsinchu, Taiwan","institution_ids":["https://openalex.org/I4210120917"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5084774178","display_name":"Kuan-Yu Chang","orcid":"https://orcid.org/0000-0003-1120-2970"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kaun-Yu Chang","raw_affiliation_strings":["Avery Design Systems Inc., Taipei, Taiwan"],"raw_orcid":"https://orcid.org/0000-0003-1120-2970","affiliations":[{"raw_affiliation_string":"Avery Design Systems Inc., Taipei, Taiwan","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101733103","display_name":"Chien Liu","orcid":"https://orcid.org/0000-0001-7438-5378"},"institutions":[{"id":"https://openalex.org/I4665924","display_name":"University of Rostock","ror":"https://ror.org/03zdwsf69","country_code":"DE","type":"education","lineage":["https://openalex.org/I4665924"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Chien Liu","raw_affiliation_strings":["Faculty of Computer Science and Electrical Engineering, University of Rostock, Rostock, Germany"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Faculty of Computer Science and Electrical Engineering, University of Rostock, Rostock, Germany","institution_ids":["https://openalex.org/I4665924"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5056059920","display_name":"Tsu-Ching Hsiao","orcid":null},"institutions":[{"id":"https://openalex.org/I25846049","display_name":"National Tsing Hua University","ror":"https://ror.org/00zdnkx70","country_code":"TW","type":"education","lineage":["https://openalex.org/I25846049"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Tsu-Ching Hsiao","raw_affiliation_strings":["Department of Computer Science, National Tsing Hua University, Hsinchu, Taiwan"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Computer Science, National Tsing Hua University, Hsinchu, Taiwan","institution_ids":["https://openalex.org/I25846049"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5017776857","display_name":"Zhang-Wei Hong","orcid":"https://orcid.org/0000-0001-8574-6108"},"institutions":[{"id":"https://openalex.org/I63966007","display_name":"Massachusetts Institute of Technology","ror":"https://ror.org/042nb2s44","country_code":"US","type":"education","lineage":["https://openalex.org/I63966007"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Zhang-Wei Hong","raw_affiliation_strings":["Massachusetts Institute of Technology, Cambridge, MA, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Massachusetts Institute of Technology, Cambridge, MA, USA","institution_ids":["https://openalex.org/I63966007"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5028600832","display_name":"Chun\u2010Yi Lee","orcid":"https://orcid.org/0000-0002-4680-4800"},"institutions":[{"id":"https://openalex.org/I25846049","display_name":"National Tsing Hua University","ror":"https://ror.org/00zdnkx70","country_code":"TW","type":"education","lineage":["https://openalex.org/I25846049"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Chun-Yi Lee","raw_affiliation_strings":["Department of Computer Science, National Tsing Hua University, Hsinchu, Taiwan"],"raw_orcid":"https://orcid.org/0000-0002-4680-4800","affiliations":[{"raw_affiliation_string":"Department of Computer Science, National Tsing Hua University, Hsinchu, Taiwan","institution_ids":["https://openalex.org/I25846049"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5100384019"],"corresponding_institution_ids":["https://openalex.org/I4210120917"],"apc_list":null,"apc_paid":null,"fwci":0.1387,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.55795908,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":94},"biblio":{"volume":"35","issue":"5","first_page":"7251","last_page":"7258"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9800000190734863,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9800000190734863,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.9103999733924866,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/macro","display_name":"Macro","score":0.9326621294021606},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.681740403175354},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.6685889363288879},{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.6124226450920105},{"id":"https://openalex.org/keywords/variety","display_name":"Variety (cybernetics)","score":0.5493867993354797},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4883591830730438},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.4798528552055359},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.4605104327201843},{"id":"https://openalex.org/keywords/markov-decision-process","display_name":"Markov decision process","score":0.4369596540927887},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3938892185688019},{"id":"https://openalex.org/keywords/markov-process","display_name":"Markov process","score":0.19951462745666504},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.10978427529335022}],"concepts":[{"id":"https://openalex.org/C166955791","wikidata":"https://www.wikidata.org/wiki/Q629579","display_name":"Macro","level":2,"score":0.9326621294021606},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.681740403175354},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.6685889363288879},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.6124226450920105},{"id":"https://openalex.org/C136197465","wikidata":"https://www.wikidata.org/wiki/Q1729295","display_name":"Variety (cybernetics)","level":2,"score":0.5493867993354797},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4883591830730438},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.4798528552055359},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.4605104327201843},{"id":"https://openalex.org/C106189395","wikidata":"https://www.wikidata.org/wiki/Q176789","display_name":"Markov decision process","level":3,"score":0.4369596540927887},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3938892185688019},{"id":"https://openalex.org/C159886148","wikidata":"https://www.wikidata.org/wiki/Q176645","display_name":"Markov process","level":2,"score":0.19951462745666504},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.10978427529335022},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/tnnls.2022.3213606","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tnnls.2022.3213606","pdf_url":null,"source":{"id":"https://openalex.org/S4210175523","display_name":"IEEE Transactions on Neural Networks and Learning Systems","issn_l":"2162-237X","issn":["2162-237X","2162-2388"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Neural Networks and Learning Systems","raw_type":"journal-article"},{"id":"pmid:36318571","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/36318571","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE transactions on neural networks and learning systems","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.7699999809265137,"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":32,"referenced_works":["https://openalex.org/W1503821144","https://openalex.org/W2006668710","https://openalex.org/W2095570298","https://openalex.org/W2109910161","https://openalex.org/W2110415190","https://openalex.org/W2139452777","https://openalex.org/W2145339207","https://openalex.org/W2150468603","https://openalex.org/W2157466031","https://openalex.org/W2160927913","https://openalex.org/W2292128556","https://openalex.org/W2401064609","https://openalex.org/W2605102581","https://openalex.org/W2810329704","https://openalex.org/W2905059440","https://openalex.org/W2924136311","https://openalex.org/W2982333832","https://openalex.org/W4214717370","https://openalex.org/W6601499978","https://openalex.org/W6601608957","https://openalex.org/W6604678340","https://openalex.org/W6631132174","https://openalex.org/W6675945467","https://openalex.org/W6684618555","https://openalex.org/W6717230150","https://openalex.org/W6718233198","https://openalex.org/W6729956949","https://openalex.org/W6729972426","https://openalex.org/W6733732937","https://openalex.org/W6741002519","https://openalex.org/W6752515464","https://openalex.org/W6766234440"],"related_works":["https://openalex.org/W2030816003","https://openalex.org/W3096874164","https://openalex.org/W1985560493","https://openalex.org/W2937181779","https://openalex.org/W2386410636","https://openalex.org/W2357975469","https://openalex.org/W2145363145","https://openalex.org/W1626977535","https://openalex.org/W2341346307","https://openalex.org/W3168977894"],"abstract_inverted_index":{"Macro":[0],"actions":[1,40,56,87],"have":[2,16],"been":[3],"demonstrated":[4],"to":[5,22,41,59,79,135,155],"be":[6,23,60,136],"beneficial":[7],"for":[8,25],"the":[9,53,84,92,105,121,141,150,157,166],"learning":[10],"processes":[11],"of":[12,20,95,107,168,176],"an":[13,64,77],"agent":[14,65,78],"and":[15,119],"encouraged":[17],"a":[18,43,69,108,114,127,130,174],"variety":[19],"techniques":[21,32],"developed":[24],"constructing":[26],"more":[27],"effective":[28],"ones.":[29],"However,":[30],"previous":[31],"usually":[33],"do":[34],"not":[35],"further":[36],"consider":[37],"combining":[38],"macro":[39,45,55,71,86,110,123,159,170],"form":[42],"synergistic":[44,70,109,158],"action":[46,72,111,124,160,171],"ensemble,":[47],"in":[48,100],"which":[49],"synergism":[50,134],"exhibits":[51],"when":[52],"constituent":[54],"are":[57],"favorable":[58],"jointly":[61],"used":[62],"by":[63,91,140],"during":[66],"evaluation.":[67],"Such":[68,129],"ensemble":[73,112,125],"may":[74],"potentially":[75],"allow":[76],"perform":[80],"even":[81],"better":[82],"than":[83],"individual":[85],"within":[88],"it.":[89],"Motivated":[90],"recent":[93],"advances":[94],"neural":[96],"architecture":[97],"search":[98],"(NAS),":[99],"this":[101],"brief,":[102],"we":[103,163],"formulate":[104],"construction":[106],"as":[113,126],"Markov":[115],"decision":[116],"process":[117],"(MDP)":[118],"evaluate":[120],"constructed":[122],"whole.":[128],"problem":[131],"formulation":[132],"enables":[133],"taken":[137],"into":[138],"account":[139],"proposed":[142,151],"evaluation":[143],"procedure.":[144],"Our":[145],"experimental":[146],"results":[147],"demonstrate":[148],"that":[149],"framework":[152],"is":[153],"able":[154],"discover":[156],"ensembles.":[161],"Furthermore,":[162],"also":[164],"highlight":[165],"benefits":[167],"these":[169],"ensembles":[172],"through":[173],"set":[175],"analytical":[177],"cases.":[178]},"counts_by_year":[{"year":2024,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
