{"id":"https://openalex.org/W4220733308","doi":"https://doi.org/10.1109/tcyb.2022.3157892","title":"Double Sparse Deep Reinforcement Learning via Multilayer Sparse Coding and Nonconvex Regularized Pruning","display_name":"Double Sparse Deep Reinforcement Learning via Multilayer Sparse Coding and Nonconvex Regularized Pruning","publication_year":2022,"publication_date":"2022-03-22","ids":{"openalex":"https://openalex.org/W4220733308","doi":"https://doi.org/10.1109/tcyb.2022.3157892","pmid":"https://pubmed.ncbi.nlm.nih.gov/35316206"},"language":"en","primary_location":{"id":"doi:10.1109/tcyb.2022.3157892","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcyb.2022.3157892","pdf_url":null,"source":{"id":"https://openalex.org/S4210191041","display_name":"IEEE Transactions on Cybernetics","issn_l":"2168-2267","issn":["2168-2267","2168-2275"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Cybernetics","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Haoli Zhao","orcid":"https://orcid.org/0000-0002-4004-509X"},"institutions":[{"id":"https://openalex.org/I139024713","display_name":"Guangdong University of Technology","ror":"https://ror.org/04azbjn80","country_code":"CN","type":"education","lineage":["https://openalex.org/I139024713"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Haoli Zhao","raw_affiliation_strings":["School of Automation, Guangdong University of Technology, Guangzhou, China"],"affiliations":[{"raw_affiliation_string":"School of Automation, Guangdong University of Technology, Guangzhou, China","institution_ids":["https://openalex.org/I139024713"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Jiqiang Wu","orcid":null},"institutions":[{"id":"https://openalex.org/I139024713","display_name":"Guangdong University of Technology","ror":"https://ror.org/04azbjn80","country_code":"CN","type":"education","lineage":["https://openalex.org/I139024713"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jiqiang Wu","raw_affiliation_strings":["School of Automation, Guangdong University of Technology, Guangzhou, China"],"affiliations":[{"raw_affiliation_string":"School of Automation, Guangdong University of Technology, Guangzhou, China","institution_ids":["https://openalex.org/I139024713"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Zhenni Li","orcid":"https://orcid.org/0000-0001-8098-0341"},"institutions":[{"id":"https://openalex.org/I139024713","display_name":"Guangdong University of Technology","ror":"https://ror.org/04azbjn80","country_code":"CN","type":"education","lineage":["https://openalex.org/I139024713"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhenni Li","raw_affiliation_strings":["School of Automation and the Guangdong Key Laboratory of IoT Information Technology, Guangdong University of Technology, Guangzhou, China"],"affiliations":[{"raw_affiliation_string":"School of Automation and the Guangdong Key Laboratory of IoT Information Technology, Guangdong University of Technology, Guangzhou, China","institution_ids":["https://openalex.org/I139024713"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Wuhui Chen","orcid":"https://orcid.org/0000-0003-4430-7904"},"institutions":[{"id":"https://openalex.org/I139024713","display_name":"Guangdong University of Technology","ror":"https://ror.org/04azbjn80","country_code":"CN","type":"education","lineage":["https://openalex.org/I139024713"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wuhui Chen","raw_affiliation_strings":["School of Automation, Guangdong University of Technology, Guangzhou, China"],"affiliations":[{"raw_affiliation_string":"School of Automation, Guangdong University of Technology, Guangzhou, China","institution_ids":["https://openalex.org/I139024713"]}]},{"author_position":"last","author":{"id":null,"display_name":"Zibin Zheng","orcid":"https://orcid.org/0000-0001-7872-7718"},"institutions":[{"id":"https://openalex.org/I139024713","display_name":"Guangdong University of Technology","ror":"https://ror.org/04azbjn80","country_code":"CN","type":"education","lineage":["https://openalex.org/I139024713"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zibin Zheng","raw_affiliation_strings":["School of Automation, Guangdong University of Technology, Guangzhou, China"],"affiliations":[{"raw_affiliation_string":"School of Automation, Guangdong University of Technology, Guangzhou, China","institution_ids":["https://openalex.org/I139024713"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I139024713"],"apc_list":null,"apc_paid":null,"fwci":3.999,"has_fulltext":false,"cited_by_count":29,"citation_normalized_percentile":{"value":0.94338413,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":94,"max":99},"biblio":{"volume":"53","issue":"2","first_page":"765","last_page":"778"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.7950000166893005,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.7950000166893005,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12794","display_name":"Adaptive Dynamic Programming Control","score":0.017500000074505806,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12101","display_name":"Advanced Bandit Algorithms Research","score":0.011699999682605267,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/sparse-approximation","display_name":"Sparse approximation","score":0.6741999983787537},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.6349999904632568},{"id":"https://openalex.org/keywords/neural-coding","display_name":"Neural coding","score":0.6075999736785889},{"id":"https://openalex.org/keywords/pruning","display_name":"Pruning","score":0.6051999926567078},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.4916999936103821},{"id":"https://openalex.org/keywords/coding","display_name":"Coding (social sciences)","score":0.48249998688697815},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.43849998712539673}],"concepts":[{"id":"https://openalex.org/C124066611","wikidata":"https://www.wikidata.org/wiki/Q28684319","display_name":"Sparse approximation","level":2,"score":0.6741999983787537},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6478000283241272},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.6349999904632568},{"id":"https://openalex.org/C77637269","wikidata":"https://www.wikidata.org/wiki/Q7002051","display_name":"Neural coding","level":2,"score":0.6075999736785889},{"id":"https://openalex.org/C108010975","wikidata":"https://www.wikidata.org/wiki/Q500094","display_name":"Pruning","level":2,"score":0.6051999926567078},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5386999845504761},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.4916999936103821},{"id":"https://openalex.org/C179518139","wikidata":"https://www.wikidata.org/wiki/Q5140297","display_name":"Coding (social sciences)","level":2,"score":0.48249998688697815},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.45680001378059387},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.43849998712539673},{"id":"https://openalex.org/C32022120","wikidata":"https://www.wikidata.org/wiki/Q797225","display_name":"Interference (communication)","level":3,"score":0.4341999888420105},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.4077000021934509},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.4034999907016754},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.39489999413490295},{"id":"https://openalex.org/C56372850","wikidata":"https://www.wikidata.org/wiki/Q1050404","display_name":"Sparse matrix","level":3,"score":0.3853999972343445},{"id":"https://openalex.org/C2984842247","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep neural networks","level":3,"score":0.31690001487731934},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.28200000524520874},{"id":"https://openalex.org/C147764199","wikidata":"https://www.wikidata.org/wiki/Q6865248","display_name":"Minification","level":2,"score":0.27559998631477356}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/tcyb.2022.3157892","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcyb.2022.3157892","pdf_url":null,"source":{"id":"https://openalex.org/S4210191041","display_name":"IEEE Transactions on Cybernetics","issn_l":"2168-2267","issn":["2168-2267","2168-2275"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Cybernetics","raw_type":"journal-article"},{"id":"pmid:35316206","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/35316206","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE transactions on cybernetics","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":33,"referenced_works":["https://openalex.org/W32403112","https://openalex.org/W1641720636","https://openalex.org/W2034219614","https://openalex.org/W2043317267","https://openalex.org/W2145339207","https://openalex.org/W2744511404","https://openalex.org/W2752693045","https://openalex.org/W2902098903","https://openalex.org/W2909711564","https://openalex.org/W2949226003","https://openalex.org/W2958659987","https://openalex.org/W2963097726","https://openalex.org/W2963473026","https://openalex.org/W2963747696","https://openalex.org/W2964547635","https://openalex.org/W2990761096","https://openalex.org/W2991820108","https://openalex.org/W2997006708","https://openalex.org/W2998367712","https://openalex.org/W2999204576","https://openalex.org/W3035136756","https://openalex.org/W3035367521","https://openalex.org/W3041202696","https://openalex.org/W3049366385","https://openalex.org/W6674201379","https://openalex.org/W6674330103","https://openalex.org/W6683204974","https://openalex.org/W6732837357","https://openalex.org/W6757677476","https://openalex.org/W6767036268","https://openalex.org/W6775420989","https://openalex.org/W6784098896","https://openalex.org/W6785282536"],"related_works":[],"abstract_inverted_index":{"Deep":[0],"reinforcement":[1,81],"learning":[2],"(DRL),":[3],"which":[4,112,194],"highly":[5],"depends":[6],"on":[7],"the":[8,21,96,122,131,142,150,158,183,199,202,206],"data":[9],"representation,":[10],"has":[11],"shown":[12],"its":[13],"potential":[14],"in":[15,26,65,80,137,167,192],"many":[16],"practical":[17],"decision-making":[18],"problems.":[19],"However,":[20],"process":[22],"of":[23,141,201],"acquiring":[24],"representations":[25,156],"DRL":[27,53,108,166,186],"is":[28,110],"easily":[29],"affected":[30],"by":[31],"interference":[32,64,123],"from":[33,157,182,218],"models,":[34],"and":[35,58,188],"moreover":[36],"leaves":[37],"unnecessary":[38,97],"parameters,":[39],"leading":[40],"to":[41,73,90,120],"control":[42,79],"performance":[43,216],"reduction.":[44],"In":[45],"this":[46],"article,":[47],"we":[48,67,84],"propose":[49,68],"a":[50,69,86,100,105],"double":[51,106],"sparse":[52,56,76,107,118,155,220],"via":[54],"multilayer":[55,70,159],"coding":[57],"nonconvex":[59,87],"regularized":[60],"pruning.":[61],"To":[62],"alleviate":[63],"DRL,":[66],"sparse-coding-structural":[71],"network":[72,145],"obtain":[74],"deep":[75,117,143,154,219],"representation":[77,119],"for":[78,169],"learning.":[82],"Furthermore,":[83],"employ":[85],"log":[88],"regularizer":[89],"promote":[91],"strong":[92],"sparsity,":[93],"efficiently":[94],"removing":[95],"weights":[98,128],"with":[99,153,174],"regularizer-based":[101],"pruning":[102],"scheme.":[103],"Hence,":[104],"algorithm":[109,208],"developed,":[111],"can":[113,162,209],"not":[114],"only":[115],"learn":[116],"reduce":[121,210],"but":[124],"also":[125],"remove":[126],"redundant":[127],"while":[129,214],"keeping":[130,215],"robust":[132],"performance.":[133],"The":[134],"experimental":[135],"results":[136],"five":[138],"benchmark":[139],"environments":[140],"q":[144],"(DQN)":[146],"architecture":[147],"demonstrate":[148],"that":[149],"proposed":[151,207],"method":[152],"sparse-coding":[160,185],"structure":[161],"outperform":[163],"existing":[164],"sparse-coding-based":[165],"control,":[168],"example,":[170],"completing":[171],"Mountain":[172],"Car":[173],"140.81":[175],"steps,":[176],"achieving":[177],"near":[178],"10%":[179],"reward":[180],"increase":[181],"single-layer":[184],"algorithm,":[187],"obtaining":[189],"286.08":[190],"scores":[191],"Catcher,":[193],"are":[195],"over":[196,211],"two":[197],"times":[198],"rewards":[200],"other":[203],"algorithms.":[204],"Moreover,":[205],"80%":[212],"parameters":[213],"improvements":[217],"representations.":[221]},"counts_by_year":[{"year":2025,"cited_by_count":4},{"year":2024,"cited_by_count":13},{"year":2023,"cited_by_count":10},{"year":2022,"cited_by_count":2}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2022-04-03T00:00:00"}