{"id":"https://openalex.org/W2766212724","doi":"https://doi.org/10.1587/transinf.2017edl8112","title":"A Study of Qualitative Knowledge-Based Exploration for Continuous Deep Reinforcement Learning","display_name":"A Study of Qualitative Knowledge-Based Exploration for Continuous Deep Reinforcement Learning","publication_year":2017,"publication_date":"2017-01-01","ids":{"openalex":"https://openalex.org/W2766212724","doi":"https://doi.org/10.1587/transinf.2017edl8112","mag":"2766212724"},"language":"en","primary_location":{"id":"doi:10.1587/transinf.2017edl8112","is_oa":true,"landing_page_url":"https://doi.org/10.1587/transinf.2017edl8112","pdf_url":"https://www.jstage.jst.go.jp/article/transinf/E100.D/11/E100.D_2017EDL8112/_pdf","source":{"id":"https://openalex.org/S2486202937","display_name":"IEICE Transactions on Information and Systems","issn_l":"0916-8532","issn":["0916-8532","1745-1361"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4320800604","host_organization_name":"Institute of Electronics, Information and Communication Engineers","host_organization_lineage":["https://openalex.org/P4320800604"],"host_organization_lineage_names":["Institute of Electronics, Information and Communication Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEICE Transactions on Information and Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://www.jstage.jst.go.jp/article/transinf/E100.D/11/E100.D_2017EDL8112/_pdf","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5062460840","display_name":"Chenxi Li","orcid":"https://orcid.org/0000-0002-0963-4363"},"institutions":[{"id":"https://openalex.org/I4210163363","display_name":"PLA Army Engineering University","ror":"https://ror.org/05mgp8x93","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210163363"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chenxi LI","raw_affiliation_strings":["Institute of Command Information System, PLA University of Science and Technology"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Institute of Command Information System, PLA University of Science and Technology","institution_ids":["https://openalex.org/I4210163363"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5049926126","display_name":"Lei Cao","orcid":"https://orcid.org/0000-0001-9909-8607"},"institutions":[{"id":"https://openalex.org/I4210163363","display_name":"PLA Army Engineering University","ror":"https://ror.org/05mgp8x93","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210163363"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Lei CAO","raw_affiliation_strings":["Institute of Command Information System, PLA University of Science and Technology"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Institute of Command Information System, PLA University of Science and Technology","institution_ids":["https://openalex.org/I4210163363"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5086697409","display_name":"Xiaoming Liu","orcid":"https://orcid.org/0000-0003-1130-6630"},"institutions":[{"id":"https://openalex.org/I4210163363","display_name":"PLA Army Engineering University","ror":"https://ror.org/05mgp8x93","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210163363"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaoming LIU","raw_affiliation_strings":["Institute of Command Information System, PLA University of Science and Technology"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Institute of Command Information System, PLA University of Science and Technology","institution_ids":["https://openalex.org/I4210163363"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5030992865","display_name":"Xiliang Chen","orcid":"https://orcid.org/0000-0001-5198-0932"},"institutions":[{"id":"https://openalex.org/I4210163363","display_name":"PLA Army Engineering University","ror":"https://ror.org/05mgp8x93","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210163363"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiliang CHEN","raw_affiliation_strings":["Institute of Command Information System, PLA University of Science and Technology"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Institute of Command Information System, PLA University of Science and Technology","institution_ids":["https://openalex.org/I4210163363"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5072596372","display_name":"Zhixiong Xu","orcid":"https://orcid.org/0000-0003-0996-436X"},"institutions":[{"id":"https://openalex.org/I4210163363","display_name":"PLA Army Engineering University","ror":"https://ror.org/05mgp8x93","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210163363"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhixiong XU","raw_affiliation_strings":["Institute of Command Information System, PLA University of Science and Technology"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Institute of Command Information System, PLA University of Science and Technology","institution_ids":["https://openalex.org/I4210163363"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100398081","display_name":"Yongliang Zhang","orcid":"https://orcid.org/0000-0002-8781-8504"},"institutions":[{"id":"https://openalex.org/I4210163363","display_name":"PLA Army Engineering University","ror":"https://ror.org/05mgp8x93","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210163363"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yongliang ZHANG","raw_affiliation_strings":["Institute of Command Information System, PLA University of Science and Technology"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Institute of Command Information System, PLA University of Science and Technology","institution_ids":["https://openalex.org/I4210163363"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.5064,"has_fulltext":true,"cited_by_count":3,"citation_normalized_percentile":{"value":0.75916371,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":"E100.D","issue":"11","first_page":"2721","last_page":"2724"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T13832","display_name":"Advanced Decision-Making Techniques","score":0.9545000195503235,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T13832","display_name":"Advanced Decision-Making Techniques","score":0.9545000195503235,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11303","display_name":"Bayesian Modeling and Causal Inference","score":0.9463000297546387,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10050","display_name":"Multi-Criteria Decision Making","score":0.941100001335144,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.9183974266052246},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8431582450866699},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6128974556922913},{"id":"https://openalex.org/keywords/heuristics","display_name":"Heuristics","score":0.6090816855430603},{"id":"https://openalex.org/keywords/dilemma","display_name":"Dilemma","score":0.5956259965896606},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.5005953311920166},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.44783735275268555},{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.4154691696166992}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.9183974266052246},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8431582450866699},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6128974556922913},{"id":"https://openalex.org/C127705205","wikidata":"https://www.wikidata.org/wiki/Q5748245","display_name":"Heuristics","level":2,"score":0.6090816855430603},{"id":"https://openalex.org/C2778496695","wikidata":"https://www.wikidata.org/wiki/Q254128","display_name":"Dilemma","level":2,"score":0.5956259965896606},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5005953311920166},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.44783735275268555},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.4154691696166992},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1587/transinf.2017edl8112","is_oa":true,"landing_page_url":"https://doi.org/10.1587/transinf.2017edl8112","pdf_url":"https://www.jstage.jst.go.jp/article/transinf/E100.D/11/E100.D_2017EDL8112/_pdf","source":{"id":"https://openalex.org/S2486202937","display_name":"IEICE Transactions on Information and Systems","issn_l":"0916-8532","issn":["0916-8532","1745-1361"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4320800604","host_organization_name":"Institute of Electronics, Information and Communication Engineers","host_organization_lineage":["https://openalex.org/P4320800604"],"host_organization_lineage_names":["Institute of Electronics, Information and Communication Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEICE Transactions on Information and Systems","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1587/transinf.2017edl8112","is_oa":true,"landing_page_url":"https://doi.org/10.1587/transinf.2017edl8112","pdf_url":"https://www.jstage.jst.go.jp/article/transinf/E100.D/11/E100.D_2017EDL8112/_pdf","source":{"id":"https://openalex.org/S2486202937","display_name":"IEICE Transactions on Information and Systems","issn_l":"0916-8532","issn":["0916-8532","1745-1361"],"is_oa":true,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4320800604","host_organization_name":"Institute of Electronics, Information and Communication Engineers","host_organization_lineage":["https://openalex.org/P4320800604"],"host_organization_lineage_names":["Institute of Electronics, Information and Communication Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEICE Transactions on Information and Systems","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2766212724.pdf","grobid_xml":"https://content.openalex.org/works/W2766212724.grobid-xml"},"referenced_works_count":9,"referenced_works":["https://openalex.org/W193076044","https://openalex.org/W605348272","https://openalex.org/W1553483187","https://openalex.org/W2038914522","https://openalex.org/W2074272451","https://openalex.org/W2140584963","https://openalex.org/W2173248099","https://openalex.org/W2963302368","https://openalex.org/W4214717370"],"related_works":["https://openalex.org/W2280422768","https://openalex.org/W3143197806","https://openalex.org/W4252555497","https://openalex.org/W2350209916","https://openalex.org/W2475524763","https://openalex.org/W3121175838","https://openalex.org/W3016293053","https://openalex.org/W2401723157","https://openalex.org/W2784269775","https://openalex.org/W2952904874"],"abstract_inverted_index":{"As":[0],"an":[1,47],"important":[2],"method":[3],"to":[4,26,42,64,76],"solve":[5],"sequential":[6],"decision-making":[7],"problems,":[8],"reinforcement":[9,58,83],"learning":[10,59,98],"learns":[11],"the":[12,17,31,34,72,78,97],"policy":[13],"of":[14,30],"tasks":[15],"through":[16],"interaction":[18],"with":[19],"environment.":[20],"But":[21],"it":[22,70],"has":[23],"difficulties":[24],"scaling":[25],"large-scale":[27],"problems.":[28],"One":[29],"reasons":[32],"is":[33],"exploration":[35,74],"and":[36],"exploitation":[37],"dilemma":[38],"which":[39],"may":[40],"lead":[41],"inefficient":[43],"learning.":[44,84],"We":[45,68],"present":[46],"approach":[48,91],"that":[49,89],"addresses":[50],"this":[51],"shortcoming":[52],"by":[53],"introducing":[54],"qualitative":[55],"knowledge":[56],"into":[57],"using":[60],"cloud":[61],"control":[62],"systems":[63],"represent":[65],"\u2018if-then\u2019":[66],"rules.":[67],"use":[69],"as":[71],"heuristics":[73],"strategy":[75],"guide":[77],"action":[79],"selection":[80],"in":[81,96],"deep":[82],"Empirical":[85],"evaluation":[86],"results":[87],"show":[88],"our":[90],"can":[92],"make":[93],"significant":[94],"improvement":[95],"process.":[99]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":1},{"year":2020,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
