{"id":"https://openalex.org/W1998556751","doi":"https://doi.org/10.1002/int.4550080805","title":"Reinforcement learning: Architectures and algorithms","display_name":"Reinforcement learning: Architectures and algorithms","publication_year":1993,"publication_date":"1993-01-01","ids":{"openalex":"https://openalex.org/W1998556751","doi":"https://doi.org/10.1002/int.4550080805","mag":"1998556751"},"language":"en","primary_location":{"id":"doi:10.1002/int.4550080805","is_oa":false,"landing_page_url":"https://doi.org/10.1002/int.4550080805","pdf_url":null,"source":{"id":"https://openalex.org/S57950554","display_name":"International Journal of Intelligent Systems","issn_l":"0884-8173","issn":["0884-8173","1098-111X"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320595","host_organization_name":"Wiley","host_organization_lineage":["https://openalex.org/P4310320595"],"host_organization_lineage_names":["Wiley"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"International Journal of Intelligent Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5003723197","display_name":"Mieczyslaw M. Kokar","orcid":"https://orcid.org/0000-0001-9243-3089"},"institutions":[{"id":"https://openalex.org/I12912129","display_name":"Northeastern University","ror":"https://ror.org/04t5xt781","country_code":"US","type":"education","lineage":["https://openalex.org/I12912129"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Mieczyslaw M. Kokar","raw_affiliation_strings":["Northeastern University, Boston, Massachusetts 02115","Northeastern University, Boston, Massachusetts  02115"],"affiliations":[{"raw_affiliation_string":"Northeastern University, Boston, Massachusetts 02115","institution_ids":["https://openalex.org/I12912129"]},{"raw_affiliation_string":"Northeastern University, Boston, Massachusetts  02115","institution_ids":["https://openalex.org/I12912129"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5110271014","display_name":"Spiridon A. Reveliotis","orcid":null},"institutions":[{"id":"https://openalex.org/I12912129","display_name":"Northeastern University","ror":"https://ror.org/04t5xt781","country_code":"US","type":"education","lineage":["https://openalex.org/I12912129"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Spiridon A. Reveliotis","raw_affiliation_strings":["Northeastern University, Boston, Massachusetts 02115","Northeastern University, Boston, Massachusetts  02115"],"affiliations":[{"raw_affiliation_string":"Northeastern University, Boston, Massachusetts 02115","institution_ids":["https://openalex.org/I12912129"]},{"raw_affiliation_string":"Northeastern University, Boston, Massachusetts  02115","institution_ids":["https://openalex.org/I12912129"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5003723197"],"corresponding_institution_ids":["https://openalex.org/I12912129"],"apc_list":{"value":2500,"currency":"USD","value_usd":2500},"apc_paid":null,"fwci":2.3285,"has_fulltext":false,"cited_by_count":16,"citation_normalized_percentile":{"value":0.88494113,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":96},"biblio":{"volume":"8","issue":"8","first_page":"875","last_page":"894"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11975","display_name":"Evolutionary Algorithms and Applications","score":0.9927999973297119,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12002","display_name":"Computability, Logic, AI Algorithms","score":0.9521999955177307,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.9004464745521545},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7164984345436096},{"id":"https://openalex.org/keywords/reinforcement","display_name":"Reinforcement","score":0.5979165434837341},{"id":"https://openalex.org/keywords/perception","display_name":"Perception","score":0.5769665837287903},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.5759819149971008},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5182821154594421},{"id":"https://openalex.org/keywords/error-driven-learning","display_name":"Error-driven learning","score":0.45678362250328064},{"id":"https://openalex.org/keywords/state","display_name":"State (computer science)","score":0.43150535225868225},{"id":"https://openalex.org/keywords/architecture","display_name":"Architecture","score":0.4165361225605011},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.16944265365600586},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.09280163049697876},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.06502455472946167}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.9004464745521545},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7164984345436096},{"id":"https://openalex.org/C67203356","wikidata":"https://www.wikidata.org/wiki/Q1321905","display_name":"Reinforcement","level":2,"score":0.5979165434837341},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.5769665837287903},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.5759819149971008},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5182821154594421},{"id":"https://openalex.org/C47932503","wikidata":"https://www.wikidata.org/wiki/Q5395689","display_name":"Error-driven learning","level":3,"score":0.45678362250328064},{"id":"https://openalex.org/C48103436","wikidata":"https://www.wikidata.org/wiki/Q599031","display_name":"State (computer science)","level":2,"score":0.43150535225868225},{"id":"https://openalex.org/C123657996","wikidata":"https://www.wikidata.org/wiki/Q12271","display_name":"Architecture","level":2,"score":0.4165361225605011},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.16944265365600586},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.09280163049697876},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.06502455472946167},{"id":"https://openalex.org/C169760540","wikidata":"https://www.wikidata.org/wiki/Q207011","display_name":"Neuroscience","level":1,"score":0.0},{"id":"https://openalex.org/C153349607","wikidata":"https://www.wikidata.org/wiki/Q36649","display_name":"Visual arts","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C66938386","wikidata":"https://www.wikidata.org/wiki/Q633538","display_name":"Structural engineering","level":1,"score":0.0},{"id":"https://openalex.org/C142362112","wikidata":"https://www.wikidata.org/wiki/Q735","display_name":"Art","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1002/int.4550080805","is_oa":false,"landing_page_url":"https://doi.org/10.1002/int.4550080805","pdf_url":null,"source":{"id":"https://openalex.org/S57950554","display_name":"International Journal of Intelligent Systems","issn_l":"0884-8173","issn":["0884-8173","1098-111X"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320595","host_organization_name":"Wiley","host_organization_lineage":["https://openalex.org/P4310320595"],"host_organization_lineage_names":["Wiley"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"International Journal of Intelligent Systems","raw_type":"journal-article"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.51.7347","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.51.7347","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://marshall.me.uiuc.edu/MISC/SpyrosRL-paper.ps","raw_type":"text"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":14,"referenced_works":["https://openalex.org/W1491843047","https://openalex.org/W1557517019","https://openalex.org/W1569296262","https://openalex.org/W1931792391","https://openalex.org/W2061361125","https://openalex.org/W2064018461","https://openalex.org/W2091565802","https://openalex.org/W2119717200","https://openalex.org/W2121905723","https://openalex.org/W6629953500","https://openalex.org/W6634103402","https://openalex.org/W6678155643","https://openalex.org/W6680905641","https://openalex.org/W6775686901"],"related_works":["https://openalex.org/W2371091044","https://openalex.org/W87513465","https://openalex.org/W2786230833","https://openalex.org/W2391666574","https://openalex.org/W2352650970","https://openalex.org/W1493952344","https://openalex.org/W4312372616","https://openalex.org/W8539471","https://openalex.org/W1544514152","https://openalex.org/W3203256658"],"abstract_inverted_index":{"This":[0,73],"article":[1,46,74],"is":[2,18,47],"related":[3],"to":[4,20,79,89,115,131,157],"the":[5,34,37,51,71,92,95,103,110,116,140,152,173,179,185],"research":[6,198],"effort":[7],"of":[8,40,53,84,94,112,123,139,148,169,172,178,191],"constructing":[9],"an":[10,59,189],"intelligent":[11],"agent,":[12],"i.e.,":[13,50],"a":[14,81],"computer":[15],"system":[16],"that":[17,91],"able":[19],"sense":[21],"its":[22,27],"environment":[23],"(world),":[24],"reason":[25],"utilizing":[26],"internal":[28],"knowledge":[29,56],"and":[30,87,128,175,195],"execute":[31],"actions":[32],"upon":[33,58],"world":[35],"(act).":[36],"specific":[38],"part":[39],"this":[41,45,200],"effor":[42],"presented":[43],"in":[44,151,199],"reinforcement":[48,85,96,124],"learning,":[49,86],"process":[52],"acquiring":[54],"new":[55],"based":[57],"evaluative":[60],"feedback,":[61],"called":[62],"reinforcement,":[63,161],"received":[64],"by":[65,102],"tht":[66],"agent":[67,181],"through":[68,109,167,182],"interactions":[69],"with":[70,159,163,188],"world.":[72],"has":[75,99],"two":[76],"objectives:":[77],"(1)":[78],"give":[80],"compact":[82],"overview":[83,190],"(2)":[88],"show":[90],"evolution":[93],"learning":[97,108,117,125,133,153,158,180],"paradigm":[98,142],"been":[100],"driven":[101],"need":[104],"for":[105,193,196],"more":[106,113],"efficient":[107],"addition":[111],"structure":[114],"agent.":[118],"Therefore,":[119],"both":[120],"main":[121],"ideas":[122],"are":[126,134,143],"introduced,":[127],"structural":[129],"solutions":[130,156],"reinforcemen":[132],"reviewed.":[135],"Several":[136],"architectural":[137,155],"enhancements":[138],"RL":[141],"discussed.":[144],"These":[145],"include":[146],"incorporation":[147],"state":[149],"information":[150],"process,":[154],"delayed":[160],"dealing":[162],"structurally":[164],"changing":[165],"worlds":[166],"utilization":[168],"multiple":[170],"models":[171],"world,":[174],"focusing":[176],"attention":[177],"active":[183],"perception.":[184],"paper":[186],"closes":[187],"directions":[192],"applications":[194],"future":[197],"area.":[201],"\u00a9":[202],"1993":[203],"John":[204],"Wiley":[205],"&":[206],"Sons,":[207],"Inc.":[208]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":1},{"year":2020,"cited_by_count":2},{"year":2019,"cited_by_count":1},{"year":2018,"cited_by_count":1},{"year":2017,"cited_by_count":1}],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2025-10-10T00:00:00"}
