{"id":"https://openalex.org/W3178853440","doi":"https://doi.org/10.1109/access.2021.3095392","title":"Speeding-Up Action Learning in a Social Robot With Dyna-Q+: A Bioinspired Probabilistic Model Approach","display_name":"Speeding-Up Action Learning in a Social Robot With Dyna-Q+: A Bioinspired Probabilistic Model Approach","publication_year":2021,"publication_date":"2021-01-01","ids":{"openalex":"https://openalex.org/W3178853440","doi":"https://doi.org/10.1109/access.2021.3095392","mag":"3178853440"},"language":"en","primary_location":{"id":"doi:10.1109/access.2021.3095392","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2021.3095392","pdf_url":"https://ieeexplore.ieee.org/ielx7/6287639/9312710/09476021.pdf","source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://ieeexplore.ieee.org/ielx7/6287639/9312710/09476021.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5018501285","display_name":"Marcos Maroto\u2010G\u00f3mez","orcid":"https://orcid.org/0000-0001-9576-1731"},"institutions":[{"id":"https://openalex.org/I50357001","display_name":"Universidad Carlos III de Madrid","ror":"https://ror.org/03ths8210","country_code":"ES","type":"education","lineage":["https://openalex.org/I50357001"]}],"countries":["ES"],"is_corresponding":true,"raw_author_name":"Marcos Maroto-Gomez","raw_affiliation_strings":["Department of Systems Engineering and Automation, University Carlos III of Madrid, Madrid, Spain"],"affiliations":[{"raw_affiliation_string":"Department of Systems Engineering and Automation, University Carlos III of Madrid, Madrid, Spain","institution_ids":["https://openalex.org/I50357001"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5027415301","display_name":"Rodrigo A. Gonz\u00e1lez","orcid":"https://orcid.org/0000-0002-5106-2784"},"institutions":[{"id":"https://openalex.org/I50357001","display_name":"Universidad Carlos III de Madrid","ror":"https://ror.org/03ths8210","country_code":"ES","type":"education","lineage":["https://openalex.org/I50357001"]}],"countries":["ES"],"is_corresponding":false,"raw_author_name":"Rodrigo Gonzalez","raw_affiliation_strings":["Department of Systems Engineering and Automation, University Carlos III of Madrid, Madrid, Spain"],"affiliations":[{"raw_affiliation_string":"Department of Systems Engineering and Automation, University Carlos III of Madrid, Madrid, Spain","institution_ids":["https://openalex.org/I50357001"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5066362955","display_name":"\u00c1lvaro Castro\u2010Gonz\u00e1lez","orcid":"https://orcid.org/0000-0002-5189-0002"},"institutions":[{"id":"https://openalex.org/I50357001","display_name":"Universidad Carlos III de Madrid","ror":"https://ror.org/03ths8210","country_code":"ES","type":"education","lineage":["https://openalex.org/I50357001"]}],"countries":["ES"],"is_corresponding":false,"raw_author_name":"Alvaro Castro-Gonzalez","raw_affiliation_strings":["Department of Systems Engineering and Automation, University Carlos III of Madrid, Madrid, Spain"],"affiliations":[{"raw_affiliation_string":"Department of Systems Engineering and Automation, University Carlos III of Madrid, Madrid, Spain","institution_ids":["https://openalex.org/I50357001"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5060557478","display_name":"Mar\u00eda Malf\u00e1z","orcid":"https://orcid.org/0000-0003-2317-3329"},"institutions":[{"id":"https://openalex.org/I50357001","display_name":"Universidad Carlos III de Madrid","ror":"https://ror.org/03ths8210","country_code":"ES","type":"education","lineage":["https://openalex.org/I50357001"]}],"countries":["ES"],"is_corresponding":false,"raw_author_name":"Maria Malfaz","raw_affiliation_strings":["Department of Systems Engineering and Automation, University Carlos III of Madrid, Madrid, Spain"],"affiliations":[{"raw_affiliation_string":"Department of Systems Engineering and Automation, University Carlos III of Madrid, Madrid, Spain","institution_ids":["https://openalex.org/I50357001"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5077065409","display_name":"Miguel \u00c1. Salichs","orcid":"https://orcid.org/0000-0002-0263-6606"},"institutions":[{"id":"https://openalex.org/I50357001","display_name":"Universidad Carlos III de Madrid","ror":"https://ror.org/03ths8210","country_code":"ES","type":"education","lineage":["https://openalex.org/I50357001"]}],"countries":["ES"],"is_corresponding":false,"raw_author_name":"Miguel Angel Salichs","raw_affiliation_strings":["Department of Systems Engineering and Automation, University Carlos III of Madrid, Madrid, Spain"],"affiliations":[{"raw_affiliation_string":"Department of Systems Engineering and Automation, University Carlos III of Madrid, Madrid, Spain","institution_ids":["https://openalex.org/I50357001"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5018501285"],"corresponding_institution_ids":["https://openalex.org/I50357001"],"apc_list":{"value":1850,"currency":"USD","value_usd":1850},"apc_paid":{"value":1850,"currency":"USD","value_usd":1850},"fwci":1.2599,"has_fulltext":true,"cited_by_count":12,"citation_normalized_percentile":{"value":0.83644863,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":"9","issue":null,"first_page":"98381","last_page":"98397"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10456","display_name":"Multi-Agent Systems and Negotiation","score":0.9682000279426575,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10709","display_name":"Social Robot Interaction and HRI","score":0.9672999978065491,"subfield":{"id":"https://openalex.org/subfields/3207","display_name":"Social Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.7033461332321167},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6805641651153564},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.6768282651901245},{"id":"https://openalex.org/keywords/robot-learning","display_name":"Robot learning","score":0.6231390237808228},{"id":"https://openalex.org/keywords/probabilistic-logic","display_name":"Probabilistic logic","score":0.5509251952171326},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5283458828926086},{"id":"https://openalex.org/keywords/social-robot","display_name":"Social robot","score":0.4959891736507416},{"id":"https://openalex.org/keywords/q-learning","display_name":"Q-learning","score":0.48388561606407166},{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.4766698181629181},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.44507160782814026},{"id":"https://openalex.org/keywords/convergence","display_name":"Convergence (economics)","score":0.4355280101299286},{"id":"https://openalex.org/keywords/social-learning","display_name":"Social learning","score":0.41716018319129944},{"id":"https://openalex.org/keywords/mobile-robot","display_name":"Mobile robot","score":0.4160321056842804},{"id":"https://openalex.org/keywords/robot-control","display_name":"Robot control","score":0.29103222489356995}],"concepts":[{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.7033461332321167},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6805641651153564},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.6768282651901245},{"id":"https://openalex.org/C188888258","wikidata":"https://www.wikidata.org/wiki/Q7353390","display_name":"Robot learning","level":4,"score":0.6231390237808228},{"id":"https://openalex.org/C49937458","wikidata":"https://www.wikidata.org/wiki/Q2599292","display_name":"Probabilistic logic","level":2,"score":0.5509251952171326},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5283458828926086},{"id":"https://openalex.org/C162947575","wikidata":"https://www.wikidata.org/wiki/Q2005645","display_name":"Social robot","level":5,"score":0.4959891736507416},{"id":"https://openalex.org/C188116033","wikidata":"https://www.wikidata.org/wiki/Q2664563","display_name":"Q-learning","level":3,"score":0.48388561606407166},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.4766698181629181},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.44507160782814026},{"id":"https://openalex.org/C2777303404","wikidata":"https://www.wikidata.org/wiki/Q759757","display_name":"Convergence (economics)","level":2,"score":0.4355280101299286},{"id":"https://openalex.org/C79416737","wikidata":"https://www.wikidata.org/wiki/Q2305519","display_name":"Social learning","level":2,"score":0.41716018319129944},{"id":"https://openalex.org/C19966478","wikidata":"https://www.wikidata.org/wiki/Q4810574","display_name":"Mobile robot","level":3,"score":0.4160321056842804},{"id":"https://openalex.org/C65401140","wikidata":"https://www.wikidata.org/wiki/Q7353385","display_name":"Robot control","level":4,"score":0.29103222489356995},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C56739046","wikidata":"https://www.wikidata.org/wiki/Q192060","display_name":"Knowledge management","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C50522688","wikidata":"https://www.wikidata.org/wiki/Q189833","display_name":"Economic growth","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1109/access.2021.3095392","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2021.3095392","pdf_url":"https://ieeexplore.ieee.org/ielx7/6287639/9312710/09476021.pdf","source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:f6cc53301c8a4524838547dc0f94d4bb","is_oa":true,"landing_page_url":"https://doaj.org/article/f6cc53301c8a4524838547dc0f94d4bb","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"IEEE Access, Vol 9, Pp 98381-98397 (2021)","raw_type":"article"},{"id":"pmh:oai:e-archivo.uc3m.es:10016/35377","is_oa":true,"landing_page_url":"http://hdl.handle.net/10016/35377","pdf_url":null,"source":{"id":"https://openalex.org/S4306400817","display_name":"e-Archivo (Carlos III University of Madrid)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I50357001","host_organization_name":"Universidad Carlos III de Madrid","host_organization_lineage":["https://openalex.org/I50357001"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"info:eu-repo/semantics/article"}],"best_oa_location":{"id":"doi:10.1109/access.2021.3095392","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2021.3095392","pdf_url":"https://ieeexplore.ieee.org/ielx7/6287639/9312710/09476021.pdf","source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/10","display_name":"Reduced inequalities","score":0.47999998927116394}],"awards":[{"id":"https://openalex.org/G146716404","display_name":null,"funder_award_id":"S2018/NMT-4331","funder_id":"https://openalex.org/F4320313831","funder_display_name":"Comunidad de Madrid"},{"id":"https://openalex.org/G1718757999","display_name":null,"funder_award_id":"S2018/NMT-4331","funder_id":"https://openalex.org/F4320320300","funder_display_name":"European Commission"},{"id":"https://openalex.org/G5130045262","display_name":null,"funder_award_id":"RoboCity2030-DIH-CM","funder_id":"https://openalex.org/F4320313831","funder_display_name":"Comunidad de Madrid"},{"id":"https://openalex.org/G8051717526","display_name":null,"funder_award_id":"Grant","funder_id":"https://openalex.org/F4320320300","funder_display_name":"European Commission"}],"funders":[{"id":"https://openalex.org/F4320313831","display_name":"Comunidad de Madrid","ror":null},{"id":"https://openalex.org/F4320315062","display_name":"Ministerio de Ciencia, Innovaci\u00f3n y Universidades","ror":null},{"id":"https://openalex.org/F4320320300","display_name":"European Commission","ror":"https://ror.org/00k4n6c32"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3178853440.pdf","grobid_xml":"https://content.openalex.org/works/W3178853440.grobid-xml"},"referenced_works_count":68,"referenced_works":["https://openalex.org/W14005205","https://openalex.org/W32403112","https://openalex.org/W169931978","https://openalex.org/W189976719","https://openalex.org/W1569173427","https://openalex.org/W1970491986","https://openalex.org/W1970933967","https://openalex.org/W1980035368","https://openalex.org/W1986014385","https://openalex.org/W1991204540","https://openalex.org/W1991564165","https://openalex.org/W2002288980","https://openalex.org/W2005250710","https://openalex.org/W2031904986","https://openalex.org/W2064012529","https://openalex.org/W2065219958","https://openalex.org/W2107726111","https://openalex.org/W2111438996","https://openalex.org/W2113913482","https://openalex.org/W2118123110","https://openalex.org/W2121863487","https://openalex.org/W2132417839","https://openalex.org/W2148427592","https://openalex.org/W2151717183","https://openalex.org/W2154543439","https://openalex.org/W2157936707","https://openalex.org/W2165332998","https://openalex.org/W2165934840","https://openalex.org/W2166836680","https://openalex.org/W2238007812","https://openalex.org/W2556764949","https://openalex.org/W2570651606","https://openalex.org/W2591951582","https://openalex.org/W2592373391","https://openalex.org/W2604216058","https://openalex.org/W2768314414","https://openalex.org/W2775496038","https://openalex.org/W2798494119","https://openalex.org/W2801662919","https://openalex.org/W2808171219","https://openalex.org/W2808281222","https://openalex.org/W2887260263","https://openalex.org/W2887320834","https://openalex.org/W2894609524","https://openalex.org/W2895879715","https://openalex.org/W2905180035","https://openalex.org/W2909011093","https://openalex.org/W2909564046","https://openalex.org/W2964319688","https://openalex.org/W2966477753","https://openalex.org/W2981759303","https://openalex.org/W2999888342","https://openalex.org/W3001333097","https://openalex.org/W3003996862","https://openalex.org/W3019139240","https://openalex.org/W3025028236","https://openalex.org/W3081678145","https://openalex.org/W3082865817","https://openalex.org/W3087814763","https://openalex.org/W3128086886","https://openalex.org/W3128384888","https://openalex.org/W4214717370","https://openalex.org/W4230981265","https://openalex.org/W4285719527","https://openalex.org/W6607730511","https://openalex.org/W6750779175","https://openalex.org/W6752345289","https://openalex.org/W6776538114"],"related_works":["https://openalex.org/W3096874164","https://openalex.org/W4376605461","https://openalex.org/W2357975469","https://openalex.org/W2136202932","https://openalex.org/W3087814763","https://openalex.org/W2892507673","https://openalex.org/W2361647908","https://openalex.org/W2937181779","https://openalex.org/W2537866915","https://openalex.org/W3127551068"],"abstract_inverted_index":{"Robotic":[0],"systems":[1],"that":[2],"are":[3],"developed":[4],"for":[5],"social":[6,85,163],"and":[7,37,47,124,144],"dynamic":[8,183],"environments":[9],"require":[10],"adaptive":[11],"mechanisms":[12],"to":[13,121,166,172],"successfully":[14],"operate.":[15],"Consequently,":[16],"learning":[17,53,82,96,149],"from":[18],"rewards":[19],"has":[20],"provided":[21],"meaningful":[22],"results":[23],"in":[24,60,139,152,161,181],"applications":[25],"involving":[26],"human-robot":[27,132],"interaction.":[28,73],"In":[29,74],"those":[30],"cases":[31],"where":[32],"the":[33,38,52,80,109,119,127,148,170],"robot's":[34],"state":[35,178],"space":[36],"number":[39],"of":[40,83,108,141],"actions":[41],"is":[42,57,69],"extensive,":[43],"dimensionality":[44],"becomes":[45],"intractable":[46],"this":[48,75,153],"drastically":[49],"slows":[50],"down":[51],"process.":[54,150],"This":[55,111],"effect":[56],"specially":[58],"notorious":[59],"one-step":[61],"temporal":[62,93],"difference":[63,94],"methods":[64],"because":[65],"just":[66],"one":[67],"update":[68],"performed":[70],"per":[71],"robot-environment":[72],"paper,":[76],"we":[77,114,155],"prove":[78],"how":[79],"action-based":[81],"a":[84,105,158,182],"robot":[86,120],"can":[87],"be":[88],"improved":[89],"by":[90],"combining":[91],"classical":[92,137],"reinforcement":[95],"methods,":[97],"such":[98],"as":[99],"Q-learning":[100],"or":[101],"Q(":[102],"\u03bb),":[103],"with":[104,169],"probabilistic":[106],"model":[107],"environment.":[110,184],"architecture,":[112],"which":[113,146],"have":[115,156],"called":[116],"Dyna,":[117],"allows":[118],"simultaneously":[122],"act":[123],"plan":[125],"using":[126],"experience":[128],"obtained":[129],"during":[130],"real":[131],"interactions.":[133],"Principally,":[134],"Dyna":[135,159],"improves":[136],"algorithms":[138],"terms":[140],"convergence":[142],"speed":[143],"stability,":[145],"strengthens":[147],"Hence,":[151],"work":[154],"embedded":[157],"architecture":[160],"our":[162],"robot,":[164],"Mini,":[165],"endow":[167],"it":[168],"ability":[171],"autonomously":[173],"maintain":[174],"an":[175],"optimal":[176],"internal":[177],"while":[179],"living":[180]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":5},{"year":2023,"cited_by_count":3},{"year":2022,"cited_by_count":1}],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2021-07-19T00:00:00"}
