{"id":"https://openalex.org/W1972108682","doi":"https://doi.org/10.1109/ccmb.2013.6609170","title":"Model-based reinforcement learning for humanoids: A study on forming rewards with the iCub platform","display_name":"Model-based reinforcement learning for humanoids: A study on forming rewards with the iCub platform","publication_year":2013,"publication_date":"2013-04-01","ids":{"openalex":"https://openalex.org/W1972108682","doi":"https://doi.org/10.1109/ccmb.2013.6609170","mag":"1972108682"},"language":"en","primary_location":{"id":"doi:10.1109/ccmb.2013.6609170","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ccmb.2013.6609170","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2013 IEEE Symposium on Computational Intelligence, Cognitive Algorithms, Mind, and Brain (CCMB)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://zenodo.org/record/3430182","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5071895296","display_name":"Anestis Fachantidis","orcid":"https://orcid.org/0000-0003-2034-097X"},"institutions":[{"id":"https://openalex.org/I21370196","display_name":"Aristotle University of Thessaloniki","ror":"https://ror.org/02j61yw88","country_code":"GR","type":"education","lineage":["https://openalex.org/I21370196"]}],"countries":["GR"],"is_corresponding":true,"raw_author_name":"Anestis Fachantidis","raw_affiliation_strings":["Department of Informatics, Aristotle University of Thessaloniki, Thessaloniki, Greece","Dept. of Inf., Aristotle Univ. of Thessaloniki, Thessaloniki, Greece#TAB#"],"affiliations":[{"raw_affiliation_string":"Department of Informatics, Aristotle University of Thessaloniki, Thessaloniki, Greece","institution_ids":["https://openalex.org/I21370196"]},{"raw_affiliation_string":"Dept. of Inf., Aristotle Univ. of Thessaloniki, Thessaloniki, Greece#TAB#","institution_ids":["https://openalex.org/I21370196"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5073115503","display_name":"Alessandro Di Nuovo","orcid":"https://orcid.org/0000-0003-2677-2650"},"institutions":[{"id":"https://openalex.org/I897542642","display_name":"University of Plymouth","ror":"https://ror.org/008n7pv89","country_code":"GB","type":"education","lineage":["https://openalex.org/I897542642"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Alessandro Di Nuovo","raw_affiliation_strings":["Centre for Robotics and Neural Systems Plymouth University Drake Circus, Plymouth PL4 8AA, United Kingdom","Centre for Robot. & Neural Syst., Plymouth Univ., Plymouth, UK"],"affiliations":[{"raw_affiliation_string":"Centre for Robotics and Neural Systems Plymouth University Drake Circus, Plymouth PL4 8AA, United Kingdom","institution_ids":["https://openalex.org/I897542642"]},{"raw_affiliation_string":"Centre for Robot. & Neural Syst., Plymouth Univ., Plymouth, UK","institution_ids":["https://openalex.org/I897542642"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5091768977","display_name":"Angelo Cangelosi","orcid":"https://orcid.org/0000-0002-4709-2243"},"institutions":[{"id":"https://openalex.org/I897542642","display_name":"University of Plymouth","ror":"https://ror.org/008n7pv89","country_code":"GB","type":"education","lineage":["https://openalex.org/I897542642"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Angelo Cangelosi","raw_affiliation_strings":["Centre for Robotics and Neural Systems, Plymouth University, Plymouth, UK","Centre for Robot. & Neural Syst., Plymouth Univ., Plymouth, UK"],"affiliations":[{"raw_affiliation_string":"Centre for Robotics and Neural Systems, Plymouth University, Plymouth, UK","institution_ids":["https://openalex.org/I897542642"]},{"raw_affiliation_string":"Centre for Robot. & Neural Syst., Plymouth Univ., Plymouth, UK","institution_ids":["https://openalex.org/I897542642"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5066340205","display_name":"Ioannis Vlahavas","orcid":"https://orcid.org/0000-0003-3477-8825"},"institutions":[{"id":"https://openalex.org/I21370196","display_name":"Aristotle University of Thessaloniki","ror":"https://ror.org/02j61yw88","country_code":"GR","type":"education","lineage":["https://openalex.org/I21370196"]}],"countries":["GR"],"is_corresponding":false,"raw_author_name":"Ioannis Vlahavas","raw_affiliation_strings":["Department of Informatics, Aristotle University of Thessaloniki, Thessaloniki, Greece","Dept. of Inf., Aristotle Univ. of Thessaloniki, Thessaloniki, Greece#TAB#"],"affiliations":[{"raw_affiliation_string":"Department of Informatics, Aristotle University of Thessaloniki, Thessaloniki, Greece","institution_ids":["https://openalex.org/I21370196"]},{"raw_affiliation_string":"Dept. of Inf., Aristotle Univ. of Thessaloniki, Thessaloniki, Greece#TAB#","institution_ids":["https://openalex.org/I21370196"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5071895296"],"corresponding_institution_ids":["https://openalex.org/I21370196"],"apc_list":null,"apc_paid":null,"fwci":0.9885,"has_fulltext":false,"cited_by_count":8,"citation_normalized_percentile":{"value":0.81511252,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"87","last_page":"93"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9947999715805054,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9947999715805054,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11685","display_name":"Zebrafish Biomedical Research Applications","score":0.9929999709129333,"subfield":{"id":"https://openalex.org/subfields/1307","display_name":"Cell Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10879","display_name":"Robotic Locomotion and Control","score":0.9883000254631042,"subfield":{"id":"https://openalex.org/subfields/2204","display_name":"Biomedical Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/icub","display_name":"iCub","score":0.9944194555282593},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.7610187530517578},{"id":"https://openalex.org/keywords/cognitive-robotics","display_name":"Cognitive robotics","score":0.7327970862388611},{"id":"https://openalex.org/keywords/robotics","display_name":"Robotics","score":0.7093523144721985},{"id":"https://openalex.org/keywords/developmental-robotics","display_name":"Developmental robotics","score":0.7020766735076904},{"id":"https://openalex.org/keywords/humanoid-robot","display_name":"Humanoid robot","score":0.6392092704772949},{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.6378766894340515},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6059272289276123},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.5713497400283813},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.4834519326686859},{"id":"https://openalex.org/keywords/robot-learning","display_name":"Robot learning","score":0.4644601047039032},{"id":"https://openalex.org/keywords/perception","display_name":"Perception","score":0.45408207178115845},{"id":"https://openalex.org/keywords/behavior-based-robotics","display_name":"Behavior-based robotics","score":0.45006832480430603},{"id":"https://openalex.org/keywords/field","display_name":"Field (mathematics)","score":0.4358672797679901},{"id":"https://openalex.org/keywords/cognitive-architecture","display_name":"Cognitive architecture","score":0.43047839403152466},{"id":"https://openalex.org/keywords/cognition","display_name":"Cognition","score":0.4202103614807129},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3499055504798889},{"id":"https://openalex.org/keywords/mobile-robot","display_name":"Mobile robot","score":0.15517497062683105},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.1177569329738617}],"concepts":[{"id":"https://openalex.org/C111151474","wikidata":"https://www.wikidata.org/wiki/Q1653368","display_name":"iCub","level":4,"score":0.9944194555282593},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.7610187530517578},{"id":"https://openalex.org/C192327766","wikidata":"https://www.wikidata.org/wiki/Q1038799","display_name":"Cognitive robotics","level":3,"score":0.7327970862388611},{"id":"https://openalex.org/C34413123","wikidata":"https://www.wikidata.org/wiki/Q170978","display_name":"Robotics","level":3,"score":0.7093523144721985},{"id":"https://openalex.org/C11207580","wikidata":"https://www.wikidata.org/wiki/Q5266817","display_name":"Developmental robotics","level":4,"score":0.7020766735076904},{"id":"https://openalex.org/C60692881","wikidata":"https://www.wikidata.org/wiki/Q584529","display_name":"Humanoid robot","level":3,"score":0.6392092704772949},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.6378766894340515},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6059272289276123},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.5713497400283813},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.4834519326686859},{"id":"https://openalex.org/C188888258","wikidata":"https://www.wikidata.org/wiki/Q7353390","display_name":"Robot learning","level":4,"score":0.4644601047039032},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.45408207178115845},{"id":"https://openalex.org/C19766214","wikidata":"https://www.wikidata.org/wiki/Q4880688","display_name":"Behavior-based robotics","level":4,"score":0.45006832480430603},{"id":"https://openalex.org/C9652623","wikidata":"https://www.wikidata.org/wiki/Q190109","display_name":"Field (mathematics)","level":2,"score":0.4358672797679901},{"id":"https://openalex.org/C20854674","wikidata":"https://www.wikidata.org/wiki/Q4386060","display_name":"Cognitive architecture","level":3,"score":0.43047839403152466},{"id":"https://openalex.org/C169900460","wikidata":"https://www.wikidata.org/wiki/Q2200417","display_name":"Cognition","level":2,"score":0.4202103614807129},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3499055504798889},{"id":"https://openalex.org/C19966478","wikidata":"https://www.wikidata.org/wiki/Q4810574","display_name":"Mobile robot","level":3,"score":0.15517497062683105},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.1177569329738617},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C169760540","wikidata":"https://www.wikidata.org/wiki/Q207011","display_name":"Neuroscience","level":1,"score":0.0},{"id":"https://openalex.org/C202444582","wikidata":"https://www.wikidata.org/wiki/Q837863","display_name":"Pure mathematics","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1109/ccmb.2013.6609170","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ccmb.2013.6609170","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2013 IEEE Symposium on Computational Intelligence, Cognitive Algorithms, Mind, and Brain (CCMB)","raw_type":"proceedings-article"},{"id":"pmh:oai:pure.atira.dk:openaire_cris_publications/2835ef85-e684-4136-8929-953ae683bd99","is_oa":false,"landing_page_url":"https://research.manchester.ac.uk/en/publications/2835ef85-e684-4136-8929-953ae683bd99","pdf_url":null,"source":{"id":"https://openalex.org/S4306400662","display_name":"Research Explorer (The University of Manchester)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I28407311","host_organization_name":"University of Manchester","host_organization_lineage":["https://openalex.org/I28407311"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Fachantidis, A, Di Nuovo, A, Cangelosi, A & Vlahavas, I 2013, 'Model-Based Reinforcement Learning for Humanoids: A Study on Forming Rewards with the iCub platform', pp. 87-93. < http://gateway.webofknowledge.com/gateway/Gateway.cgi?GWVersion=2&SrcApp=PARTNER_APP&SrcAuth=LinksAMR&KeyUT=WOS:000335266900014&DestLinkType=FullRecord&DestApp=ALL_WOS&UsrCustomerID=11bb513d99f797142bcfeffcc58ea008 >","raw_type":"info:eu-repo/semantics/publishedVersion"},{"id":"pmh:oai:zenodo.org:3430182","is_oa":true,"landing_page_url":"https://zenodo.org/record/3430182","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"info:eu-repo/semantics/conferencePaper"}],"best_oa_location":{"id":"pmh:oai:zenodo.org:3430182","is_oa":true,"landing_page_url":"https://zenodo.org/record/3430182","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"info:eu-repo/semantics/conferencePaper"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":30,"referenced_works":["https://openalex.org/W32403112","https://openalex.org/W620953717","https://openalex.org/W1571143165","https://openalex.org/W1594783240","https://openalex.org/W1949804828","https://openalex.org/W1977655452","https://openalex.org/W2035215727","https://openalex.org/W2100119371","https://openalex.org/W2103048296","https://openalex.org/W2110292307","https://openalex.org/W2120772693","https://openalex.org/W2121863487","https://openalex.org/W2133120480","https://openalex.org/W2138671676","https://openalex.org/W2151547103","https://openalex.org/W2154284891","https://openalex.org/W2155352776","https://openalex.org/W2159554014","https://openalex.org/W2166725121","https://openalex.org/W2169771756","https://openalex.org/W2498075210","https://openalex.org/W2965916140","https://openalex.org/W3020747880","https://openalex.org/W4211089519","https://openalex.org/W4214717370","https://openalex.org/W6634240840","https://openalex.org/W6635508374","https://openalex.org/W6675223484","https://openalex.org/W6675759124","https://openalex.org/W6684849777"],"related_works":["https://openalex.org/W2795734790","https://openalex.org/W2142714010","https://openalex.org/W2077597530","https://openalex.org/W2097579122","https://openalex.org/W1972108682","https://openalex.org/W2018956394","https://openalex.org/W2462644085","https://openalex.org/W4235012568","https://openalex.org/W2996644777","https://openalex.org/W2413274964"],"abstract_inverted_index":{"Technological":[0],"advancements":[1],"in":[2,39,73,116,129,141,164],"robotics":[3,130],"and":[4,29,34,131,134,161],"cognitive":[5,16,58,111],"science":[6],"are":[7,21],"contributing":[8],"to":[9,23,27,35,53,63,89,166],"the":[10,13,25,81,84,91,110,137,171,182,185],"development":[11,93],"of":[12,15,48,83,94,113,170,178],"field":[14],"robotics.":[17,75],"Modern":[18],"robotic":[19,139],"platforms":[20],"able":[22],"exhibit":[24],"ability":[26],"learn":[28],"reason":[30],"about":[31],"complex":[32,40],"tasks":[33],"follow":[36],"behavioural":[37],"goals":[38],"environments.":[41],"Nevertheless,":[42],"many":[43],"challenges":[44,51],"still":[45],"exist.":[46],"One":[47],"these":[49,55],"great":[50],"is":[52,107],"equip":[54],"robots":[56],"with":[57,65,154,181],"systems":[59],"that":[60],"allow":[61],"them":[62],"deal":[64],"less":[66],"constrained":[67,70],"situations,":[68],"beyond":[69],"scenarios":[71],"as":[72],"industrial":[74],"In":[76,146],"this":[77,120],"work":[78],"we":[79,122,148],"explore":[80],"application":[82],"Reinforcement":[85],"Learning":[86],"(RL)":[87],"paradigm":[88],"study":[90],"autonomous":[92],"robot":[95,172,186],"controllers":[96],"without":[97],"a":[98,103,124],"priori":[99],"supervised":[100],"learning.":[101],"Such":[102],"model-based":[104],"RL":[105,115,128],"architecture":[106],"discussed":[108],"for":[109,127,136],"implications":[112],"applying":[114],"humanoid":[117],"robots.":[118],"To":[119],"end":[121],"show":[123],"developmental":[125],"framework":[126],"its":[132,175,188],"implementation":[133],"testing":[135],"iCub":[138,151],"platform":[140],"two":[142],"novel":[143],"experimental":[144],"scenarios.":[145],"particular":[147],"focus":[149],"on":[150],"simulation":[152],"experiments":[153],"comparisons":[155],"between":[156],"internal":[157],"perception-based":[158],"reward":[159],"signals":[160],"external":[162],"ones,":[163],"order":[165],"compare":[167],"learning":[168],"performance":[169],"guided":[173],"by":[174],"own":[176],"perception":[177],"action's":[179],"outcomes":[180],"one":[183],"when":[184],"has":[187],"actions":[189],"externally":[190],"evaluated.":[191]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":2},{"year":2018,"cited_by_count":1},{"year":2017,"cited_by_count":1},{"year":2014,"cited_by_count":2}],"updated_date":"2026-04-05T17:49:38.594831","created_date":"2016-06-24T00:00:00"}