{"id":"https://openalex.org/W1534844487","doi":"https://doi.org/10.1109/icsmc.2005.1571130","title":"An Actor-Critic Approach for Learning Cooperative Behaviors of Multiagent Seesaw Balancing Problems","display_name":"An Actor-Critic Approach for Learning Cooperative Behaviors of Multiagent Seesaw Balancing Problems","publication_year":2006,"publication_date":"2006-01-18","ids":{"openalex":"https://openalex.org/W1534844487","doi":"https://doi.org/10.1109/icsmc.2005.1571130","mag":"1534844487"},"language":"en","primary_location":{"id":"doi:10.1109/icsmc.2005.1571130","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icsmc.2005.1571130","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2005 IEEE International Conference on Systems, Man and Cybernetics","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5068195972","display_name":"Takashi Kawakami","orcid":"https://orcid.org/0000-0002-0011-2155"},"institutions":[{"id":"https://openalex.org/I104234503","display_name":"Hokkaido University of Science","ror":"https://ror.org/05gqsa340","country_code":"JP","type":"education","lineage":["https://openalex.org/I104234503"]}],"countries":["JP"],"is_corresponding":true,"raw_author_name":"T. Kawakami","raw_affiliation_strings":["Department of Information Design, Hokkaido Institute of Technology, Sapporo, Japan"],"affiliations":[{"raw_affiliation_string":"Department of Information Design, Hokkaido Institute of Technology, Sapporo, Japan","institution_ids":["https://openalex.org/I104234503"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5097197034","display_name":"M. Kinoshit","orcid":null},"institutions":[{"id":"https://openalex.org/I104234503","display_name":"Hokkaido University of Science","ror":"https://ror.org/05gqsa340","country_code":"JP","type":"education","lineage":["https://openalex.org/I104234503"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"M. Kinoshit","raw_affiliation_strings":["Department of Information Design, Hokkaido Institute of Technology, Sapporo, Japan"],"affiliations":[{"raw_affiliation_string":"Department of Information Design, Hokkaido Institute of Technology, Sapporo, Japan","institution_ids":["https://openalex.org/I104234503"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5074264738","display_name":"Naoki Takatori","orcid":"https://orcid.org/0000-0002-9215-9566"},"institutions":[{"id":"https://openalex.org/I4210089641","display_name":"National Institute of Technology, Asahikawa College","ror":"https://ror.org/009rhnd12","country_code":"JP","type":"education","lineage":["https://openalex.org/I4210089641","https://openalex.org/I4210120810"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"N. Takatori","raw_affiliation_strings":["Asahikawa National College of Technology, Asahikawa, Japan"],"affiliations":[{"raw_affiliation_string":"Asahikawa National College of Technology, Asahikawa, Japan","institution_ids":["https://openalex.org/I4210089641"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103072870","display_name":"Momoko Watanabe","orcid":"https://orcid.org/0000-0001-5014-2849"},"institutions":[{"id":"https://openalex.org/I79669737","display_name":"Rakuno Gakuen University","ror":"https://ror.org/014rqt829","country_code":"JP","type":"education","lineage":["https://openalex.org/I79669737"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"M. Watanabe","raw_affiliation_strings":["Department of Environmental Management, Rakuno Gakuen University, Ebetsu, Japan"],"affiliations":[{"raw_affiliation_string":"Department of Environmental Management, Rakuno Gakuen University, Ebetsu, Japan","institution_ids":["https://openalex.org/I79669737"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5005186079","display_name":"Makoto Furukawa","orcid":"https://orcid.org/0000-0001-5726-876X"},"institutions":[{"id":"https://openalex.org/I4210089641","display_name":"National Institute of Technology, Asahikawa College","ror":"https://ror.org/009rhnd12","country_code":"JP","type":"education","lineage":["https://openalex.org/I4210089641","https://openalex.org/I4210120810"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"M. Furukawa","raw_affiliation_strings":["Asahikawa National College of Technology, Asahikawa, Japan"],"affiliations":[{"raw_affiliation_string":"Asahikawa National College of Technology, Asahikawa, Japan","institution_ids":["https://openalex.org/I4210089641"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5068195972"],"corresponding_institution_ids":["https://openalex.org/I104234503"],"apc_list":null,"apc_paid":null,"fwci":0.9035,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.78917175,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":94},"biblio":{"volume":"1","issue":null,"first_page":"109","last_page":"114"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10586","display_name":"Robotic Path Planning Algorithms","score":0.9879000186920166,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12794","display_name":"Adaptive Dynamic Programming Control","score":0.9639999866485596,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/seesaw-molecular-geometry","display_name":"Seesaw molecular geometry","score":0.9211382269859314},{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.709728479385376},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6807793378829956},{"id":"https://openalex.org/keywords/mobile-robot","display_name":"Mobile robot","score":0.6703402400016785},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.6614395380020142},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.5795656442642212},{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.5697637796401978},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5005373954772949},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.4781440496444702},{"id":"https://openalex.org/keywords/multi-agent-system","display_name":"Multi-agent system","score":0.47071999311447144},{"id":"https://openalex.org/keywords/state","display_name":"State (computer science)","score":0.43481001257896423},{"id":"https://openalex.org/keywords/distributed-computing","display_name":"Distributed computing","score":0.43090376257896423},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.14683985710144043},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.1178320050239563}],"concepts":[{"id":"https://openalex.org/C159762639","wikidata":"https://www.wikidata.org/wiki/Q2273845","display_name":"Seesaw molecular geometry","level":3,"score":0.9211382269859314},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.709728479385376},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6807793378829956},{"id":"https://openalex.org/C19966478","wikidata":"https://www.wikidata.org/wiki/Q4810574","display_name":"Mobile robot","level":3,"score":0.6703402400016785},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.6614395380020142},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.5795656442642212},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.5697637796401978},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5005373954772949},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.4781440496444702},{"id":"https://openalex.org/C41550386","wikidata":"https://www.wikidata.org/wiki/Q529909","display_name":"Multi-agent system","level":2,"score":0.47071999311447144},{"id":"https://openalex.org/C48103436","wikidata":"https://www.wikidata.org/wiki/Q599031","display_name":"State (computer science)","level":2,"score":0.43481001257896423},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.43090376257896423},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.14683985710144043},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.1178320050239563},{"id":"https://openalex.org/C185544564","wikidata":"https://www.wikidata.org/wiki/Q81197","display_name":"Nuclear physics","level":1,"score":0.0},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C186453547","wikidata":"https://www.wikidata.org/wiki/Q2126","display_name":"Neutrino","level":2,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icsmc.2005.1571130","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icsmc.2005.1571130","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2005 IEEE International Conference on Systems, Man and Cybernetics","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":9,"referenced_works":["https://openalex.org/W32403112","https://openalex.org/W1521427077","https://openalex.org/W1557517019","https://openalex.org/W1751034982","https://openalex.org/W2911283634","https://openalex.org/W2914656440","https://openalex.org/W4390708689","https://openalex.org/W6630994837","https://openalex.org/W6689723076"],"related_works":["https://openalex.org/W2497468103","https://openalex.org/W3147366289","https://openalex.org/W160677519","https://openalex.org/W3004732674","https://openalex.org/W2026558218","https://openalex.org/W1999832398","https://openalex.org/W3154683939","https://openalex.org/W2164797156","https://openalex.org/W2122871747","https://openalex.org/W3114279067"],"abstract_inverted_index":{"This":[0,41],"paper":[1],"proposes":[2],"a":[3,8,62,69,114,121,128,137],"new":[4],"approach":[5],"to":[6,49,103],"realize":[7],"reinforcement":[9],"learning":[10,151],"scheme":[11],"for":[12,54,127],"autonomous":[13,31],"multiple":[14,30,55],"agents":[15,24,85],"system.":[16,134],"In":[17],"our":[18,160],"approach,":[19],"we":[20],"treat":[21,104],"the":[22,35,51,73,98,124,132,144,157],"cooperative":[23],"systems":[25],"in":[26,95,141],"which":[27],"there":[28],"are":[29],"mobile":[32,56],"robots,":[33],"and":[34],"seesaw":[36,63,133],"balancing":[37],"task":[38],"is":[39,43,76,101,147],"given.":[40],"problem":[42],"an":[44],"example":[45],"of":[46,83,107,116,123,131,139,159],"corresponding":[47,129],"tasks":[48],"find":[50],"appropriate":[52],"locations":[53],"robots.":[57],"Each":[58,110],"robot":[59,84,111,125],"agent":[60,112],"on":[61,136],"keeps":[64],"being":[65],"balanced":[66],"state.":[67],"As":[68],"most":[70],"useful":[71],"algorithm,":[72],"Q-learning":[74],"method":[75,100],"well":[77],"known.":[78],"However,":[79],"feasible":[80],"action":[81,92],"values":[82,106],"must":[86],"be":[87],"categorized":[88],"into":[89],"some":[90],"discrete":[91],"values.":[93],"Therefore,":[94],"this":[96,142],"study,":[97],"actor-critic":[99,150],"applied":[102],"continuous":[105],"agents'":[108],"actions.":[109],"has":[113],"set":[115],"normal":[117,145],"distribution,":[118],"that":[119],"determines":[120],"distance":[122],"movement":[126,140],"state":[130],"Based":[135],"result":[138,155],"system,":[143],"distribution":[146],"modified":[148],"by":[149],"method.":[152,162],"The":[153],"simulation":[154],"shows":[156],"effectiveness":[158],"approaching":[161]},"counts_by_year":[{"year":2018,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
