{"id":"https://openalex.org/W4398186451","doi":"https://doi.org/10.1145/3605098.3636028","title":"Reward Specifications in Collaborative Multi-agent Learning: A Comparative Study","display_name":"Reward Specifications in Collaborative Multi-agent Learning: A Comparative Study","publication_year":2024,"publication_date":"2024-04-08","ids":{"openalex":"https://openalex.org/W4398186451","doi":"https://doi.org/10.1145/3605098.3636028"},"language":"en","primary_location":{"id":"doi:10.1145/3605098.3636028","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3605098.3636028","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3605098.3636028","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 39th ACM/SIGAPP Symposium on Applied Computing","raw_type":"proceedings-article"},"type":"conference-paper","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3605098.3636028","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102818294","display_name":"Maram Hasan","orcid":"https://orcid.org/0000-0001-9040-5842"},"institutions":[{"id":"https://openalex.org/I154851008","display_name":"Indian Institute of Technology Roorkee","ror":"https://ror.org/00582g326","country_code":"IN","type":"education","lineage":["https://openalex.org/I154851008"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Maram Hasan","raw_affiliation_strings":["Computer Science and Engineering, Indian Institute of Technology Roorkee, Roorkee, India"],"raw_orcid":"https://orcid.org/0000-0001-9040-5842","affiliations":[{"raw_affiliation_string":"Computer Science and Engineering, Indian Institute of Technology Roorkee, Roorkee, India","institution_ids":["https://openalex.org/I154851008"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5049887458","display_name":"Rajdeep Niyogi","orcid":"https://orcid.org/0000-0003-1664-4882"},"institutions":[{"id":"https://openalex.org/I154851008","display_name":"Indian Institute of Technology Roorkee","ror":"https://ror.org/00582g326","country_code":"IN","type":"education","lineage":["https://openalex.org/I154851008"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Rajdeep Niyogi","raw_affiliation_strings":["Computer Science and Engineering, Indian Institute of Technology Roorkee, Roorkee, India"],"raw_orcid":"https://orcid.org/0000-0003-1664-4882","affiliations":[{"raw_affiliation_string":"Computer Science and Engineering, Indian Institute of Technology Roorkee, Roorkee, India","institution_ids":["https://openalex.org/I154851008"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I154851008"],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1007","last_page":"1013"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10456","display_name":"Multi-Agent Systems and Negotiation","score":0.989300012588501,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.9562000036239624,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7223039865493774},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.35558193922042847},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.34179919958114624}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7223039865493774},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.35558193922042847},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.34179919958114624}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3605098.3636028","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3605098.3636028","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3605098.3636028","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 39th ACM/SIGAPP Symposium on Applied Computing","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3605098.3636028","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3605098.3636028","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3605098.3636028","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 39th ACM/SIGAPP Symposium on Applied Computing","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4398186451.pdf"},"referenced_works_count":9,"referenced_works":["https://openalex.org/W2145339207","https://openalex.org/W2189058185","https://openalex.org/W2560674852","https://openalex.org/W2604382266","https://openalex.org/W2606411279","https://openalex.org/W2915117209","https://openalex.org/W2963523627","https://openalex.org/W3096807772","https://openalex.org/W4300630456"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052","https://openalex.org/W2382290278","https://openalex.org/W4395014643"],"abstract_inverted_index":{"Reinforcement":[0],"learning":[1,5,31,35,52,87,99,162],"is":[2],"a":[3,47,74,119,129],"prominent":[4],"paradigm":[6],"that":[7,57,77],"seeks":[8],"to":[9,106],"maximize":[10],"cumulative":[11],"rewards":[12,174],"over":[13],"time.":[14],"Nevertheless,":[15],"some":[16],"real-life":[17],"problems":[18],"often":[19],"exhibit":[20],"inherent":[21],"sparsity":[22,126],"in":[23,33,50,88,128,190],"rewards,":[24],"which":[25,102],"pose":[26],"difficulties":[27],"for":[28,194],"standard":[29],"reinforcement":[30,98],"algorithms":[32],"efficiently":[34],"optimal":[36],"policies":[37],"without":[38],"frequent":[39],"feedback.":[40],"In":[41,91,138],"multi-agent":[42,89,97],"environments,":[43,112,132],"reward":[44,55,82,111,116,157,184],"specifications":[45,117],"play":[46],"crucial":[48],"role":[49],"collaborative":[51,86,197],"by":[53],"designing":[54],"structures":[56],"guide":[58],"agents":[59],"toward":[60],"desired":[61],"behaviors":[62],"and":[63,135,152,163,175,188],"effectively":[64],"addressing":[65],"the":[66,79,156,171,181],"challenge":[67],"of":[68,81,131,183],"sparse":[69],"rewards.":[70],"This":[71],"paper":[72],"presents":[73],"new":[75],"study":[76],"explores":[78],"impact":[80,159],"specification":[83],"techniques":[84,193],"on":[85,121,144],"environments.":[90],"our":[92],"experiments,":[93],"we":[94,140,169],"use":[95],"state-of-the-art":[96],"(MARL)":[100],"algorithms,":[101],"have":[103],"been":[104],"proven":[105],"be":[107],"effective":[108,192],"under":[109,125],"dense":[110],"along":[113],"with":[114,118],"different":[115],"focus":[120],"evaluating":[122],"their":[123],"performance":[124],"settings":[127],"variety":[130],"including":[133],"discrete":[134],"complex":[136],"scenarios.":[137],"addition,":[139],"provide":[141],"in-depth":[142],"insights":[143],"how":[145],"diverse":[146],"factors,":[147],"such":[148],"as":[149],"task":[150],"nature":[151],"information":[153],"availability,":[154],"influence":[155],"specification's":[158],"concerning":[160],"agent":[161],"coordination.":[164],"To":[165],"assess":[166],"these":[167],"aspects,":[168],"examine":[170],"average":[172],"team":[173],"convergence":[176],"speed.":[177],"The":[178],"results":[179],"highlight":[180],"importance":[182],"specifications,":[185],"aiding":[186],"researchers":[187],"practitioners":[189],"selecting":[191],"various":[195],"real-world":[196],"problems.":[198]},"counts_by_year":[{"year":2025,"cited_by_count":2}],"updated_date":"2026-07-14T23:27:15.235271","created_date":"2025-10-10T00:00:00"}