{"id":"https://openalex.org/W4392903961","doi":"https://doi.org/10.1109/icassp48485.2024.10448500","title":"A New Pre-Training Paradigm for Offline Multi-Agent Reinforcement Learning with Suboptimal Data","display_name":"A New Pre-Training Paradigm for Offline Multi-Agent Reinforcement Learning with Suboptimal Data","publication_year":2024,"publication_date":"2024-03-18","ids":{"openalex":"https://openalex.org/W4392903961","doi":"https://doi.org/10.1109/icassp48485.2024.10448500"},"language":"en","primary_location":{"id":"doi:10.1109/icassp48485.2024.10448500","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp48485.2024.10448500","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101156530","display_name":"Linghui Meng","orcid":null},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210112150","display_name":"Institute of Automation","ror":"https://ror.org/022c3hy66","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210112150"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Linghui Meng","raw_affiliation_strings":["Chinese Academy of Sciences,Institute of Automation,China","School of Artificial Intelligence, University of Chinese Academy of Sciences, China","Institute of Automation, Chinese Academy of Sciences, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Chinese Academy of Sciences,Institute of Automation,China","institution_ids":["https://openalex.org/I4210112150","https://openalex.org/I19820366"]},{"raw_affiliation_string":"School of Artificial Intelligence, University of Chinese Academy of Sciences, China","institution_ids":["https://openalex.org/I4210165038"]},{"raw_affiliation_string":"Institute of Automation, Chinese Academy of Sciences, China","institution_ids":["https://openalex.org/I4210112150","https://openalex.org/I19820366"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5052913961","display_name":"Xi Zhang","orcid":"https://orcid.org/0000-0002-1105-9417"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210112150","display_name":"Institute of Automation","ror":"https://ror.org/022c3hy66","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210112150"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xi Zhang","raw_affiliation_strings":["Chinese Academy of Sciences,Institute of Automation,China","Institute of Automation, Chinese Academy of Sciences, China","School of Artificial Intelligence, University of Chinese Academy of Sciences, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Chinese Academy of Sciences,Institute of Automation,China","institution_ids":["https://openalex.org/I4210112150","https://openalex.org/I19820366"]},{"raw_affiliation_string":"Institute of Automation, Chinese Academy of Sciences, China","institution_ids":["https://openalex.org/I4210112150","https://openalex.org/I19820366"]},{"raw_affiliation_string":"School of Artificial Intelligence, University of Chinese Academy of Sciences, China","institution_ids":["https://openalex.org/I4210165038"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101684094","display_name":"Dengpeng Xing","orcid":"https://orcid.org/0000-0002-8251-9118"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210112150","display_name":"Institute of Automation","ror":"https://ror.org/022c3hy66","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210112150"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Dengpeng Xing","raw_affiliation_strings":["Chinese Academy of Sciences,Institute of Automation,China","Institute of Automation, Chinese Academy of Sciences, China","School of Artificial Intelligence, University of Chinese Academy of Sciences, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Chinese Academy of Sciences,Institute of Automation,China","institution_ids":["https://openalex.org/I4210112150","https://openalex.org/I19820366"]},{"raw_affiliation_string":"Institute of Automation, Chinese Academy of Sciences, China","institution_ids":["https://openalex.org/I4210112150","https://openalex.org/I19820366"]},{"raw_affiliation_string":"School of Artificial Intelligence, University of Chinese Academy of Sciences, China","institution_ids":["https://openalex.org/I4210165038"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5065635207","display_name":"Bo Xu","orcid":"https://orcid.org/0000-0001-6379-7617"},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210112150","display_name":"Institute of Automation","ror":"https://ror.org/022c3hy66","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210112150"]},{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Bo Xu","raw_affiliation_strings":["Chinese Academy of Sciences,Institute of Automation,China","School of Artificial Intelligence, University of Chinese Academy of Sciences, China","Institute of Automation, Chinese Academy of Sciences, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Chinese Academy of Sciences,Institute of Automation,China","institution_ids":["https://openalex.org/I4210112150","https://openalex.org/I19820366"]},{"raw_affiliation_string":"School of Artificial Intelligence, University of Chinese Academy of Sciences, China","institution_ids":["https://openalex.org/I4210165038"]},{"raw_affiliation_string":"Institute of Automation, Chinese Academy of Sciences, China","institution_ids":["https://openalex.org/I4210112150","https://openalex.org/I19820366"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.6109,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.71074384,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"7520","last_page":"7524"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12101","display_name":"Advanced Bandit Algorithms Research","score":0.9810000061988831,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T13553","display_name":"Age of Information Optimization","score":0.9782000184059143,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8162988424301147},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8036222457885742},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6847202181816101},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.6440467238426208},{"id":"https://openalex.org/keywords/software-deployment","display_name":"Software deployment","score":0.4851420819759369},{"id":"https://openalex.org/keywords/online-and-offline","display_name":"Online and offline","score":0.4347722828388214},{"id":"https://openalex.org/keywords/offline-learning","display_name":"Offline learning","score":0.4308333694934845},{"id":"https://openalex.org/keywords/function","display_name":"Function (biology)","score":0.42756664752960205},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.42231616377830505},{"id":"https://openalex.org/keywords/online-learning","display_name":"Online learning","score":0.31193119287490845},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.09001338481903076},{"id":"https://openalex.org/keywords/multimedia","display_name":"Multimedia","score":0.07742929458618164}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8162988424301147},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8036222457885742},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6847202181816101},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.6440467238426208},{"id":"https://openalex.org/C105339364","wikidata":"https://www.wikidata.org/wiki/Q2297740","display_name":"Software deployment","level":2,"score":0.4851420819759369},{"id":"https://openalex.org/C2780102126","wikidata":"https://www.wikidata.org/wiki/Q10928179","display_name":"Online and offline","level":2,"score":0.4347722828388214},{"id":"https://openalex.org/C2780490138","wikidata":"https://www.wikidata.org/wiki/Q7079636","display_name":"Offline learning","level":3,"score":0.4308333694934845},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.42756664752960205},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.42231616377830505},{"id":"https://openalex.org/C2986087404","wikidata":"https://www.wikidata.org/wiki/Q15946010","display_name":"Online learning","level":2,"score":0.31193119287490845},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.09001338481903076},{"id":"https://openalex.org/C49774154","wikidata":"https://www.wikidata.org/wiki/Q131765","display_name":"Multimedia","level":1,"score":0.07742929458618164},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.0},{"id":"https://openalex.org/C78458016","wikidata":"https://www.wikidata.org/wiki/Q840400","display_name":"Evolutionary biology","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp48485.2024.10448500","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp48485.2024.10448500","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/10","score":0.4300000071525574,"display_name":"Reduced inequalities"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320321133","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":35,"referenced_works":["https://openalex.org/W2794643322","https://openalex.org/W2842511635","https://openalex.org/W3016525976","https://openalex.org/W3022566517","https://openalex.org/W3033324992","https://openalex.org/W3093784762","https://openalex.org/W3093963693","https://openalex.org/W3111077973","https://openalex.org/W3134226813","https://openalex.org/W3157552953","https://openalex.org/W3169291081","https://openalex.org/W4212774754","https://openalex.org/W4221150981","https://openalex.org/W4288594419","https://openalex.org/W4297808394","https://openalex.org/W4362590704","https://openalex.org/W4385245566","https://openalex.org/W6739901393","https://openalex.org/W6752338937","https://openalex.org/W6757469721","https://openalex.org/W6758846586","https://openalex.org/W6759312711","https://openalex.org/W6763704811","https://openalex.org/W6775647304","https://openalex.org/W6776438516","https://openalex.org/W6776601253","https://openalex.org/W6779265984","https://openalex.org/W6784178060","https://openalex.org/W6785034342","https://openalex.org/W6787511818","https://openalex.org/W6791533262","https://openalex.org/W6794625930","https://openalex.org/W6796289742","https://openalex.org/W6796880758","https://openalex.org/W6810764721"],"related_works":["https://openalex.org/W4225619808","https://openalex.org/W4400438570","https://openalex.org/W4221145086","https://openalex.org/W4376223516","https://openalex.org/W4226221094","https://openalex.org/W4226042081","https://openalex.org/W4388926065","https://openalex.org/W4387293922","https://openalex.org/W4388482290","https://openalex.org/W3207447243"],"abstract_inverted_index":{"Offline":[0],"multi-agent":[1,54,67,110,130],"reinforcement":[2],"learning":[3,38,93,113],"(MARL)":[4],"with":[5,80,133,165,170],"pre-training":[6,17,132],"paradigm,":[7],"which":[8,46],"uses":[9],"a":[10,62,66,98,104,115],"large":[11],"quantity":[12,142],"of":[13,50,83,122,143,172],"trajectories":[14,43],"for":[15,109],"offline":[16,71,144,154],"and":[18,76,180],"online":[19,79],"deployment,":[20],"has":[21],"become":[22],"fashionable":[23],"lately.":[24],"While":[25],"performing":[26],"well":[27],"on":[28,36],"various":[29,118],"tasks,":[30],"conventional":[31],"pre-trained":[32,51,70],"decision-making":[33],"models":[34],"based":[35],"imitation":[37],"typically":[39],"require":[40],"many":[41],"expert":[42,124,174],"or":[44],"demonstrations,":[45],"limits":[47],"the":[48,81,129,140,173,181],"development":[49],"policies":[52],"in":[53,163],"case.":[55],"To":[56],"address":[57],"this":[58,87],"problem,":[59],"we":[60,95,127],"propose":[61,96],"new":[63],"setting,":[64],"where":[65],"policy":[68,111,131],"is":[69],"using":[72],"suboptimal":[73],"(non-expert)":[74],"data":[75,120,175],"then":[77],"tested":[78],"expectation":[82],"high":[84],"rewards.":[85],"In":[86,157],"practical":[88],"setting":[89],"inspired":[90],"by":[91,177,184],"contrastive":[92],",":[94],"YANHUI,":[97],"simple":[99],"yet":[100],"effective":[101],"framework":[102],"utilizing":[103],"well-designed":[105],"reward":[106],"contrast":[107],"function":[108],"representation":[112],"from":[114],"dataset":[116],"including":[117],"reward-level":[119],"instead":[121],"just":[123],"trajectories.":[125],"Furthermore,":[126],"enrich":[128],"mixture-of-experts":[134],"to":[135],"dynamically":[136],"represent":[137],"it.":[138],"With":[139],"same":[141],"StarCraft":[145],"Multi-Agent":[146],"Challenge":[147],"datasets,":[148],"YANHUI":[149],"achieves":[150],"significant":[151],"improvements":[152],"over":[153],"MARL":[155],"baselines.":[156],"particular,":[158],"our":[159],"method":[160],"surprisingly":[161],"competes":[162],"performance":[164],"earlier":[166],"state-of-the-art":[167],"approaches,":[168],"even":[169],"10%":[171],"used":[176],"other":[178],"baselines":[179],"rest":[182],"replaced":[183],"poor":[185],"data.":[186]},"counts_by_year":[{"year":2025,"cited_by_count":2}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
