{"id":"https://openalex.org/W4400946884","doi":"https://doi.org/10.23919/ecc64448.2024.10590764","title":"Distributed Multi-Agent Gradient Based Q-Learning with Linear Function Approximation","display_name":"Distributed Multi-Agent Gradient Based Q-Learning with Linear Function Approximation","publication_year":2024,"publication_date":"2024-06-25","ids":{"openalex":"https://openalex.org/W4400946884","doi":"https://doi.org/10.23919/ecc64448.2024.10590764"},"language":"en","primary_location":{"id":"doi:10.23919/ecc64448.2024.10590764","is_oa":false,"landing_page_url":"http://dx.doi.org/10.23919/ecc64448.2024.10590764","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 European Control Conference (ECC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5055532417","display_name":"Milo\u0161 S. Stankovi\u0107","orcid":"https://orcid.org/0000-0001-9064-7059"},"institutions":[{"id":"https://openalex.org/I170253739","display_name":"Singidunum University","ror":"https://ror.org/017v7rz39","country_code":"RS","type":"education","lineage":["https://openalex.org/I170253739"]}],"countries":["RS"],"is_corresponding":false,"raw_author_name":"Milo\u0161 S. Stankovi\u0107","raw_affiliation_strings":["Singidunum University,Belgrade,Serbia"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Singidunum University,Belgrade,Serbia","institution_ids":["https://openalex.org/I170253739"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5020552563","display_name":"Marko Beko","orcid":"https://orcid.org/0000-0001-7315-8739"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Marko Beko","raw_affiliation_strings":["Universidade Lus&#x00F3;fona,COPELABS,Lisboa,Portugal"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Universidade Lus&#x00F3;fona,COPELABS,Lisboa,Portugal","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5103386989","display_name":"Srdjan S. Stankovi\u0107","orcid":null},"institutions":[{"id":"https://openalex.org/I4068193","display_name":"University of Belgrade","ror":"https://ror.org/02qsmb048","country_code":"RS","type":"education","lineage":["https://openalex.org/I4068193"]}],"countries":["RS"],"is_corresponding":false,"raw_author_name":"Srdjan S. Stankovi\u0107","raw_affiliation_strings":["School of Electrical Engineering, University of Belgrade,Serbia"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Electrical Engineering, University of Belgrade,Serbia","institution_ids":["https://openalex.org/I4068193"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.3055,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.62877284,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"2500","last_page":"2505"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12676","display_name":"Machine Learning and ELM","score":0.9397000074386597,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12676","display_name":"Machine Learning and ELM","score":0.9397000074386597,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10057","display_name":"Face and Expression Recognition","score":0.9067000150680542,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5967727899551392},{"id":"https://openalex.org/keywords/function","display_name":"Function (biology)","score":0.5329089760780334},{"id":"https://openalex.org/keywords/linear-approximation","display_name":"Linear approximation","score":0.5299678444862366},{"id":"https://openalex.org/keywords/function-approximation","display_name":"Function approximation","score":0.5191566348075867},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.24163737893104553},{"id":"https://openalex.org/keywords/nonlinear-system","display_name":"Nonlinear system","score":0.18813017010688782},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.14876914024353027},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.10512903332710266}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5967727899551392},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.5329089760780334},{"id":"https://openalex.org/C160824197","wikidata":"https://www.wikidata.org/wiki/Q2071054","display_name":"Linear approximation","level":3,"score":0.5299678444862366},{"id":"https://openalex.org/C91873725","wikidata":"https://www.wikidata.org/wiki/Q3445816","display_name":"Function approximation","level":3,"score":0.5191566348075867},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.24163737893104553},{"id":"https://openalex.org/C158622935","wikidata":"https://www.wikidata.org/wiki/Q660848","display_name":"Nonlinear system","level":2,"score":0.18813017010688782},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.14876914024353027},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.10512903332710266},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C78458016","wikidata":"https://www.wikidata.org/wiki/Q840400","display_name":"Evolutionary biology","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.23919/ecc64448.2024.10590764","is_oa":false,"landing_page_url":"http://dx.doi.org/10.23919/ecc64448.2024.10590764","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 European Control Conference (ECC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G7940359700","display_name":null,"funder_award_id":"2022.07530.CEECIND,UIDB/04111/2020","funder_id":"https://openalex.org/F4320334779","funder_display_name":"Funda\u00e7\u00e3o para a Ci\u00eancia e a Tecnologia"}],"funders":[{"id":"https://openalex.org/F4320329307","display_name":"Science Fund of the Republic of Serbia","ror":null},{"id":"https://openalex.org/F4320334779","display_name":"Funda\u00e7\u00e3o para a Ci\u00eancia e a Tecnologia","ror":"https://ror.org/00snfqn58"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":36,"referenced_works":["https://openalex.org/W32403112","https://openalex.org/W41554520","https://openalex.org/W1098911146","https://openalex.org/W1499021337","https://openalex.org/W2071983464","https://openalex.org/W2075268401","https://openalex.org/W2077274021","https://openalex.org/W2106471678","https://openalex.org/W2132351269","https://openalex.org/W2139418546","https://openalex.org/W2144672231","https://openalex.org/W2145339207","https://openalex.org/W2151661095","https://openalex.org/W2267629510","https://openalex.org/W2768629321","https://openalex.org/W2776728688","https://openalex.org/W2953189830","https://openalex.org/W2966162740","https://openalex.org/W3010758700","https://openalex.org/W3013922756","https://openalex.org/W3023701936","https://openalex.org/W3103113203","https://openalex.org/W3135895062","https://openalex.org/W4205326910","https://openalex.org/W4300128663","https://openalex.org/W4315472286","https://openalex.org/W4321768300","https://openalex.org/W4381384918","https://openalex.org/W6600556750","https://openalex.org/W6677370284","https://openalex.org/W6746869544","https://openalex.org/W6762191990","https://openalex.org/W6766912882","https://openalex.org/W6776877641","https://openalex.org/W6785462110","https://openalex.org/W6810918192"],"related_works":["https://openalex.org/W2167394514","https://openalex.org/W2518491366","https://openalex.org/W2144287108","https://openalex.org/W2123979492","https://openalex.org/W2123358778","https://openalex.org/W2062806575","https://openalex.org/W2153714959","https://openalex.org/W2235525867","https://openalex.org/W3113145869","https://openalex.org/W4289355352"],"abstract_inverted_index":{"In":[0],"this":[1],"paper":[2],"we":[3],"propose":[4],"a":[5,71,95],"novel":[6],"distributed":[7],"gradient-based":[8],"two-time-scale":[9],"algorithm":[10,30,46,91],"for":[11,66],"multi-agent":[12],"off-policy":[13,41],"learning":[14,45],"of":[15,18,99,110],"linear":[16,72],"approximation":[17],"the":[19,53,56,84,89,100,111],"optimal":[20],"action-value":[21],"function":[22],"(Q-function)":[23],"in":[24],"Markov":[25],"decision":[26],"processes":[27],"(MDPs).":[28],"The":[29],"is":[31,78],"composed":[32],"of:":[33],"1)":[34],"local":[35],"parameter":[36,85],"updates":[37],"based":[38],"on":[39],"an":[40],"gradient":[42],"temporal":[43],"difference":[44],"with":[47],"target":[48],"policy":[49],"belonging":[50],"to":[51,94],"either":[52],"greedy":[54],"or":[55],"Gibbs":[57],"distribution":[58],"class":[59],"and":[60,69],"stationary":[61],"behavior":[62],"policies":[63],"possibly":[64],"different":[65],"each":[67],"agent,":[68],"2)":[70],"stochastic":[73],"time-varying":[74],"consensus":[75],"scheme.":[76],"It":[77],"proved,":[79],"under":[80],"general":[81],"assumptions,":[82],"that":[83],"estimates":[86],"generated":[87],"by":[88],"proposed":[90,112],"weakly":[92],"converge":[93],"bounded":[96],"invariant":[97],"set":[98],"corresponding":[101],"ordinary":[102],"differential":[103],"equation":[104],"(ODE).":[105],"Simulation":[106],"results":[107],"illustrate":[108],"effectiveness":[109],"algorithm.":[113]},"counts_by_year":[{"year":2024,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
