{"id":"https://openalex.org/W4385452159","doi":"https://doi.org/10.23919/ecc57647.2023.10178123","title":"Local Analysis of Entropy-Regularized Stochastic Soft-Max Policy Gradient Methods","display_name":"Local Analysis of Entropy-Regularized Stochastic Soft-Max Policy Gradient Methods","publication_year":2023,"publication_date":"2023-06-13","ids":{"openalex":"https://openalex.org/W4385452159","doi":"https://doi.org/10.23919/ecc57647.2023.10178123"},"language":"en","primary_location":{"id":"doi:10.23919/ecc57647.2023.10178123","is_oa":false,"landing_page_url":"http://dx.doi.org/10.23919/ecc57647.2023.10178123","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 European Control Conference (ECC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102879163","display_name":"Yuhao Ding","orcid":"https://orcid.org/0000-0002-1521-7566"},"institutions":[{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Yuhao Ding","raw_affiliation_strings":["University of California at Berkeley,Department of Industrail Engineering and Operations Research","Department of Industrail Engineering and Operations Research, University of California at Berkeley"],"affiliations":[{"raw_affiliation_string":"University of California at Berkeley,Department of Industrail Engineering and Operations Research","institution_ids":["https://openalex.org/I95457486"]},{"raw_affiliation_string":"Department of Industrail Engineering and Operations Research, University of California at Berkeley","institution_ids":["https://openalex.org/I95457486"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5089647297","display_name":"Junzi Zhang","orcid":"https://orcid.org/0000-0002-5086-0063"},"institutions":[{"id":"https://openalex.org/I91036609","display_name":"Citadel","ror":"https://ror.org/01vwr6t80","country_code":"US","type":"education","lineage":["https://openalex.org/I91036609"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Junzi Zhang","raw_affiliation_strings":["Citadel Securities (work done prior to joining Citadel Securities),Chicago,IL,USA,60603"],"affiliations":[{"raw_affiliation_string":"Citadel Securities (work done prior to joining Citadel Securities),Chicago,IL,USA,60603","institution_ids":["https://openalex.org/I91036609"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5042580848","display_name":"Javad Lavaei","orcid":null},"institutions":[{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Javad Lavaei","raw_affiliation_strings":["University of California at Berkeley,Department of Industrail Engineering and Operations Research","Department of Industrail Engineering and Operations Research, University of California at Berkeley"],"affiliations":[{"raw_affiliation_string":"University of California at Berkeley,Department of Industrail Engineering and Operations Research","institution_ids":["https://openalex.org/I95457486"]},{"raw_affiliation_string":"Department of Industrail Engineering and Operations Research, University of California at Berkeley","institution_ids":["https://openalex.org/I95457486"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5102879163"],"corresponding_institution_ids":["https://openalex.org/I95457486"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.08326607,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"8"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11612","display_name":"Stochastic Gradient Optimization Techniques","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12056","display_name":"Markov Chains and Monte Carlo Methods","score":0.9970999956130981,"subfield":{"id":"https://openalex.org/subfields/2613","display_name":"Statistics and Probability"},"field":{"id":"https://openalex.org/fields/26","display_name":"Mathematics"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13553","display_name":"Age of Information Optimization","score":0.9927999973297119,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/entropy","display_name":"Entropy (arrow of time)","score":0.49569761753082275},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.4505329132080078},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.4283583462238312},{"id":"https://openalex.org/keywords/statistical-physics","display_name":"Statistical physics","score":0.4160710275173187},{"id":"https://openalex.org/keywords/applied-mathematics","display_name":"Applied mathematics","score":0.3608796000480652},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.35424673557281494},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.16506171226501465},{"id":"https://openalex.org/keywords/thermodynamics","display_name":"Thermodynamics","score":0.09634339809417725}],"concepts":[{"id":"https://openalex.org/C106301342","wikidata":"https://www.wikidata.org/wiki/Q4117933","display_name":"Entropy (arrow of time)","level":2,"score":0.49569761753082275},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.4505329132080078},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.4283583462238312},{"id":"https://openalex.org/C121864883","wikidata":"https://www.wikidata.org/wiki/Q677916","display_name":"Statistical physics","level":1,"score":0.4160710275173187},{"id":"https://openalex.org/C28826006","wikidata":"https://www.wikidata.org/wiki/Q33521","display_name":"Applied mathematics","level":1,"score":0.3608796000480652},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.35424673557281494},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.16506171226501465},{"id":"https://openalex.org/C97355855","wikidata":"https://www.wikidata.org/wiki/Q11473","display_name":"Thermodynamics","level":1,"score":0.09634339809417725}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.23919/ecc57647.2023.10178123","is_oa":false,"landing_page_url":"http://dx.doi.org/10.23919/ecc57647.2023.10178123","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 European Control Conference (ECC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":49,"referenced_works":["https://openalex.org/W1993411524","https://openalex.org/W2099111195","https://openalex.org/W2554120691","https://openalex.org/W2609650878","https://openalex.org/W2783096200","https://openalex.org/W2951507724","https://openalex.org/W2969951231","https://openalex.org/W2978644431","https://openalex.org/W2998050631","https://openalex.org/W3012939900","https://openalex.org/W3036276779","https://openalex.org/W3038915804","https://openalex.org/W3103047293","https://openalex.org/W3109546547","https://openalex.org/W3127686539","https://openalex.org/W3132054471","https://openalex.org/W3133371913","https://openalex.org/W3171210634","https://openalex.org/W3212337025","https://openalex.org/W3217314940","https://openalex.org/W4214717370","https://openalex.org/W4221157223","https://openalex.org/W4320473289","https://openalex.org/W4382318857","https://openalex.org/W4401142486","https://openalex.org/W6692846177","https://openalex.org/W6734206676","https://openalex.org/W6734517396","https://openalex.org/W6736495777","https://openalex.org/W6747473740","https://openalex.org/W6763002318","https://openalex.org/W6767052781","https://openalex.org/W6769196924","https://openalex.org/W6775255735","https://openalex.org/W6776824048","https://openalex.org/W6779228417","https://openalex.org/W6780386840","https://openalex.org/W6780486084","https://openalex.org/W6782593158","https://openalex.org/W6786084949","https://openalex.org/W6791168167","https://openalex.org/W6791509694","https://openalex.org/W6794588271","https://openalex.org/W6802333366","https://openalex.org/W6803111450","https://openalex.org/W6809872370","https://openalex.org/W6840151816","https://openalex.org/W6869630987","https://openalex.org/W7055423279"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W1979597421","https://openalex.org/W2007980826","https://openalex.org/W2061531152","https://openalex.org/W3002753104","https://openalex.org/W2077600819","https://openalex.org/W2142036596","https://openalex.org/W2072657027","https://openalex.org/W2962838298","https://openalex.org/W2180033908"],"abstract_inverted_index":{"Entropy":[0],"regularization":[1,58],"is":[2],"an":[3],"efficient":[4],"technique":[5],"for":[6,96],"encouraging":[7],"exploration":[8],"and":[9,59,92],"preventing":[10],"a":[11,101],"premature":[12],"convergence":[13,49,91],"of":[14,27,37,50],"(vanilla)":[15],"policy":[16,54,70],"gradient":[17,39,55],"methods":[18],"in":[19],"reinforcement":[20],"learning":[21],"(RL).":[22],"However,":[23],"the":[24,35,48,64,68,74,88,97],"theoretical":[25],"understanding":[26],"entropy-regularized":[28],"RL":[29],"algorithms":[30],"has":[31],"been":[32],"limited":[33],"by":[34],"assumption":[36],"exact":[38],"oracles.":[40],"To":[41],"go":[42],"beyond":[43],"this":[44],"limitation,":[45],"we":[46,86],"study":[47],"stochastic":[51],"soft-max":[52],"vanilla":[53],"with":[56,82],"entropy":[57],"prove":[60],"how":[61],"to":[62,71],"utilize":[63],"curvature":[65],"information":[66],"around":[67],"optimal":[69],"guarantee":[72],"that":[73],"action":[75],"probabilities":[76],"will":[77],"still":[78],"remain":[79],"uniformly":[80],"bounded":[81],"high":[83],"probability.":[84],"Moreover,":[85],"develop":[87],"\u201clast":[89],"iterate\u201d":[90],"sample":[93],"complexity":[94],"result":[95],"proposed":[98],"algorithm":[99],"given":[100],"good":[102],"initialization.":[103]},"counts_by_year":[],"updated_date":"2025-12-21T01:58:51.020947","created_date":"2025-10-10T00:00:00"}
