{"id":"https://openalex.org/W4401414713","doi":"https://doi.org/10.1109/icra57147.2024.10610197","title":"Extremum-Seeking Action Selection for Accelerating Policy Optimization","display_name":"Extremum-Seeking Action Selection for Accelerating Policy Optimization","publication_year":2024,"publication_date":"2024-05-13","ids":{"openalex":"https://openalex.org/W4401414713","doi":"https://doi.org/10.1109/icra57147.2024.10610197"},"language":"en","primary_location":{"id":"doi:10.1109/icra57147.2024.10610197","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/icra57147.2024.10610197","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE International Conference on Robotics and Automation (ICRA)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5034281181","display_name":"Ya-Chien Chang","orcid":"https://orcid.org/0000-0003-3497-7971"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Ya-Chien Chang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5081085178","display_name":"Sicun Gao","orcid":"https://orcid.org/0000-0003-2524-4960"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sicun Gao","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5034281181"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.13586163,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"80","issue":null,"first_page":"5141","last_page":"5147"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T14083","display_name":"Extremum Seeking Control Systems","score":0.9945999979972839,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T14083","display_name":"Extremum Seeking Control Systems","score":0.9945999979972839,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11178","display_name":"Receptor Mechanisms and Signaling","score":0.9478999972343445,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10217","display_name":"Cardiac electrophysiology and arrhythmias","score":0.9433000087738037,"subfield":{"id":"https://openalex.org/subfields/2705","display_name":"Cardiology and Cardiovascular Medicine"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/action-selection","display_name":"Action selection","score":0.6700308322906494},{"id":"https://openalex.org/keywords/selection","display_name":"Selection (genetic algorithm)","score":0.653652548789978},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6178752183914185},{"id":"https://openalex.org/keywords/action","display_name":"Action (physics)","score":0.5787846446037292},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.48054105043411255},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.19233214855194092},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.1738247275352478},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.09762084484100342},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.09505394101142883},{"id":"https://openalex.org/keywords/neuroscience","display_name":"Neuroscience","score":0.06492331624031067}],"concepts":[{"id":"https://openalex.org/C166109690","wikidata":"https://www.wikidata.org/wiki/Q4677422","display_name":"Action selection","level":3,"score":0.6700308322906494},{"id":"https://openalex.org/C81917197","wikidata":"https://www.wikidata.org/wiki/Q628760","display_name":"Selection (genetic algorithm)","level":2,"score":0.653652548789978},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6178752183914185},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.5787846446037292},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.48054105043411255},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.19233214855194092},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.1738247275352478},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.09762084484100342},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.09505394101142883},{"id":"https://openalex.org/C169760540","wikidata":"https://www.wikidata.org/wiki/Q207011","display_name":"Neuroscience","level":1,"score":0.06492331624031067},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icra57147.2024.10610197","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/icra57147.2024.10610197","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE International Conference on Robotics and Automation (ICRA)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":46,"referenced_works":["https://openalex.org/W1578294898","https://openalex.org/W2074219590","https://openalex.org/W2142916680","https://openalex.org/W2145952809","https://openalex.org/W2158782408","https://openalex.org/W2736601468","https://openalex.org/W2779977383","https://openalex.org/W2907916537","https://openalex.org/W2971979296","https://openalex.org/W2973229164","https://openalex.org/W3032398409","https://openalex.org/W3038825904","https://openalex.org/W3039737909","https://openalex.org/W3095669803","https://openalex.org/W3120786689","https://openalex.org/W3126321819","https://openalex.org/W3169408498","https://openalex.org/W3206620955","https://openalex.org/W3207110310","https://openalex.org/W4244150320","https://openalex.org/W4250979948","https://openalex.org/W4287017924","https://openalex.org/W4287662662","https://openalex.org/W4288091739","https://openalex.org/W4300443332","https://openalex.org/W4306999448","https://openalex.org/W4382203308","https://openalex.org/W4383502893","https://openalex.org/W4385430490","https://openalex.org/W6638018090","https://openalex.org/W6682262322","https://openalex.org/W6739193204","https://openalex.org/W6740092555","https://openalex.org/W6741002519","https://openalex.org/W6747092830","https://openalex.org/W6747473740","https://openalex.org/W6748839928","https://openalex.org/W6751620934","https://openalex.org/W6767327128","https://openalex.org/W6778875248","https://openalex.org/W6780559895","https://openalex.org/W6783852185","https://openalex.org/W6785204124","https://openalex.org/W6800089562","https://openalex.org/W6846310611","https://openalex.org/W6856924951"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W2077555735","https://openalex.org/W2108112111","https://openalex.org/W2060421996"],"abstract_inverted_index":{"Reinforcement":[0],"learning":[1,153,161],"for":[2,16,112],"control":[3,28,43,92,160],"over":[4],"continuous":[5],"spaces":[6],"typically":[7],"uses":[8],"high-entropy":[9],"stochastic":[10,104],"policies,":[11,105],"such":[12,52],"as":[13,115],"Gaussian":[14],"distributions,":[15],"local":[17],"exploration":[18],"and":[19,110],"estimating":[20],"policy":[21,69,149],"gradient":[22],"to":[23,48,68,79,129,132,138,151],"optimize":[24],"performance.":[25],"Many":[26],"robotic":[27],"problems":[29],"deal":[30],"with":[31],"complex":[32],"unstable":[33],"dynamics,":[34],"where":[35],"applying":[36,136],"actions":[37,128],"that":[38,65],"are":[39],"off":[40],"the":[41,58,116,126,139],"feasible":[42],"manifolds":[44],"can":[45,143],"quickly":[46],"lead":[47],"undesirable":[49],"divergence.":[50],"In":[51],"cases,":[53],"most":[54],"samples":[55],"taken":[56],"from":[57,103],"ambient":[59],"action":[60,81,101],"space":[61],"generate":[62],"low-value":[63],"trajectories":[64],"hardly":[66],"contribute":[67],"improvement,":[70],"resulting":[71],"in":[72,83,147,158],"slow":[73],"or":[74],"failed":[75],"learning.":[76],"We":[77],"propose":[78],"improve":[80,125,152],"selection":[82],"this":[84],"model-free":[85],"RL":[86],"setting":[87],"by":[88],"introducing":[89],"additional":[90],"adaptive":[91],"steps":[93],"based":[94],"on":[95,120],"Extremum-Seeking":[96],"Control":[97],"(ESC).":[98],"On":[99],"each":[100],"sampled":[102,127],"we":[106,122,156],"apply":[107],"sinusoidal":[108],"perturbations":[109],"query":[111],"estimated":[113],"Q-values":[114],"response":[117],"signal.":[118],"Based":[119],"ESC,":[121],"then":[123],"dynamically":[124],"be":[130,144],"closer":[131],"nearby":[133],"optima":[134],"before":[135],"them":[137],"environment.":[140],"Our":[141],"methods":[142],"easily":[145],"added":[146],"standard":[148],"optimization":[150],"efficiency,":[154],"which":[155],"demonstrate":[157],"various":[159],"environments.":[162]},"counts_by_year":[],"updated_date":"2026-03-04T07:04:00.330322","created_date":"2025-10-10T00:00:00"}
