{"id":"https://openalex.org/W2039013024","doi":"https://doi.org/10.1177/0278364914553683","title":"Learning to soar: Resource-constrained exploration in reinforcement learning","display_name":"Learning to soar: Resource-constrained exploration in reinforcement learning","publication_year":2014,"publication_date":"2014-12-16","ids":{"openalex":"https://openalex.org/W2039013024","doi":"https://doi.org/10.1177/0278364914553683","mag":"2039013024"},"language":"en","primary_location":{"id":"doi:10.1177/0278364914553683","is_oa":false,"landing_page_url":"https://doi.org/10.1177/0278364914553683","pdf_url":null,"source":{"id":"https://openalex.org/S73484101","display_name":"The International Journal of Robotics Research","issn_l":"0278-3649","issn":["0278-3649","1741-3176"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320017","host_organization_name":"SAGE Publishing","host_organization_lineage":["https://openalex.org/P4310320017"],"host_organization_lineage_names":["SAGE Publishing"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"The International Journal of Robotics Research","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5009492797","display_name":"Jen Jen Chung","orcid":"https://orcid.org/0000-0001-7828-0741"},"institutions":[{"id":"https://openalex.org/I129604602","display_name":"The University of Sydney","ror":"https://ror.org/0384j8v12","country_code":"AU","type":"education","lineage":["https://openalex.org/I129604602"]},{"id":"https://openalex.org/I4210127558","display_name":"Australian Centre for Robotic Vision","ror":"https://ror.org/02zv9xv82","country_code":"AU","type":"facility","lineage":["https://openalex.org/I4210127558"]}],"countries":["AU"],"is_corresponding":true,"raw_author_name":"Jen Jen Chung","raw_affiliation_strings":["Australian Centre for Field Robotics, School of Aerospace, Mechanical and Mechatronic Engineering, The University of Sydney, Australia"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Australian Centre for Field Robotics, School of Aerospace, Mechanical and Mechatronic Engineering, The University of Sydney, Australia","institution_ids":["https://openalex.org/I4210127558","https://openalex.org/I129604602"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103442984","display_name":"Nicholas R. J. Lawrance","orcid":null},"institutions":[{"id":"https://openalex.org/I129604602","display_name":"The University of Sydney","ror":"https://ror.org/0384j8v12","country_code":"AU","type":"education","lineage":["https://openalex.org/I129604602"]},{"id":"https://openalex.org/I4210127558","display_name":"Australian Centre for Robotic Vision","ror":"https://ror.org/02zv9xv82","country_code":"AU","type":"facility","lineage":["https://openalex.org/I4210127558"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Nicholas R.J. Lawrance","raw_affiliation_strings":["Australian Centre for Field Robotics, School of Aerospace, Mechanical and Mechatronic Engineering, The University of Sydney, Australia"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Australian Centre for Field Robotics, School of Aerospace, Mechanical and Mechatronic Engineering, The University of Sydney, Australia","institution_ids":["https://openalex.org/I4210127558","https://openalex.org/I129604602"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5004268965","display_name":"Salah Sukkarieh","orcid":"https://orcid.org/0000-0003-1173-9268"},"institutions":[{"id":"https://openalex.org/I129604602","display_name":"The University of Sydney","ror":"https://ror.org/0384j8v12","country_code":"AU","type":"education","lineage":["https://openalex.org/I129604602"]},{"id":"https://openalex.org/I4210127558","display_name":"Australian Centre for Robotic Vision","ror":"https://ror.org/02zv9xv82","country_code":"AU","type":"facility","lineage":["https://openalex.org/I4210127558"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Salah Sukkarieh","raw_affiliation_strings":["Australian Centre for Field Robotics, School of Aerospace, Mechanical and Mechatronic Engineering, The University of Sydney, Australia"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Australian Centre for Field Robotics, School of Aerospace, Mechanical and Mechatronic Engineering, The University of Sydney, Australia","institution_ids":["https://openalex.org/I4210127558","https://openalex.org/I129604602"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5009492797"],"corresponding_institution_ids":["https://openalex.org/I129604602","https://openalex.org/I4210127558"],"apc_list":null,"apc_paid":null,"fwci":2.5387,"has_fulltext":false,"cited_by_count":36,"citation_normalized_percentile":{"value":0.90976464,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":94,"max":99},"biblio":{"volume":"34","issue":"2","first_page":"158","last_page":"172"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12814","display_name":"Gaussian Processes and Bayesian Inference","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12814","display_name":"Gaussian Processes and Bayesian Inference","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10848","display_name":"Advanced Multi-Objective Optimization Algorithms","score":0.9865999817848206,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12125","display_name":"Aerospace and Aviation Technology","score":0.9800999760627747,"subfield":{"id":"https://openalex.org/subfields/2202","display_name":"Aerospace Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.8780269622802734},{"id":"https://openalex.org/keywords/soar","display_name":"Soar","score":0.7246041297912598},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6180722117424011},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6088178157806396},{"id":"https://openalex.org/keywords/bellman-equation","display_name":"Bellman equation","score":0.5988587141036987},{"id":"https://openalex.org/keywords/gaussian-process","display_name":"Gaussian process","score":0.5972110629081726},{"id":"https://openalex.org/keywords/q-learning","display_name":"Q-learning","score":0.5096290111541748},{"id":"https://openalex.org/keywords/function","display_name":"Function (biology)","score":0.49652010202407837},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.49212783575057983},{"id":"https://openalex.org/keywords/energy","display_name":"Energy (signal processing)","score":0.4408811032772064},{"id":"https://openalex.org/keywords/temporal-difference-learning","display_name":"Temporal difference learning","score":0.43134722113609314},{"id":"https://openalex.org/keywords/mathematical-optimization","display_name":"Mathematical optimization","score":0.3859833776950836},{"id":"https://openalex.org/keywords/gaussian","display_name":"Gaussian","score":0.3226991891860962},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.1775602102279663},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.10533922910690308}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8780269622802734},{"id":"https://openalex.org/C17305859","wikidata":"https://www.wikidata.org/wiki/Q382944","display_name":"Soar","level":2,"score":0.7246041297912598},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6180722117424011},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6088178157806396},{"id":"https://openalex.org/C14646407","wikidata":"https://www.wikidata.org/wiki/Q1430750","display_name":"Bellman equation","level":2,"score":0.5988587141036987},{"id":"https://openalex.org/C61326573","wikidata":"https://www.wikidata.org/wiki/Q1496376","display_name":"Gaussian process","level":3,"score":0.5972110629081726},{"id":"https://openalex.org/C188116033","wikidata":"https://www.wikidata.org/wiki/Q2664563","display_name":"Q-learning","level":3,"score":0.5096290111541748},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.49652010202407837},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.49212783575057983},{"id":"https://openalex.org/C186370098","wikidata":"https://www.wikidata.org/wiki/Q442787","display_name":"Energy (signal processing)","level":2,"score":0.4408811032772064},{"id":"https://openalex.org/C196340769","wikidata":"https://www.wikidata.org/wiki/Q7698910","display_name":"Temporal difference learning","level":3,"score":0.43134722113609314},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.3859833776950836},{"id":"https://openalex.org/C163716315","wikidata":"https://www.wikidata.org/wiki/Q901177","display_name":"Gaussian","level":2,"score":0.3226991891860962},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.1775602102279663},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.10533922910690308},{"id":"https://openalex.org/C78458016","wikidata":"https://www.wikidata.org/wiki/Q840400","display_name":"Evolutionary biology","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1177/0278364914553683","is_oa":false,"landing_page_url":"https://doi.org/10.1177/0278364914553683","pdf_url":null,"source":{"id":"https://openalex.org/S73484101","display_name":"The International Journal of Robotics Research","issn_l":"0278-3649","issn":["0278-3649","1741-3176"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320017","host_organization_name":"SAGE Publishing","host_organization_lineage":["https://openalex.org/P4310320017"],"host_organization_lineage_names":["SAGE Publishing"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"The International Journal of Robotics Research","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":30,"referenced_works":["https://openalex.org/W568924265","https://openalex.org/W1515851193","https://openalex.org/W1583837637","https://openalex.org/W1591803298","https://openalex.org/W1684737195","https://openalex.org/W1969834827","https://openalex.org/W1984615387","https://openalex.org/W2017995647","https://openalex.org/W2020920737","https://openalex.org/W2032239956","https://openalex.org/W2051499933","https://openalex.org/W2058626008","https://openalex.org/W2099201756","https://openalex.org/W2109909644","https://openalex.org/W2115384379","https://openalex.org/W2125069195","https://openalex.org/W2129564505","https://openalex.org/W2132849848","https://openalex.org/W2154032554","https://openalex.org/W2156974606","https://openalex.org/W2161674001","https://openalex.org/W2604272474","https://openalex.org/W2610686804","https://openalex.org/W2886362482","https://openalex.org/W3011120880","https://openalex.org/W3011631596","https://openalex.org/W3099664902","https://openalex.org/W3139377883","https://openalex.org/W4211049957","https://openalex.org/W4214717370"],"related_works":["https://openalex.org/W2955790965","https://openalex.org/W3105579180","https://openalex.org/W4308702637","https://openalex.org/W2149418961","https://openalex.org/W2806157618","https://openalex.org/W4256087190","https://openalex.org/W4298064558","https://openalex.org/W4287865573","https://openalex.org/W2808418668","https://openalex.org/W2025663273"],"abstract_inverted_index":{"This":[0],"paper":[1],"examines":[2],"temporal":[3],"difference":[4],"reinforcement":[5,53],"learning":[6,25,54,150,161],"with":[7,111,135,141],"adaptive":[8],"and":[9,126,177],"directed":[10,118],"exploration":[11,91,145],"for":[12,168],"resource-limited":[13],"missions.":[14],"The":[15,35,56,152],"scenario":[16],"considered":[17],"is":[18,67,163,175,185],"that":[19,66,98,155],"of":[20,73,85,102,124],"an":[21,100,108],"unpowered":[22],"aerial":[23],"glider":[24],"to":[26,46,69,76,116,129,146,165],"perform":[27],"energy-gaining":[28,180],"flight":[29],"trajectories":[30,171,181],"in":[31,51,83],"a":[32,41,52,61,94,136],"thermal":[33],"updraft.":[34],"presented":[36,133],"algorithm,":[37],"eGP-SARSA(":[38],"\u03bb),":[39],"uses":[40],"Gaussian":[42,57,78],"process":[43,58,79],"regression":[44],"model":[45,82],"estimate":[47,101],"the":[48,71,77,103,112,130,148,160],"value":[49,80,114],"function":[50,81,97,115],"framework.":[55],"also":[59],"provides":[60],"variance":[62],"on":[63],"these":[64],"estimates":[65],"used":[68],"measure":[70],"contribution":[72],"future":[74,104],"observations":[75],"terms":[84],"information":[86,105],"gain.":[87],"To":[88],"avoid":[89],"myopic":[90],"we":[92],"developed":[93],"resource-weighted":[95],"objective":[96,158],"combines":[99],"gain":[106],"using":[107],"action":[109,120],"rollout":[110],"estimated":[113],"generate":[117],"explorative":[119],"sequences.":[121],"A":[122],"number":[123],"modifications":[125],"computational":[127],"speed-ups":[128],"algorithm":[131],"are":[132],"along":[134],"standard":[137],"GP-SARSA(":[138],"\u03bb)":[139],"implementation":[140],"[Formula:":[142],"see":[143],"text]-greedy":[144],"compare":[147],"respective":[149],"performances.":[151],"results":[153],"show":[154],"under":[156],"this":[157],"function,":[159],"agent":[162],"able":[164],"continue":[166],"exploring":[167],"better":[169],"state-action":[170],"when":[172,182],"platform":[173,183],"energy":[174,184],"high":[176],"follow":[178],"conservative":[179],"low.":[186]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2024,"cited_by_count":5},{"year":2023,"cited_by_count":6},{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":2},{"year":2020,"cited_by_count":4},{"year":2019,"cited_by_count":5},{"year":2018,"cited_by_count":4},{"year":2017,"cited_by_count":2},{"year":2016,"cited_by_count":4}],"updated_date":"2026-05-03T06:03:33.228499","created_date":"2025-10-10T00:00:00"}
