{"id":"https://openalex.org/W4416748519","doi":"https://doi.org/10.1109/iros60139.2025.11246149","title":"Confidence-Controlled Exploration: Efficient Sparse-Reward Policy Learning for Robot Navigation","display_name":"Confidence-Controlled Exploration: Efficient Sparse-Reward Policy Learning for Robot Navigation","publication_year":2025,"publication_date":"2025-10-19","ids":{"openalex":"https://openalex.org/W4416748519","doi":"https://doi.org/10.1109/iros60139.2025.11246149"},"language":null,"primary_location":{"id":"doi:10.1109/iros60139.2025.11246149","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iros60139.2025.11246149","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5004951324","display_name":"Bhrij Patel","orcid":null},"institutions":[{"id":"https://openalex.org/I66946132","display_name":"University of Maryland, College Park","ror":"https://ror.org/047s2c258","country_code":"US","type":"education","lineage":["https://openalex.org/I66946132"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Bhrij Patel","raw_affiliation_strings":["University of Maryland,Department of Computer Science,College Park,MD,USA"],"affiliations":[{"raw_affiliation_string":"University of Maryland,Department of Computer Science,College Park,MD,USA","institution_ids":["https://openalex.org/I66946132"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5080472165","display_name":"Kasun Weerakoon","orcid":null},"institutions":[{"id":"https://openalex.org/I66946132","display_name":"University of Maryland, College Park","ror":"https://ror.org/047s2c258","country_code":"US","type":"education","lineage":["https://openalex.org/I66946132"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Kasun Weerakoon","raw_affiliation_strings":["University of Maryland,Department of Computer Science,College Park,MD,USA"],"affiliations":[{"raw_affiliation_string":"University of Maryland,Department of Computer Science,College Park,MD,USA","institution_ids":["https://openalex.org/I66946132"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5056599663","display_name":"Wesley A. Suttle","orcid":"https://orcid.org/0000-0003-1234-7151"},"institutions":[{"id":"https://openalex.org/I166416128","display_name":"DEVCOM Army Research Laboratory","ror":"https://ror.org/011hc8f90","country_code":"US","type":"government","lineage":["https://openalex.org/I1304082316","https://openalex.org/I1330347796","https://openalex.org/I166416128","https://openalex.org/I2802705668","https://openalex.org/I4210154437"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Wesley A. Suttle","raw_affiliation_strings":["U.S. Army Research Laboratory,MD,USA"],"affiliations":[{"raw_affiliation_string":"U.S. Army Research Laboratory,MD,USA","institution_ids":["https://openalex.org/I166416128"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5025896653","display_name":"Alec Koppel","orcid":"https://orcid.org/0000-0003-2447-2873"},"institutions":[{"id":"https://openalex.org/I1305429384","display_name":"JPMorgan Chase & Co (United States)","ror":"https://ror.org/01x3kkr08","country_code":"US","type":"company","lineage":["https://openalex.org/I1305429384"]},{"id":"https://openalex.org/I2802755631","display_name":"Morgan Stanley (United States)","ror":"https://ror.org/00aphdz18","country_code":"US","type":"company","lineage":["https://openalex.org/I2802755631"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Alec Koppel","raw_affiliation_strings":["JP Morgan AI Research,New York,NY,USA"],"affiliations":[{"raw_affiliation_string":"JP Morgan AI Research,New York,NY,USA","institution_ids":["https://openalex.org/I2802755631","https://openalex.org/I1305429384"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5053858045","display_name":"Brian M. Sadler","orcid":"https://orcid.org/0000-0002-9564-3812"},"institutions":[{"id":"https://openalex.org/I86519309","display_name":"The University of Texas at Austin","ror":"https://ror.org/00hj54h04","country_code":"US","type":"education","lineage":["https://openalex.org/I86519309"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Brian M. Sadler","raw_affiliation_strings":["University of Texas at Austin,Austin,TX,USA"],"affiliations":[{"raw_affiliation_string":"University of Texas at Austin,Austin,TX,USA","institution_ids":["https://openalex.org/I86519309"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5039076312","display_name":"Tianyi Zhou","orcid":"https://orcid.org/0000-0001-5348-0632"},"institutions":[{"id":"https://openalex.org/I66946132","display_name":"University of Maryland, College Park","ror":"https://ror.org/047s2c258","country_code":"US","type":"education","lineage":["https://openalex.org/I66946132"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Tianyi Zhou","raw_affiliation_strings":["University of Maryland,Department of Computer Science,College Park,MD,USA"],"affiliations":[{"raw_affiliation_string":"University of Maryland,Department of Computer Science,College Park,MD,USA","institution_ids":["https://openalex.org/I66946132"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5004194238","display_name":"Dinesh Manocha","orcid":"https://orcid.org/0000-0001-7047-9801"},"institutions":[{"id":"https://openalex.org/I66946132","display_name":"University of Maryland, College Park","ror":"https://ror.org/047s2c258","country_code":"US","type":"education","lineage":["https://openalex.org/I66946132"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Dinesh Manocha","raw_affiliation_strings":["University of Maryland,Department of Computer Science,College Park,MD,USA"],"affiliations":[{"raw_affiliation_string":"University of Maryland,Department of Computer Science,College Park,MD,USA","institution_ids":["https://openalex.org/I66946132"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5039563144","display_name":"Amrit Singh Bedi","orcid":"https://orcid.org/0000-0002-8807-2695"},"institutions":[{"id":"https://openalex.org/I106165777","display_name":"University of Central Florida","ror":"https://ror.org/036nfer12","country_code":"US","type":"education","lineage":["https://openalex.org/I106165777"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Amrit Singh Bedi","raw_affiliation_strings":["University of Central Florida,Department of Computer Science,Orlando,FL,USA"],"affiliations":[{"raw_affiliation_string":"University of Central Florida,Department of Computer Science,Orlando,FL,USA","institution_ids":["https://openalex.org/I106165777"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5004951324"],"corresponding_institution_ids":["https://openalex.org/I66946132"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.19290057,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"2337","last_page":"2344"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.4221999943256378,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.4221999943256378,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10586","display_name":"Robotic Path Planning Algorithms","score":0.2565000057220459,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10191","display_name":"Robotics and Sensor-Based Localization","score":0.05979999899864197,"subfield":{"id":"https://openalex.org/subfields/2202","display_name":"Aerospace Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.6945000290870667},{"id":"https://openalex.org/keywords/inefficiency","display_name":"Inefficiency","score":0.6782000064849854},{"id":"https://openalex.org/keywords/trajectory","display_name":"Trajectory","score":0.5473999977111816},{"id":"https://openalex.org/keywords/robot","display_name":"Robot","score":0.5188999772071838},{"id":"https://openalex.org/keywords/entropy","display_name":"Entropy (arrow of time)","score":0.49889999628067017},{"id":"https://openalex.org/keywords/regularization","display_name":"Regularization (linguistics)","score":0.44369998574256897}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.6945000290870667},{"id":"https://openalex.org/C2778869765","wikidata":"https://www.wikidata.org/wiki/Q6028363","display_name":"Inefficiency","level":2,"score":0.6782000064849854},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6420999765396118},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.557699978351593},{"id":"https://openalex.org/C13662910","wikidata":"https://www.wikidata.org/wiki/Q193139","display_name":"Trajectory","level":2,"score":0.5473999977111816},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.5188999772071838},{"id":"https://openalex.org/C106301342","wikidata":"https://www.wikidata.org/wiki/Q4117933","display_name":"Entropy (arrow of time)","level":2,"score":0.49889999628067017},{"id":"https://openalex.org/C2776135515","wikidata":"https://www.wikidata.org/wiki/Q17143721","display_name":"Regularization (linguistics)","level":2,"score":0.44369998574256897},{"id":"https://openalex.org/C198531522","wikidata":"https://www.wikidata.org/wiki/Q485146","display_name":"Sample (material)","level":2,"score":0.44179999828338623},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3521000146865845},{"id":"https://openalex.org/C2778445095","wikidata":"https://www.wikidata.org/wiki/Q18354077","display_name":"Sample complexity","level":2,"score":0.3490000069141388},{"id":"https://openalex.org/C34413123","wikidata":"https://www.wikidata.org/wiki/Q170978","display_name":"Robotics","level":3,"score":0.3301999866962433},{"id":"https://openalex.org/C19966478","wikidata":"https://www.wikidata.org/wiki/Q4810574","display_name":"Mobile robot","level":3,"score":0.27619999647140503},{"id":"https://openalex.org/C117765406","wikidata":"https://www.wikidata.org/wiki/Q5362437","display_name":"Generalization error","level":3,"score":0.2502000033855438}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/iros60139.2025.11246149","is_oa":false,"landing_page_url":"https://doi.org/10.1109/iros60139.2025.11246149","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE/RSJ International Conference on Intelligent Robots and Systems (IROS)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320316514","display_name":"Arm","ror":"https://ror.org/04mmhzs81"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":27,"referenced_works":["https://openalex.org/W51694917","https://openalex.org/W1424654272","https://openalex.org/W2096649264","https://openalex.org/W2112765698","https://openalex.org/W2158782408","https://openalex.org/W2198041288","https://openalex.org/W2484085037","https://openalex.org/W2770898551","https://openalex.org/W2885549115","https://openalex.org/W2912063360","https://openalex.org/W2963099939","https://openalex.org/W2963523627","https://openalex.org/W2989870698","https://openalex.org/W2997343068","https://openalex.org/W3006499227","https://openalex.org/W3045974612","https://openalex.org/W3095339682","https://openalex.org/W3153423274","https://openalex.org/W3158253560","https://openalex.org/W3204691825","https://openalex.org/W3205830017","https://openalex.org/W3207342878","https://openalex.org/W4205463258","https://openalex.org/W4220840735","https://openalex.org/W4254356098","https://openalex.org/W4256431479","https://openalex.org/W4383109304"],"related_works":[],"abstract_inverted_index":{"Reinforcement":[0],"learning":[1],"(RL)":[2],"is":[3,96,106,112],"a":[4,48,113,120,178,188],"promising":[5],"approach":[6],"for":[7],"robotic":[8,20,57],"navigation,":[9],"allowing":[10],"robots":[11],"to":[12,28,35,98,108],"learn":[13],"through":[14],"trial":[15],"and":[16,31,71,101,115,126,134,138,148,153,160,172],"error.":[17],"However,":[18],"real-world":[19,154],"tasks":[21],"often":[22],"suffer":[23],"from":[24],"sparse":[25],"rewards,":[26,80],"leading":[27],"inefficient":[29],"exploration":[30,100],"suboptimal":[32],"policies":[33],"due":[34],"sample":[36,53,181],"inefficiency":[37],"of":[38],"RL.":[39],"In":[40],"this":[41],"work,":[42],"we":[43,184],"introduce":[44,76],"Confidence-Controlled":[45],"Exploration":[46],"(CCE),":[47],"novel":[49],"method":[50],"that":[51],"improves":[52],"efficiency":[54],"in":[55,150,195],"RL-based":[56],"navigation":[58,155],"without":[59],"modifying":[60],"the":[61],"reward":[62,72],"function.":[63],"Unlike":[64],"existing":[65],"approaches,":[66],"such":[67],"as":[68],"entropy":[69,125],"regularization":[70],"shaping,":[73],"which":[74],"can":[75],"instability":[77],"by":[78,119],"altering":[79],"CCE":[81,111,144,157,186],"dynamically":[82],"adjusts":[83],"trajectory":[84],"length":[85],"based":[86],"on":[87,187],"policy":[88,124],"entropy.":[89],"Specifically,":[90],"it":[91],"shortens":[92],"trajectories":[93],"when":[94,104],"uncertainty":[95],"high":[97,107],"enhance":[99],"extends":[102],"them":[103],"confidence":[105],"prioritize":[109],"exploitation.":[110],"principled":[114],"practical":[116],"solution":[117],"inspired":[118],"theoretical":[121],"connection":[122],"between":[123],"gradient":[127],"estimation.":[128],"It":[129],"integrates":[130],"seamlessly":[131],"with":[132],"on-policy":[133],"off-policy":[135],"RL":[136],"methods":[137],"requires":[139],"minimal":[140],"modifications.":[141],"We":[142],"validate":[143],"across":[145],"REINFORCE,":[146],"PPO,":[147],"SAC":[149],"both":[151],"simulated":[152],"tasks.":[156],"outperforms":[158],"fixed-trajectory":[159],"entropy-regularized":[161],"baselines,":[162],"achieving":[163],"an":[164],"18%":[165],"higher":[166],"success":[167],"rate,":[168],"20-38%":[169],"shorter":[170],"paths,":[171],"9.32%":[173],"lower":[174],"elevation":[175],"costs":[176],"under":[177],"fixed":[179],"training":[180],"budget.":[182],"Finally,":[183],"deploy":[185],"Clearpath":[189],"Husky":[190],"robot,":[191],"demonstrating":[192],"its":[193],"effectiveness":[194],"complex":[196],"outdoor":[197],"environments.":[198]},"counts_by_year":[],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-11-28T00:00:00"}
