{"id":"https://openalex.org/W4407951744","doi":"https://doi.org/10.1109/cdc56724.2024.10886011","title":"Reinforcement Learning with Quasi-Hyperbolic Discounting","display_name":"Reinforcement Learning with Quasi-Hyperbolic Discounting","publication_year":2024,"publication_date":"2024-12-16","ids":{"openalex":"https://openalex.org/W4407951744","doi":"https://doi.org/10.1109/cdc56724.2024.10886011"},"language":"en","primary_location":{"id":"doi:10.1109/cdc56724.2024.10886011","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cdc56724.2024.10886011","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE 63rd Conference on Decision and Control (CDC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5018071013","display_name":"S. R. Eshwar","orcid":null},"institutions":[{"id":"https://openalex.org/I59270414","display_name":"Indian Institute of Science Bangalore","ror":"https://ror.org/04dese585","country_code":"IN","type":"education","lineage":["https://openalex.org/I59270414"]}],"countries":["IN"],"is_corresponding":true,"raw_author_name":"S.R. Eshwar","raw_affiliation_strings":["Indian Institute of Science,Department of Computer Science and Automation,Bengaluru,India"],"affiliations":[{"raw_affiliation_string":"Indian Institute of Science,Department of Computer Science and Automation,Bengaluru,India","institution_ids":["https://openalex.org/I59270414"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5114386978","display_name":"Mayank Motwani","orcid":null},"institutions":[{"id":"https://openalex.org/I162827531","display_name":"Indian Institute of Technology Bombay","ror":"https://ror.org/02qyf5152","country_code":"IN","type":"education","lineage":["https://openalex.org/I162827531"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Mayank Motwani","raw_affiliation_strings":["Indian Institute of Technology,Department of Computer Science and Engineering,Bombay,India"],"affiliations":[{"raw_affiliation_string":"Indian Institute of Technology,Department of Computer Science and Engineering,Bombay,India","institution_ids":["https://openalex.org/I162827531"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5113230487","display_name":"Nibedita Roy","orcid":null},"institutions":[{"id":"https://openalex.org/I59270414","display_name":"Indian Institute of Science Bangalore","ror":"https://ror.org/04dese585","country_code":"IN","type":"education","lineage":["https://openalex.org/I59270414"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Nibedita Roy","raw_affiliation_strings":["Indian Institute of Science,Department of Computer Science and Automation,Bengaluru,India"],"affiliations":[{"raw_affiliation_string":"Indian Institute of Science,Department of Computer Science and Automation,Bengaluru,India","institution_ids":["https://openalex.org/I59270414"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5047138399","display_name":"Gugan Thoppe","orcid":"https://orcid.org/0000-0001-5066-6589"},"institutions":[{"id":"https://openalex.org/I59270414","display_name":"Indian Institute of Science Bangalore","ror":"https://ror.org/04dese585","country_code":"IN","type":"education","lineage":["https://openalex.org/I59270414"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Gugan Thoppe","raw_affiliation_strings":["Indian Institute of Science,Department of Computer Science and Automation,Bengaluru,India"],"affiliations":[{"raw_affiliation_string":"Indian Institute of Science,Department of Computer Science and Automation,Bengaluru,India","institution_ids":["https://openalex.org/I59270414"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5018071013"],"corresponding_institution_ids":["https://openalex.org/I59270414"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.22286032,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1911","last_page":"1916"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11270","display_name":"Complex Systems and Time Series Analysis","score":0.864799976348877,"subfield":{"id":"https://openalex.org/subfields/2002","display_name":"Economics and Econometrics"},"field":{"id":"https://openalex.org/fields/20","display_name":"Economics, Econometrics and Finance"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11270","display_name":"Complex Systems and Time Series Analysis","score":0.864799976348877,"subfield":{"id":"https://openalex.org/subfields/2002","display_name":"Economics and Econometrics"},"field":{"id":"https://openalex.org/fields/20","display_name":"Economics, Econometrics and Finance"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T12137","display_name":"Economic theories and models","score":0.8360000252723694,"subfield":{"id":"https://openalex.org/subfields/2002","display_name":"Economics and Econometrics"},"field":{"id":"https://openalex.org/fields/20","display_name":"Economics, Econometrics and Finance"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10067","display_name":"Stochastic processes and financial applications","score":0.8119999766349792,"subfield":{"id":"https://openalex.org/subfields/2003","display_name":"Finance"},"field":{"id":"https://openalex.org/fields/20","display_name":"Economics, Econometrics and Finance"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/hyperbolic-discounting","display_name":"Hyperbolic discounting","score":0.7659966945648193},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7536864280700684},{"id":"https://openalex.org/keywords/reinforcement","display_name":"Reinforcement","score":0.6873021125793457},{"id":"https://openalex.org/keywords/discounting","display_name":"Discounting","score":0.6431760787963867},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.49565207958221436},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.37013643980026245},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.28208279609680176},{"id":"https://openalex.org/keywords/economics","display_name":"Economics","score":0.19163668155670166},{"id":"https://openalex.org/keywords/social-psychology","display_name":"Social psychology","score":0.17159098386764526}],"concepts":[{"id":"https://openalex.org/C2779785983","wikidata":"https://www.wikidata.org/wiki/Q5957248","display_name":"Hyperbolic discounting","level":3,"score":0.7659966945648193},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7536864280700684},{"id":"https://openalex.org/C67203356","wikidata":"https://www.wikidata.org/wiki/Q1321905","display_name":"Reinforcement","level":2,"score":0.6873021125793457},{"id":"https://openalex.org/C6177178","wikidata":"https://www.wikidata.org/wiki/Q10998070","display_name":"Discounting","level":2,"score":0.6431760787963867},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.49565207958221436},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.37013643980026245},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.28208279609680176},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.19163668155670166},{"id":"https://openalex.org/C77805123","wikidata":"https://www.wikidata.org/wiki/Q161272","display_name":"Social psychology","level":1,"score":0.17159098386764526},{"id":"https://openalex.org/C10138342","wikidata":"https://www.wikidata.org/wiki/Q43015","display_name":"Finance","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/cdc56724.2024.10886011","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cdc56724.2024.10886011","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE 63rd Conference on Decision and Control (CDC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Reduced inequalities","id":"https://metadata.un.org/sdg/10","score":0.44999998807907104}],"awards":[],"funders":[{"id":"https://openalex.org/F4320321027","display_name":"Indo-French Centre for the Promotion of Advanced Research","ror":"https://ror.org/017aem598"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":17,"referenced_works":["https://openalex.org/W1967232224","https://openalex.org/W2004717177","https://openalex.org/W2006619603","https://openalex.org/W2077343054","https://openalex.org/W2094104200","https://openalex.org/W2118052532","https://openalex.org/W2145107370","https://openalex.org/W2242816694","https://openalex.org/W3044123424","https://openalex.org/W3098354098","https://openalex.org/W4214717370","https://openalex.org/W4381894891","https://openalex.org/W6617752786","https://openalex.org/W6629480535","https://openalex.org/W6683204974","https://openalex.org/W6705061487","https://openalex.org/W7014788697"],"related_works":["https://openalex.org/W2782303441","https://openalex.org/W1535512822","https://openalex.org/W3121914354","https://openalex.org/W1576577160","https://openalex.org/W2041733990","https://openalex.org/W3126103995","https://openalex.org/W3140183753","https://openalex.org/W2118335486","https://openalex.org/W4229825454","https://openalex.org/W3121746500"],"abstract_inverted_index":{"Reinforcement":[0],"learning":[1],"has":[2,31],"traditionally":[3],"been":[4],"studied":[5],"with":[6,109,161],"exponential":[7],"discounting":[8,40],"or":[9,82],"the":[10,54,66,72,87,93,124,144,157,168],"average":[11],"reward":[12],"setup,":[13],"mainly":[14],"due":[15],"to":[16,65,96,107],"their":[17],"mathematical":[18],"tractability.":[19],"However,":[20],"such":[21],"frameworks":[22],"fall":[23],"short":[24],"of":[25,75,171],"accurately":[26],"capturing":[27],"human":[28],"behavior,":[29,103],"which":[30],"a":[32,42,110,114,133],"bias":[33],"towards":[34],"immediate":[35],"gratification.":[36],"Quasi-Hyperbolic":[37],"($\\mathbf{Q":[38],"H}$)":[39],"is":[41,80,90,106],"simple":[43],"alternative":[44,105],"for":[45,128,156],"modeling":[46],"this":[47,102,120,153],"bias.":[48],"Unlike":[49],"in":[50,113],"traditional":[51],"discounting,":[52],"though,":[53],"optimal":[55,91],"QH-policy,":[56],"starting":[57,68],"from":[58,69,86],"some":[59],"time":[60],"$t_{1}$,":[61],"can":[62,84],"be":[63,147],"different":[64],"one":[67],"$t_{2}$.":[70],"Hence,":[71],"future":[73],"self":[74],"an":[76,104,130,148],"agent,":[77],"if":[78,139],"it":[79],"naive":[81],"impatient,":[83],"deviate":[85],"policy":[88,111],"that":[89],"at":[92],"start,":[94],"leading":[95],"sub-optimal":[97],"overall":[98],"returns.":[99],"To":[100],"prevent":[101],"work":[108,165],"anchored":[112],"Markov":[115],"Perfect":[116],"Equilibrium":[117],"(MPE).":[118],"In":[119],"work,":[121],"we":[122,136],"propose":[123],"first":[125],"model-free":[126],"algorithm":[127,141],"finding":[129],"MPE.":[131,149],"Using":[132],"two-timescale":[134],"analysis,":[135],"show":[137],"that,":[138],"our":[140],"converges,":[142],"then":[143],"limit":[145],"must":[146],"We":[150],"also":[151],"validate":[152],"claim":[154],"numerically":[155],"standard":[158],"inventory":[159],"system":[160],"stochastic":[162],"demands.":[163],"Our":[164],"significantly":[166],"advances":[167],"practical":[169],"application":[170],"reinforcement":[172],"learning.":[173]},"counts_by_year":[],"updated_date":"2025-12-19T19:40:27.379048","created_date":"2025-10-10T00:00:00"}
