{"id":"https://openalex.org/W7130712686","doi":"https://doi.org/10.48550/arxiv.2602.17098","title":"Deep Reinforcement Learning for Optimal Portfolio Allocation: A Comparative Study with Mean-Variance Optimization","display_name":"Deep Reinforcement Learning for Optimal Portfolio Allocation: A Comparative Study with Mean-Variance Optimization","publication_year":2026,"publication_date":"2026-02-19","ids":{"openalex":"https://openalex.org/W7130712686","doi":"https://doi.org/10.48550/arxiv.2602.17098"},"language":null,"primary_location":{"id":"pmh:doi:10.48550/arxiv.2602.17098","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"publisher-specific-oa","license_id":"https://openalex.org/licenses/publisher-specific-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":null,"any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5043221382","display_name":"Srijan Sood","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sood, Srijan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5030885959","display_name":"Kassiani Papasotiriou","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Papasotiriou, Kassiani","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5013195106","display_name":"Marius Vai\u010diulis","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Vaiciulis, Marius","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5103950078","display_name":"Tucker Hybinette Balch","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Balch, Tucker","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11326","display_name":"Stock Market Forecasting Methods","score":0.39980000257492065,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11326","display_name":"Stock Market Forecasting Methods","score":0.39980000257492065,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T12101","display_name":"Advanced Bandit Algorithms Research","score":0.2705000042915344,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11413","display_name":"Risk and Portfolio Optimization","score":0.17399999499320984,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/portfolio","display_name":"Portfolio","score":0.7573999762535095},{"id":"https://openalex.org/keywords/black\u2013litterman-model","display_name":"Black\u2013Litterman model","score":0.6703000068664551},{"id":"https://openalex.org/keywords/portfolio-optimization","display_name":"Portfolio optimization","score":0.6699000000953674},{"id":"https://openalex.org/keywords/post-modern-portfolio-theory","display_name":"Post-modern portfolio theory","score":0.6036999821662903},{"id":"https://openalex.org/keywords/asset-allocation","display_name":"Asset allocation","score":0.5372999906539917},{"id":"https://openalex.org/keywords/application-portfolio-management","display_name":"Application portfolio management","score":0.5343000292778015},{"id":"https://openalex.org/keywords/sharpe-ratio","display_name":"Sharpe ratio","score":0.5055999755859375},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.4997999966144562},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.47999998927116394},{"id":"https://openalex.org/keywords/investment-strategy","display_name":"Investment strategy","score":0.44760000705718994}],"concepts":[{"id":"https://openalex.org/C2780821815","wikidata":"https://www.wikidata.org/wiki/Q5340806","display_name":"Portfolio","level":2,"score":0.7573999762535095},{"id":"https://openalex.org/C77913304","wikidata":"https://www.wikidata.org/wiki/Q879967","display_name":"Black\u2013Litterman model","level":5,"score":0.6703000068664551},{"id":"https://openalex.org/C202655437","wikidata":"https://www.wikidata.org/wiki/Q7231728","display_name":"Portfolio optimization","level":3,"score":0.6699000000953674},{"id":"https://openalex.org/C67051015","wikidata":"https://www.wikidata.org/wiki/Q7233558","display_name":"Post-modern portfolio theory","level":5,"score":0.6036999821662903},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5687000155448914},{"id":"https://openalex.org/C139819358","wikidata":"https://www.wikidata.org/wiki/Q462748","display_name":"Asset allocation","level":3,"score":0.5372999906539917},{"id":"https://openalex.org/C73938075","wikidata":"https://www.wikidata.org/wiki/Q2858767","display_name":"Application portfolio management","level":4,"score":0.5343000292778015},{"id":"https://openalex.org/C139938925","wikidata":"https://www.wikidata.org/wiki/Q1501898","display_name":"Sharpe ratio","level":3,"score":0.5055999755859375},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.4997999966144562},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.47999998927116394},{"id":"https://openalex.org/C103144560","wikidata":"https://www.wikidata.org/wiki/Q2670999","display_name":"Investment strategy","level":3,"score":0.44760000705718994},{"id":"https://openalex.org/C74510933","wikidata":"https://www.wikidata.org/wiki/Q1936145","display_name":"Project portfolio management","level":3,"score":0.4410000145435333},{"id":"https://openalex.org/C76178495","wikidata":"https://www.wikidata.org/wiki/Q4808784","display_name":"Asset (computer security)","level":2,"score":0.43290001153945923},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.4131999909877777},{"id":"https://openalex.org/C9725762","wikidata":"https://www.wikidata.org/wiki/Q1072885","display_name":"Modern portfolio theory","level":3,"score":0.40560001134872437},{"id":"https://openalex.org/C27548731","wikidata":"https://www.wikidata.org/wiki/Q88272","display_name":"Investment (military)","level":3,"score":0.4002000093460083},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.3856000006198883},{"id":"https://openalex.org/C162118730","wikidata":"https://www.wikidata.org/wiki/Q1128453","display_name":"Actuarial science","level":1,"score":0.3506999909877777},{"id":"https://openalex.org/C2778865806","wikidata":"https://www.wikidata.org/wiki/Q6060850","display_name":"Investment decisions","level":3,"score":0.32690000534057617},{"id":"https://openalex.org/C51485801","wikidata":"https://www.wikidata.org/wiki/Q16966861","display_name":"Efficient frontier","level":3,"score":0.3059000074863434},{"id":"https://openalex.org/C42475967","wikidata":"https://www.wikidata.org/wiki/Q194292","display_name":"Operations research","level":1,"score":0.30410000681877136},{"id":"https://openalex.org/C189188589","wikidata":"https://www.wikidata.org/wiki/Q14864997","display_name":"Investment management","level":3,"score":0.3018999993801117},{"id":"https://openalex.org/C19244329","wikidata":"https://www.wikidata.org/wiki/Q208697","display_name":"Financial market","level":2,"score":0.3010999858379364},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.29750001430511475},{"id":"https://openalex.org/C26584437","wikidata":"https://www.wikidata.org/wiki/Q6060874","display_name":"Investment performance","level":4,"score":0.29420000314712524},{"id":"https://openalex.org/C18762648","wikidata":"https://www.wikidata.org/wiki/Q42213","display_name":"Work (physics)","level":2,"score":0.29260000586509705},{"id":"https://openalex.org/C109271563","wikidata":"https://www.wikidata.org/wiki/Q7451830","display_name":"Separation property","level":5,"score":0.2743000090122223},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.26429998874664307},{"id":"https://openalex.org/C75956513","wikidata":"https://www.wikidata.org/wiki/Q609343","display_name":"Portfolio investment","level":3,"score":0.2605000138282776},{"id":"https://openalex.org/C112930515","wikidata":"https://www.wikidata.org/wiki/Q4389547","display_name":"Risk analysis (engineering)","level":1,"score":0.2596000134944916},{"id":"https://openalex.org/C137836250","wikidata":"https://www.wikidata.org/wiki/Q984063","display_name":"Optimization problem","level":2,"score":0.25870001316070557},{"id":"https://openalex.org/C21099588","wikidata":"https://www.wikidata.org/wiki/Q7314068","display_name":"Replicating portfolio","level":4,"score":0.25529998540878296},{"id":"https://openalex.org/C12725497","wikidata":"https://www.wikidata.org/wiki/Q810247","display_name":"Baseline (sea)","level":2,"score":0.25440001487731934}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:doi:10.48550/arxiv.2602.17098","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"publisher-specific-oa","license_id":"https://openalex.org/licenses/publisher-specific-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},{"id":"doi:10.48550/arxiv.2602.17098","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.17098","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:doi:10.48550/arxiv.2602.17098","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"publisher-specific-oa","license_id":"https://openalex.org/licenses/publisher-specific-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Portfolio":[0,24,139],"Management":[1],"is":[2,26,46,137,167],"the":[3,17,33,67,129,182,197,208],"process":[4],"of":[5,9,19,57,72,96,128,184,207],"overseeing":[6],"a":[7,14,27,55,168],"group":[8],"investments,":[10],"referred":[11],"to":[12,38,63,83,113,148,159,186],"as":[13,37],"portfolio,":[15],"with":[16],"objective":[18],"achieving":[20],"predetermined":[21],"investment":[22,61,163],"goals.":[23],"optimization":[25,191],"key":[28],"component":[29],"that":[30],"involves":[31],"allocating":[32],"portfolio":[34,68,85,178,190],"assets":[35],"so":[36],"maximize":[39],"returns":[40,152],"while":[41],"minimizing":[42],"risk":[43],"taken.":[44],"It":[45],"typically":[47],"carried":[48],"out":[49],"by":[50,87,121],"financial":[51,122],"professionals":[52,123],"who":[53],"use":[54],"combination":[56],"quantitative":[58],"techniques":[59],"and":[60,153,174,219],"expertise":[62],"make":[64,187],"decisions":[65],"about":[66],"allocation.":[69,179],"Recent":[70],"applications":[71],"Deep":[73],"Reinforcement":[74],"Learning":[75],"(DRL)":[76],"have":[77],"shown":[78],"promising":[79],"results":[80,101,203],"when":[81],"used":[82,120,132,158],"optimize":[84,160],"allocation":[86],"training":[88],"model-free":[89,172],"agents":[90,109],"on":[91],"historical":[92,144],"market":[93],"data.":[94],"Many":[95],"these":[97],"methods":[98,119,133],"compare":[99,114],"their":[100,115],"against":[102,117],"basic":[103],"benchmarks":[104],"or":[105],"other":[106],"state-of-the-art":[107],"DRL":[108,173,188,209],"but":[110],"often":[111],"fail":[112],"performance":[116,206],"traditional":[118],"in":[124,193],"practical":[125],"settings.":[126],"One":[127],"most":[130],"commonly":[131],"for":[134,161,176,189,200],"this":[135],"task":[136],"Mean-Variance":[138],"Optimization":[140],"(MVO),":[141],"which":[142,155],"uses":[143],"time":[145],"series":[146],"information":[147],"estimate":[149],"expected":[150],"asset":[151],"covariances,":[154],"are":[156],"then":[157],"an":[162],"objective.":[164],"Our":[165],"work":[166,192],"thorough":[169],"comparison":[170],"between":[171],"MVO":[175],"optimal":[177],"We":[180],"detail":[181],"specifics":[183],"how":[185],"practice,":[194],"also":[195],"noting":[196],"adjustments":[198],"needed":[199],"MVO.":[201],"Backtest":[202],"demonstrate":[204],"strong":[205],"agent":[210],"across":[211],"many":[212],"metrics,":[213],"including":[214],"Sharpe":[215],"ratio,":[216],"maximum":[217],"drawdowns,":[218],"absolute":[220],"returns.":[221]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-02-21T00:00:00"}
