{"id":"https://openalex.org/W7134977219","doi":"https://doi.org/10.48550/arxiv.2603.08931","title":"Optimizing Reinforcement Learning Training over Digital Twin Enabled Multi-fidelity Networks","display_name":"Optimizing Reinforcement Learning Training over Digital Twin Enabled Multi-fidelity Networks","publication_year":2026,"publication_date":"2026-03-09","ids":{"openalex":"https://openalex.org/W7134977219","doi":"https://doi.org/10.48550/arxiv.2603.08931"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.08931","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.08931","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.08931","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5115636472","display_name":"Hanzhi Yu","orcid":"https://orcid.org/0009-0002-4985-400X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yu, Hanzhi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128734196","display_name":"Hasan Farooq","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Farooq, Hasan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128725513","display_name":"Julien Forgeat","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Forgeat, Julien","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5014010940","display_name":"Shruti Bothe","orcid":"https://orcid.org/0000-0001-7481-8946"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bothe, Shruti","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5062041438","display_name":"Kristijonas \u010cyras","orcid":"https://orcid.org/0000-0002-4353-8121"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Cyras, Kristijonas","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5037193213","display_name":"Md Moin Uddin Chowdhury","orcid":"https://orcid.org/0000-0003-4787-4971"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chowdhury, Md Moin Uddin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5128708487","display_name":"Mingzhe Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Mingzhe","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":7,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10148","display_name":"Advanced MIMO Systems Optimization","score":0.20999999344348907,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10148","display_name":"Advanced MIMO Systems Optimization","score":0.20999999344348907,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10714","display_name":"Software-Defined Networks and 5G","score":0.10170000046491623,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11133","display_name":"UAV Applications and Optimization","score":0.08020000159740448,"subfield":{"id":"https://openalex.org/subfields/2202","display_name":"Aerospace Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.6958000063896179},{"id":"https://openalex.org/keywords/overhead","display_name":"Overhead (engineering)","score":0.5232999920845032},{"id":"https://openalex.org/keywords/data-collection","display_name":"Data collection","score":0.5045999884605408},{"id":"https://openalex.org/keywords/wireless-network","display_name":"Wireless network","score":0.46639999747276306},{"id":"https://openalex.org/keywords/base-station","display_name":"Base station","score":0.4041000008583069},{"id":"https://openalex.org/keywords/network-delay","display_name":"Network delay","score":0.3828999996185303},{"id":"https://openalex.org/keywords/network-performance","display_name":"Network performance","score":0.37700000405311584},{"id":"https://openalex.org/keywords/network-simulation","display_name":"Network simulation","score":0.37450000643730164},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.37049999833106995}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7300999760627747},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.6958000063896179},{"id":"https://openalex.org/C2779960059","wikidata":"https://www.wikidata.org/wiki/Q7113681","display_name":"Overhead (engineering)","level":2,"score":0.5232999920845032},{"id":"https://openalex.org/C133462117","wikidata":"https://www.wikidata.org/wiki/Q4929239","display_name":"Data collection","level":2,"score":0.5045999884605408},{"id":"https://openalex.org/C108037233","wikidata":"https://www.wikidata.org/wiki/Q11375","display_name":"Wireless network","level":3,"score":0.46639999747276306},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.4645000100135803},{"id":"https://openalex.org/C68649174","wikidata":"https://www.wikidata.org/wiki/Q1379116","display_name":"Base station","level":2,"score":0.4041000008583069},{"id":"https://openalex.org/C152623178","wikidata":"https://www.wikidata.org/wiki/Q436417","display_name":"Network delay","level":3,"score":0.3828999996185303},{"id":"https://openalex.org/C203274722","wikidata":"https://www.wikidata.org/wiki/Q7001161","display_name":"Network performance","level":2,"score":0.37700000405311584},{"id":"https://openalex.org/C139940560","wikidata":"https://www.wikidata.org/wiki/Q290036","display_name":"Network simulation","level":2,"score":0.37450000643730164},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.37049999833106995},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.35580000281333923},{"id":"https://openalex.org/C555944384","wikidata":"https://www.wikidata.org/wiki/Q249","display_name":"Wireless","level":2,"score":0.35339999198913574},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.350600004196167},{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.3418999910354614},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.3368000090122223},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.3287999927997589},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.32710000872612},{"id":"https://openalex.org/C137836250","wikidata":"https://www.wikidata.org/wiki/Q984063","display_name":"Optimization problem","level":2,"score":0.3248000144958496},{"id":"https://openalex.org/C112972136","wikidata":"https://www.wikidata.org/wiki/Q7595718","display_name":"Stability (learning theory)","level":2,"score":0.31779998540878296},{"id":"https://openalex.org/C557945733","wikidata":"https://www.wikidata.org/wiki/Q389772","display_name":"Data transmission","level":2,"score":0.3052000105381012},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.3037000000476837},{"id":"https://openalex.org/C153646914","wikidata":"https://www.wikidata.org/wiki/Q535695","display_name":"Cellular network","level":2,"score":0.30090001225471497},{"id":"https://openalex.org/C188116033","wikidata":"https://www.wikidata.org/wiki/Q2664563","display_name":"Q-learning","level":3,"score":0.2973000109195709},{"id":"https://openalex.org/C108921912","wikidata":"https://www.wikidata.org/wiki/Q7834639","display_name":"Transmission delay","level":3,"score":0.2897000014781952},{"id":"https://openalex.org/C44154836","wikidata":"https://www.wikidata.org/wiki/Q45045","display_name":"Simulation","level":1,"score":0.2865000069141388},{"id":"https://openalex.org/C193415008","wikidata":"https://www.wikidata.org/wiki/Q639681","display_name":"Network architecture","level":2,"score":0.2856000065803528},{"id":"https://openalex.org/C2779844322","wikidata":"https://www.wikidata.org/wiki/Q2919140","display_name":"Tilt (camera)","level":2,"score":0.2759999930858612},{"id":"https://openalex.org/C21822782","wikidata":"https://www.wikidata.org/wiki/Q131214","display_name":"Antenna (radio)","level":2,"score":0.2660999894142151},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.2606000006198883}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.08931","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.08931","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.08931","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.08931","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"In":[0,18],"this":[1,170,216],"paper,":[2],"we":[3,20,107,218],"investigate":[4],"a":[5,22,26,39,43,89,220,256],"novel":[6],"digital":[7],"network":[8,24,79,115,125,156,244],"twin":[9],"(DNT)":[10],"assisted":[11],"deep":[12],"learning":[13,91],"(DL)":[14],"model":[15],"training":[16,163],"framework.":[17],"particular,":[19],"consider":[21],"physical":[23,48,114,124,155,212,243],"where":[25],"base":[27],"station":[28],"(BS)":[29],"uses":[30,260,272],"several":[31],"antennas":[32],"to":[33,58,67,76,96,135,146,160,179,193,250,255],"serve":[34],"multiple":[35],"mobile":[36],"users,":[37],"and":[38,85,116,157,187,229,252,268,278],"DNT":[40,159],"that":[41,224,237,259,271],"is":[42,94,126,144,178],"virtual":[44],"representation":[45],"of":[46,63,150,164,198],"the":[47,60,70,99,105,113,117,123,136,140,148,154,158,162,165,182,188,195,203,211,242,264,269,275,280],"network.":[49,213],"The":[50,119],"BS":[51,71],"must":[52],"adjust":[53,98],"its":[54],"antenna":[55,100],"tilt":[56,101,183],"angles":[57],"optimize":[59,181],"data":[61,110,120,137,151,189,196,209,245,281],"rates":[62,197],"all":[64,199],"users.":[65],"Due":[66],"user":[68,86],"mobility,":[69],"may":[72],"not":[73],"be":[74],"able":[75],"accurately":[77],"track":[78],"dynamics":[80],"such":[81],"as":[82,172,263],"wireless":[83],"channels":[84],"mobilities.":[87],"Hence,":[88],"reinforcement":[90],"(RL)":[92],"approach":[93],"used":[95],"dynamically":[97],"angles.":[102],"To":[103,214],"train":[104],"RL,":[106,267],"can":[108],"use":[109],"collected":[111,121,138,152],"from":[112,122,139,153,210],"DNT.":[118,141],"more":[127,131],"accurate":[128],"but":[129],"incurs":[130],"communication":[132],"overhead":[133],"compared":[134,254],"Therefore,":[142],"it":[143],"necessary":[145],"determine":[147],"ratio":[149,283],"improve":[161],"RL":[166,222,258],"model.":[167],"We":[168],"formulate":[169],"problem":[171,175],"an":[173],"optimization":[174,232],"whose":[176],"goal":[177],"jointly":[180],"angle":[184],"adjustment":[185],"policy":[186,231],"collection":[190,246,282],"strategy,":[191],"aiming":[192],"maximize":[194],"users":[200],"while":[201],"constraining":[202],"time":[204],"delay":[205,247],"introduced":[206],"by":[207,248],"collecting":[208],"solve":[215],"problem,":[217],"propose":[219],"hierarchical":[221,257],"framework":[223],"integrates":[225],"robust":[226],"adversarial":[227],"loss":[228],"proximal":[230],"(PPO).":[233],"Simulation":[234],"results":[235],"show":[236],"our":[238],"proposed":[239],"method":[240],"reduces":[241],"up":[249],"28.01%":[251],"1x":[253],"vanilla":[261],"PPO":[262],"first":[265,276],"level":[266,277],"baseline":[270],"robust-RL":[273],"at":[274],"selects":[279],"randomly.":[284]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-03-12T00:00:00"}
