{"id":"https://openalex.org/W4416582776","doi":"https://doi.org/10.1109/jiot.2025.3636204","title":"A Two-Layered Reinforcement Learning Framework for AoI-Aware Trajectory Planning and Scheduling Optimization in Multi-UAV Networks","display_name":"A Two-Layered Reinforcement Learning Framework for AoI-Aware Trajectory Planning and Scheduling Optimization in Multi-UAV Networks","publication_year":2025,"publication_date":"2025-11-24","ids":{"openalex":"https://openalex.org/W4416582776","doi":"https://doi.org/10.1109/jiot.2025.3636204"},"language":null,"primary_location":{"id":"doi:10.1109/jiot.2025.3636204","is_oa":false,"landing_page_url":"https://doi.org/10.1109/jiot.2025.3636204","pdf_url":null,"source":{"id":"https://openalex.org/S2480266640","display_name":"IEEE Internet of Things Journal","issn_l":"2327-4662","issn":["2327-4662","2372-2541"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Internet of Things Journal","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5045796263","display_name":"Kang Fu","orcid":"https://orcid.org/0009-0004-3448-2981"},"institutions":[{"id":"https://openalex.org/I125839683","display_name":"Beijing Institute of Technology","ror":"https://ror.org/01skt4w74","country_code":"CN","type":"education","lineage":["https://openalex.org/I125839683","https://openalex.org/I890469752"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Kang Fu","raw_affiliation_strings":["School of Computer Science and Technology, Beijing Institute of Technology, Beijing, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, Beijing Institute of Technology, Beijing, China","institution_ids":["https://openalex.org/I125839683"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5044948606","display_name":"Qingjie Zhao","orcid":"https://orcid.org/0000-0002-6955-4170"},"institutions":[{"id":"https://openalex.org/I125839683","display_name":"Beijing Institute of Technology","ror":"https://ror.org/01skt4w74","country_code":"CN","type":"education","lineage":["https://openalex.org/I125839683","https://openalex.org/I890469752"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qingjie Zhao","raw_affiliation_strings":["School of Computer Science and Technology, Beijing Institute of Technology, Beijing, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, Beijing Institute of Technology, Beijing, China","institution_ids":["https://openalex.org/I125839683"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100640443","display_name":"Lei Wang","orcid":"https://orcid.org/0000-0002-2377-5736"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lei Wang","raw_affiliation_strings":["Beijing Institute of Control Engineering, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Beijing Institute of Control Engineering, Beijing, China","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5045796263"],"corresponding_institution_ids":["https://openalex.org/I125839683"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.48186153,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"13","issue":"3","first_page":"4668","last_page":"4682"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11133","display_name":"UAV Applications and Optimization","score":0.5220000147819519,"subfield":{"id":"https://openalex.org/subfields/2202","display_name":"Aerospace Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11133","display_name":"UAV Applications and Optimization","score":0.5220000147819519,"subfield":{"id":"https://openalex.org/subfields/2202","display_name":"Aerospace Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13553","display_name":"Age of Information Optimization","score":0.43619999289512634,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10273","display_name":"IoT and Edge/Fog Computing","score":0.025100000202655792,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7980999946594238},{"id":"https://openalex.org/keywords/scheduling","display_name":"Scheduling (production processes)","score":0.6308000087738037},{"id":"https://openalex.org/keywords/knapsack-problem","display_name":"Knapsack problem","score":0.5307999849319458},{"id":"https://openalex.org/keywords/markov-decision-process","display_name":"Markov decision process","score":0.4546999931335449},{"id":"https://openalex.org/keywords/trajectory","display_name":"Trajectory","score":0.41370001435279846},{"id":"https://openalex.org/keywords/greedy-algorithm","display_name":"Greedy algorithm","score":0.399399995803833},{"id":"https://openalex.org/keywords/optimization-problem","display_name":"Optimization problem","score":0.3978999853134155},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.37959998846054077},{"id":"https://openalex.org/keywords/job-shop-scheduling","display_name":"Job shop scheduling","score":0.37619999051094055},{"id":"https://openalex.org/keywords/motion-planning","display_name":"Motion planning","score":0.36169999837875366}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8065000176429749},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7980999946594238},{"id":"https://openalex.org/C206729178","wikidata":"https://www.wikidata.org/wiki/Q2271896","display_name":"Scheduling (production processes)","level":2,"score":0.6308000087738037},{"id":"https://openalex.org/C113138325","wikidata":"https://www.wikidata.org/wiki/Q864457","display_name":"Knapsack problem","level":2,"score":0.5307999849319458},{"id":"https://openalex.org/C106189395","wikidata":"https://www.wikidata.org/wiki/Q176789","display_name":"Markov decision process","level":3,"score":0.4546999931335449},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.4377000033855438},{"id":"https://openalex.org/C13662910","wikidata":"https://www.wikidata.org/wiki/Q193139","display_name":"Trajectory","level":2,"score":0.41370001435279846},{"id":"https://openalex.org/C51823790","wikidata":"https://www.wikidata.org/wiki/Q504353","display_name":"Greedy algorithm","level":2,"score":0.399399995803833},{"id":"https://openalex.org/C137836250","wikidata":"https://www.wikidata.org/wiki/Q984063","display_name":"Optimization problem","level":2,"score":0.3978999853134155},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.37959998846054077},{"id":"https://openalex.org/C55416958","wikidata":"https://www.wikidata.org/wiki/Q6206757","display_name":"Job shop scheduling","level":3,"score":0.37619999051094055},{"id":"https://openalex.org/C81074085","wikidata":"https://www.wikidata.org/wiki/Q366872","display_name":"Motion planning","level":3,"score":0.36169999837875366},{"id":"https://openalex.org/C133462117","wikidata":"https://www.wikidata.org/wiki/Q4929239","display_name":"Data collection","level":2,"score":0.3603000044822693},{"id":"https://openalex.org/C188116033","wikidata":"https://www.wikidata.org/wiki/Q2664563","display_name":"Q-learning","level":3,"score":0.3562999963760376},{"id":"https://openalex.org/C185874996","wikidata":"https://www.wikidata.org/wiki/Q269699","display_name":"Interdependence","level":2,"score":0.35589998960494995},{"id":"https://openalex.org/C557945733","wikidata":"https://www.wikidata.org/wiki/Q389772","display_name":"Data transmission","level":2,"score":0.3402999937534332},{"id":"https://openalex.org/C120314980","wikidata":"https://www.wikidata.org/wiki/Q180634","display_name":"Distributed computing","level":1,"score":0.3319000005722046},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3118000030517578},{"id":"https://openalex.org/C173246807","wikidata":"https://www.wikidata.org/wiki/Q7833062","display_name":"Trajectory optimization","level":3,"score":0.30239999294281006},{"id":"https://openalex.org/C79403827","wikidata":"https://www.wikidata.org/wiki/Q3988","display_name":"Real-time computing","level":1,"score":0.3001999855041504},{"id":"https://openalex.org/C112972136","wikidata":"https://www.wikidata.org/wiki/Q7595718","display_name":"Stability (learning theory)","level":2,"score":0.2944999933242798},{"id":"https://openalex.org/C37404715","wikidata":"https://www.wikidata.org/wiki/Q380679","display_name":"Dynamic programming","level":2,"score":0.2921999990940094},{"id":"https://openalex.org/C110875604","wikidata":"https://www.wikidata.org/wiki/Q75","display_name":"The Internet","level":2,"score":0.2842999994754791},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.28279998898506165},{"id":"https://openalex.org/C46686674","wikidata":"https://www.wikidata.org/wiki/Q466303","display_name":"Boosting (machine learning)","level":2,"score":0.27889999747276306},{"id":"https://openalex.org/C28761237","wikidata":"https://www.wikidata.org/wiki/Q7805321","display_name":"Time horizon","level":2,"score":0.27410000562667847},{"id":"https://openalex.org/C114073186","wikidata":"https://www.wikidata.org/wiki/Q2631895","display_name":"Automated planning and scheduling","level":2,"score":0.27379998564720154},{"id":"https://openalex.org/C107568181","wikidata":"https://www.wikidata.org/wiki/Q5319000","display_name":"Dynamic priority scheduling","level":3,"score":0.27070000767707825},{"id":"https://openalex.org/C62611344","wikidata":"https://www.wikidata.org/wiki/Q1062658","display_name":"Node (physics)","level":2,"score":0.2687000036239624},{"id":"https://openalex.org/C29202148","wikidata":"https://www.wikidata.org/wiki/Q287260","display_name":"Resource allocation","level":2,"score":0.265500009059906},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.26489999890327454},{"id":"https://openalex.org/C90509273","wikidata":"https://www.wikidata.org/wiki/Q11012","display_name":"Robot","level":2,"score":0.26420000195503235},{"id":"https://openalex.org/C24590314","wikidata":"https://www.wikidata.org/wiki/Q336038","display_name":"Wireless sensor network","level":2,"score":0.26109999418258667},{"id":"https://openalex.org/C148764684","wikidata":"https://www.wikidata.org/wiki/Q621751","display_name":"Approximation algorithm","level":2,"score":0.25429999828338623},{"id":"https://openalex.org/C41045048","wikidata":"https://www.wikidata.org/wiki/Q202843","display_name":"Linear programming","level":2,"score":0.25290000438690186}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/jiot.2025.3636204","is_oa":false,"landing_page_url":"https://doi.org/10.1109/jiot.2025.3636204","pdf_url":null,"source":{"id":"https://openalex.org/S2480266640","display_name":"IEEE Internet of Things Journal","issn_l":"2327-4662","issn":["2327-4662","2372-2541"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Internet of Things Journal","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":27,"referenced_works":["https://openalex.org/W2145339207","https://openalex.org/W2948946638","https://openalex.org/W2963294159","https://openalex.org/W2990270976","https://openalex.org/W3038418740","https://openalex.org/W4210618000","https://openalex.org/W4237591687","https://openalex.org/W4285151170","https://openalex.org/W4313535466","https://openalex.org/W4380607101","https://openalex.org/W4384080876","https://openalex.org/W4390075244","https://openalex.org/W4390492411","https://openalex.org/W4392901695","https://openalex.org/W4399039546","https://openalex.org/W4402351808","https://openalex.org/W4402661222","https://openalex.org/W4402978156","https://openalex.org/W4403182701","https://openalex.org/W4405710287","https://openalex.org/W4405974928","https://openalex.org/W4406047669","https://openalex.org/W4407373090","https://openalex.org/W4407638602","https://openalex.org/W4408017150","https://openalex.org/W4408708980","https://openalex.org/W4411406739"],"related_works":[],"abstract_inverted_index":{"Unmanned":[0],"aerial":[1],"vehicles":[2],"(UAVs)":[3],"have":[4],"emerged":[5],"as":[6,130],"an":[7],"effective":[8],"solution":[9],"for":[10,74],"data":[11,21,141],"collection":[12,142],"in":[13],"Internet":[14],"of":[15,25,200],"Things":[16],"(IoT)":[17],"networks.":[18],"To":[19,63],"maintain":[20],"freshness,":[22],"the":[23,84,100,122,127,198,201],"age":[24],"information":[26],"(AoI)":[27],"has":[28],"become":[29],"a":[30,69,87,131],"key":[31],"performance":[32],"metric,":[33],"which":[34],"is":[35,91,160],"jointly":[36],"influenced":[37],"by":[38,98,107],"UAV":[39,96],"trajectory":[40,76],"planning":[41,77],"and":[42,59,78,110,118,143,146,181,191,205],"sensor":[43],"node":[44],"(SN)":[45],"scheduling.":[46],"However,":[47],"optimizing":[48],"these":[49],"two":[50],"interdependent":[51],"tasks":[52],"simultaneously":[53],"leads":[54],"to":[55,93,115,150,162],"high-dimensional":[56],"decision":[57],"spaces":[58],"unstable":[60],"learning":[61,72,89],"dynamics.":[62],"solve":[64],"this":[65],"problem,":[66],"we":[67,125],"propose":[68],"two-layered":[70],"reinforcement":[71,88],"framework":[73],"AoI-aware":[75],"scheduling":[79,128],"optimization,":[80,135],"named":[81],"TL-RATS.":[82],"In":[83,121],"upper":[85],"layer,":[86,124],"module":[90],"designed":[92],"learn":[94],"long-term":[95],"trajectories":[97],"using":[99],"agent-by-agent":[101],"policy":[102],"optimization":[103],"(A2PO)":[104],"algorithm,":[105],"enhanced":[106],"sequential":[108],"updates":[109],"preceding-agent":[111],"off-policy":[112],"correction":[113],"(PreOPC)":[114],"ensure":[116],"sample-efficient":[117],"stable":[119],"learning.":[120],"lower":[123],"formulate":[126],"problem":[129],"time-constrained":[132],"0-1":[133],"knapsack":[134],"where":[136],"each":[137],"item\u2019s":[138],"weight":[139],"represents":[140],"transmission":[144,164],"time,":[145],"its":[147],"value":[148],"corresponds":[149],"potential":[151],"AoI":[152,180],"reduction.":[153],"A":[154],"lightweight":[155],"dynamic":[156],"programming":[157],"(DP)":[158],"algorithm":[159],"used":[161],"allocate":[163],"opportunities":[165],"under":[166,171],"time":[167],"constraints.":[168],"Extensive":[169],"experiments":[170],"diverse":[172],"SN":[173],"distributions":[174],"demonstrate":[175],"that":[176],"TL-RATS":[177],"significantly":[178],"reduces":[179],"outperforms":[182],"representative":[183],"baselines,":[184],"including":[185],"MAPPO,":[186],"IPPO,":[187],"MAT,":[188],"greedy":[189],"scheduling,":[190],"fully":[192],"joint":[193],"policy.":[194],"These":[195],"results":[196],"highlight":[197],"benefits":[199],"proposed":[202],"layered":[203],"design":[204],"task-specific":[206],"coordination.":[207]},"counts_by_year":[],"updated_date":"2026-03-07T16:01:11.037858","created_date":"2025-11-25T00:00:00"}
