{"id":"https://openalex.org/W7115569083","doi":"https://doi.org/10.3390/systems13121124","title":"MIRA: An LLM-Driven Dual-Loop Architecture for Metacognitive Reward Design","display_name":"MIRA: An LLM-Driven Dual-Loop Architecture for Metacognitive Reward Design","publication_year":2025,"publication_date":"2025-12-16","ids":{"openalex":"https://openalex.org/W7115569083","doi":"https://doi.org/10.3390/systems13121124"},"language":"en","primary_location":{"id":"doi:10.3390/systems13121124","is_oa":true,"landing_page_url":"https://doi.org/10.3390/systems13121124","pdf_url":"https://www.mdpi.com/2079-8954/13/12/1124/pdf","source":{"id":"https://openalex.org/S4210219410","display_name":"Systems","issn_l":"2079-8954","issn":["2079-8954"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.mdpi.com/2079-8954/13/12/1124/pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Weiying Zhang","orcid":null},"institutions":[{"id":"https://openalex.org/I41198531","display_name":"Nanjing University of Posts and Telecommunications","ror":"https://ror.org/043bpky34","country_code":"CN","type":"education","lineage":["https://openalex.org/I41198531"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Weiying Zhang","raw_affiliation_strings":["Post Big Data Technology and Application Engineering Research Center of Jiangsu Province, Nanjing University of Posts and Telecommunications, 66 Xinmofan Road, Nanjing 210003, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Post Big Data Technology and Application Engineering Research Center of Jiangsu Province, Nanjing University of Posts and Telecommunications, 66 Xinmofan Road, Nanjing 210003, China","institution_ids":["https://openalex.org/I41198531"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Yuhua Xu","orcid":"https://orcid.org/0000-0002-1545-3753"},"institutions":[{"id":"https://openalex.org/I41198531","display_name":"Nanjing University of Posts and Telecommunications","ror":"https://ror.org/043bpky34","country_code":"CN","type":"education","lineage":["https://openalex.org/I41198531"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuhua Xu","raw_affiliation_strings":["Post Industry Technology R&D Center of the State Posts Bureau (IoT Technology), Nanjing University of Posts and Telecommunications, 66 Xinmofan Road, Nanjing 210003, China"],"raw_orcid":"https://orcid.org/0000-0002-1545-3753","affiliations":[{"raw_affiliation_string":"Post Industry Technology R&D Center of the State Posts Bureau (IoT Technology), Nanjing University of Posts and Telecommunications, 66 Xinmofan Road, Nanjing 210003, China","institution_ids":["https://openalex.org/I41198531"]}]},{"author_position":"last","author":{"id":null,"display_name":"Zhixin Sun","orcid":null},"institutions":[{"id":"https://openalex.org/I41198531","display_name":"Nanjing University of Posts and Telecommunications","ror":"https://ror.org/043bpky34","country_code":"CN","type":"education","lineage":["https://openalex.org/I41198531"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhixin Sun","raw_affiliation_strings":["Post Big Data Technology and Application Engineering Research Center of Jiangsu Province, Nanjing University of Posts and Telecommunications, 66 Xinmofan Road, Nanjing 210003, China","Post Industry Technology R&D Center of the State Posts Bureau (IoT Technology), Nanjing University of Posts and Telecommunications, 66 Xinmofan Road, Nanjing 210003, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Post Big Data Technology and Application Engineering Research Center of Jiangsu Province, Nanjing University of Posts and Telecommunications, 66 Xinmofan Road, Nanjing 210003, China","institution_ids":["https://openalex.org/I41198531"]},{"raw_affiliation_string":"Post Industry Technology R&D Center of the State Posts Bureau (IoT Technology), Nanjing University of Posts and Telecommunications, 66 Xinmofan Road, Nanjing 210003, China","institution_ids":["https://openalex.org/I41198531"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I41198531"],"apc_list":{"value":1600,"currency":"CHF","value_usd":1732},"apc_paid":{"value":1600,"currency":"CHF","value_usd":1732},"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.78446313,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"13","issue":"12","first_page":"1124","last_page":"1124"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.2741999924182892,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.2741999924182892,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.09690000116825104,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10667","display_name":"Emotion and Mood Recognition","score":0.053700000047683716,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.6539999842643738},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.6090999841690063},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.5601000189781189},{"id":"https://openalex.org/keywords/a-priori-and-a-posteriori","display_name":"A priori and a posteriori","score":0.5289000272750854},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.43220001459121704},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.37470000982284546},{"id":"https://openalex.org/keywords/task-analysis","display_name":"Task analysis","score":0.36660000681877136},{"id":"https://openalex.org/keywords/replica","display_name":"Replica","score":0.36469998955726624},{"id":"https://openalex.org/keywords/encode","display_name":"ENCODE","score":0.36010000109672546}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7598999738693237},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.6539999842643738},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.6090999841690063},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5680000185966492},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.5601000189781189},{"id":"https://openalex.org/C75553542","wikidata":"https://www.wikidata.org/wiki/Q178161","display_name":"A priori and a posteriori","level":2,"score":0.5289000272750854},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4374000132083893},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.43220001459121704},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.37470000982284546},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.36660000681877136},{"id":"https://openalex.org/C2775937380","wikidata":"https://www.wikidata.org/wiki/Q1232589","display_name":"Replica","level":2,"score":0.36469998955726624},{"id":"https://openalex.org/C66746571","wikidata":"https://www.wikidata.org/wiki/Q1134833","display_name":"ENCODE","level":3,"score":0.36010000109672546},{"id":"https://openalex.org/C33435437","wikidata":"https://www.wikidata.org/wiki/Q366791","display_name":"Curiosity","level":2,"score":0.359499990940094},{"id":"https://openalex.org/C196340769","wikidata":"https://www.wikidata.org/wiki/Q7698910","display_name":"Temporal difference learning","level":3,"score":0.3531999886035919},{"id":"https://openalex.org/C160145156","wikidata":"https://www.wikidata.org/wiki/Q778586","display_name":"Executable","level":2,"score":0.35010001063346863},{"id":"https://openalex.org/C2776436953","wikidata":"https://www.wikidata.org/wiki/Q5163215","display_name":"Consistency (knowledge bases)","level":2,"score":0.3472999930381775},{"id":"https://openalex.org/C123657996","wikidata":"https://www.wikidata.org/wiki/Q12271","display_name":"Architecture","level":2,"score":0.33869999647140503},{"id":"https://openalex.org/C153180980","wikidata":"https://www.wikidata.org/wiki/Q19776675","display_name":"Commit","level":2,"score":0.3091000020503998},{"id":"https://openalex.org/C2776151529","wikidata":"https://www.wikidata.org/wiki/Q3045304","display_name":"Object detection","level":3,"score":0.30640000104904175},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.2962999939918518},{"id":"https://openalex.org/C149810388","wikidata":"https://www.wikidata.org/wiki/Q5374873","display_name":"Emulation","level":2,"score":0.2939000129699707},{"id":"https://openalex.org/C13662910","wikidata":"https://www.wikidata.org/wiki/Q193139","display_name":"Trajectory","level":2,"score":0.29030001163482666},{"id":"https://openalex.org/C2779436431","wikidata":"https://www.wikidata.org/wiki/Q30672407","display_name":"Policy learning","level":2,"score":0.2892000079154968},{"id":"https://openalex.org/C28006648","wikidata":"https://www.wikidata.org/wiki/Q6934509","display_name":"Multi-task learning","level":3,"score":0.28439998626708984},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.28380000591278076},{"id":"https://openalex.org/C2779960059","wikidata":"https://www.wikidata.org/wiki/Q7113681","display_name":"Overhead (engineering)","level":2,"score":0.2766000032424927},{"id":"https://openalex.org/C2779338814","wikidata":"https://www.wikidata.org/wiki/Q5179285","display_name":"Covert","level":2,"score":0.2574999928474426},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.25529998540878296},{"id":"https://openalex.org/C2777212361","wikidata":"https://www.wikidata.org/wiki/Q5127848","display_name":"Class (philosophy)","level":2,"score":0.2524999976158142}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.3390/systems13121124","is_oa":true,"landing_page_url":"https://doi.org/10.3390/systems13121124","pdf_url":"https://www.mdpi.com/2079-8954/13/12/1124/pdf","source":{"id":"https://openalex.org/S4210219410","display_name":"Systems","issn_l":"2079-8954","issn":["2079-8954"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Systems","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:ae6eebf03e22485db7432901a4004041","is_oa":true,"landing_page_url":"https://doaj.org/article/ae6eebf03e22485db7432901a4004041","pdf_url":null,"source":{"id":"https://openalex.org/S4306401280","display_name":"DOAJ (DOAJ: Directory of Open Access Journals)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Systems, Vol 13, Iss 12, p 1124 (2025)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.3390/systems13121124","is_oa":true,"landing_page_url":"https://doi.org/10.3390/systems13121124","pdf_url":"https://www.mdpi.com/2079-8954/13/12/1124/pdf","source":{"id":"https://openalex.org/S4210219410","display_name":"Systems","issn_l":"2079-8954","issn":["2079-8954"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Systems","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1407611265","display_name":null,"funder_award_id":"NY222029","funder_id":"https://openalex.org/F4320323268","funder_display_name":"Nanjing University of Posts and Telecommunications"},{"id":"https://openalex.org/G331498706","display_name":null,"funder_award_id":"62272239","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G8971924296","display_name":null,"funder_award_id":"CX(22)1007","funder_id":"https://openalex.org/F4320309870","funder_display_name":"Jiangsu Agricultural Science and Technology Innovation Fund"}],"funders":[{"id":"https://openalex.org/F4320309870","display_name":"Jiangsu Agricultural Science and Technology Innovation Fund","ror":"https://ror.org/0335n2618"},{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320323268","display_name":"Nanjing University of Posts and Telecommunications","ror":"https://ror.org/043bpky34"},{"id":"https://openalex.org/F4320324852","display_name":"Nanjing University","ror":"https://ror.org/01rxvg760"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W7115569083.pdf","grobid_xml":"https://content.openalex.org/works/W7115569083.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"A":[0,135],"central":[1],"obstacle":[2],"to":[3,46,55,68,157,181],"the":[4,12,48,89,103,150,179],"practical":[5],"deployment":[6],"of":[7,14,51,131],"Reinforcement":[8],"Learning":[9],"(RL)":[10],"is":[11],"prevalence":[13],"sparse":[15,160],"rewards,":[16],"which":[17],"often":[18],"necessitates":[19],"task-specific":[20],"dense":[21],"signals":[22],"crafted":[23],"through":[24,122],"costly":[25],"trial-and-error.":[26],"Automated":[27],"reward":[28,56,70,96,115,133,143],"decomposition":[29],"and":[30,43,82,153,197,201],"return\u2013redistribution":[31],"methods":[32],"can":[33],"reduce":[34],"this":[35],"burden,":[36],"but":[37,76],"they":[38],"are":[39,79],"largely":[40],"semantically":[41],"agnostic":[42],"may":[44,83],"fail":[45],"capture":[47],"multifaceted":[49],"nature":[50],"task":[52,74],"performance,":[53],"leading":[54],"hacking":[57],"or":[58,86],"stalled":[59],"exploration.":[60],"Recent":[61],"work":[62],"uses":[63],"Large":[64],"Language":[65],"Models":[66],"(LLMs)":[67],"generate":[69],"functions":[71],"from":[72,88,142],"high-level":[73],"descriptions,":[75],"these":[77],"specifications":[78,196],"typically":[80],"static":[81],"encode":[84],"biases":[85],"inaccuracies":[87],"pretrained":[90],"model,":[91],"resulting":[92],"in":[93],"a":[94,109,118,129,154],"priori":[95],"misspecification.":[97],"To":[98],"address":[99],"this,":[100],"we":[101],"propose":[102],"Metacognitive":[104],"Introspective":[105],"Reward":[106],"Architecture":[107],"(MIRA),":[108],"closed-loop":[110],"architecture":[111],"that":[112,191],"treats":[113],"LLM-generated":[114],"code":[116],"as":[117],"dynamic":[119],"object":[120],"refined":[121],"empirical":[123],"feedback.":[124],"An":[125],"LLM":[126,180],"first":[127],"produces":[128],"set":[130],"computable":[132],"factors.":[134],"dual-loop":[136],"design":[137],"then":[138],"decouples":[139],"policy":[140,152],"learning":[141,168],"revision:":[144],"an":[145,164],"inner":[146],"loop":[147,166],"jointly":[148],"trains":[149],"agent\u2019s":[151],"reward-synthesis":[155],"network":[156],"align":[158],"with":[159],"ground-truth":[161],"outcomes,":[162],"while":[163],"outer":[165],"monitors":[167],"dynamics":[169],"via":[170],"diagnostic":[171],"metrics":[172],"and,":[173],"upon":[174],"detecting":[175],"pathological":[176],"signatures,":[177],"invokes":[178],"perform":[182],"targeted":[183],"structural":[184],"edits.":[185],"Experiments":[186],"on":[187],"MuJoCo":[188],"benchmarks":[189],"show":[190],"MIRA":[192],"corrects":[193],"flawed":[194],"initial":[195],"improves":[198],"asymptotic":[199],"performance":[200],"sample":[202],"efficiency":[203],"over":[204],"strong":[205],"reward-design":[206],"baselines.":[207]},"counts_by_year":[],"updated_date":"2026-05-06T08:25:59.206177","created_date":"2025-12-16T00:00:00"}
