{"id":"https://openalex.org/W7106671174","doi":"https://doi.org/10.48550/arxiv.2511.18728","title":"Reinforcement Learning for Self-Healing Material Systems","display_name":"Reinforcement Learning for Self-Healing Material Systems","publication_year":2025,"publication_date":"2025-11-24","ids":{"openalex":"https://openalex.org/W7106671174","doi":"https://doi.org/10.48550/arxiv.2511.18728"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2511.18728","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2511.18728","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2511.18728","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Chatterjee, Maitreyi","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Chatterjee, Maitreyi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Agarwal, Devansh","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Agarwal, Devansh","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":null,"display_name":"Chatterjee, Biplab","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chatterjee, Biplab","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.1835000067949295,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10653","display_name":"Robot Manipulation and Learning","score":0.1835000067949295,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12784","display_name":"Modular Robots and Swarm Intelligence","score":0.16599999368190765,"subfield":{"id":"https://openalex.org/subfields/2210","display_name":"Mechanical Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10462","display_name":"Reinforcement Learning in Robotics","score":0.12150000035762787,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7652000188827515},{"id":"https://openalex.org/keywords/markov-decision-process","display_name":"Markov decision process","score":0.7332000136375427},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.5946000218391418},{"id":"https://openalex.org/keywords/convergence","display_name":"Convergence (economics)","score":0.5300999879837036},{"id":"https://openalex.org/keywords/markov-process","display_name":"Markov process","score":0.48910000920295715},{"id":"https://openalex.org/keywords/heuristic","display_name":"Heuristic","score":0.4674000144004822},{"id":"https://openalex.org/keywords/optimal-control","display_name":"Optimal control","score":0.4260999858379364},{"id":"https://openalex.org/keywords/markov-chain","display_name":"Markov chain","score":0.4129999876022339}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7652000188827515},{"id":"https://openalex.org/C106189395","wikidata":"https://www.wikidata.org/wiki/Q176789","display_name":"Markov decision process","level":3,"score":0.7332000136375427},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6531999707221985},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.5946000218391418},{"id":"https://openalex.org/C2777303404","wikidata":"https://www.wikidata.org/wiki/Q759757","display_name":"Convergence (economics)","level":2,"score":0.5300999879837036},{"id":"https://openalex.org/C159886148","wikidata":"https://www.wikidata.org/wiki/Q176645","display_name":"Markov process","level":2,"score":0.48910000920295715},{"id":"https://openalex.org/C173801870","wikidata":"https://www.wikidata.org/wiki/Q201413","display_name":"Heuristic","level":2,"score":0.4674000144004822},{"id":"https://openalex.org/C91575142","wikidata":"https://www.wikidata.org/wiki/Q1971426","display_name":"Optimal control","level":2,"score":0.4260999858379364},{"id":"https://openalex.org/C98763669","wikidata":"https://www.wikidata.org/wiki/Q176645","display_name":"Markov chain","level":2,"score":0.4129999876022339},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.4032999873161316},{"id":"https://openalex.org/C107464732","wikidata":"https://www.wikidata.org/wiki/Q235781","display_name":"Adaptive control","level":3,"score":0.38679999113082886},{"id":"https://openalex.org/C17098449","wikidata":"https://www.wikidata.org/wiki/Q176814","display_name":"Partially observable Markov decision process","level":4,"score":0.3643999993801117},{"id":"https://openalex.org/C188116033","wikidata":"https://www.wikidata.org/wiki/Q2664563","display_name":"Q-learning","level":3,"score":0.3515999913215637},{"id":"https://openalex.org/C126255220","wikidata":"https://www.wikidata.org/wiki/Q141495","display_name":"Mathematical optimization","level":1,"score":0.34869998693466187},{"id":"https://openalex.org/C17500928","wikidata":"https://www.wikidata.org/wiki/Q959968","display_name":"Control system","level":2,"score":0.3476000130176544},{"id":"https://openalex.org/C133731056","wikidata":"https://www.wikidata.org/wiki/Q4917288","display_name":"Control engineering","level":1,"score":0.33160001039505005},{"id":"https://openalex.org/C37404715","wikidata":"https://www.wikidata.org/wiki/Q380679","display_name":"Dynamic programming","level":2,"score":0.3212999999523163},{"id":"https://openalex.org/C206345919","wikidata":"https://www.wikidata.org/wiki/Q20380951","display_name":"Resource (disambiguation)","level":2,"score":0.3059000074863434},{"id":"https://openalex.org/C67203356","wikidata":"https://www.wikidata.org/wiki/Q1321905","display_name":"Reinforcement","level":2,"score":0.29739999771118164},{"id":"https://openalex.org/C155386361","wikidata":"https://www.wikidata.org/wiki/Q1649571","display_name":"Process control","level":3,"score":0.29510000348091125},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.2745000123977661},{"id":"https://openalex.org/C8272713","wikidata":"https://www.wikidata.org/wiki/Q176737","display_name":"Stochastic process","level":2,"score":0.2678000032901764},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2619999945163727},{"id":"https://openalex.org/C77405623","wikidata":"https://www.wikidata.org/wiki/Q598451","display_name":"System dynamics","level":2,"score":0.26170000433921814}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2511.18728","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2511.18728","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2511.18728","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2511.18728","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"display_name":"Decent work and economic growth","id":"https://metadata.un.org/sdg/8","score":0.5314815044403076}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"The":[0],"transition":[1],"to":[2,10,34],"autonomous":[3],"material":[4,75],"systems":[5],"necessitates":[6],"adaptive":[7],"control":[8,84],"methodologies":[9],"maximize":[11],"structural":[12,42],"longevity.":[13],"This":[14],"study":[15],"frames":[16],"the":[17,78,92],"self-healing":[18,100],"process":[19],"as":[20],"a":[21,27,61],"Reinforcement":[22],"Learning":[23],"(RL)":[24],"problem":[25],"within":[26],"Markov":[28],"Decision":[29],"Process":[30],"(MDP),":[31],"enabling":[32],"agents":[33,59],"autonomously":[35],"derive":[36],"optimal":[37],"policies":[38],"that":[39,66],"efficiently":[40],"balance":[41],"integrity":[43],"maintenance":[44],"against":[45],"finite":[46],"resource":[47],"consumption.":[48],"A":[49],"comparative":[50],"evaluation":[51],"of":[52,94],"discrete-action":[53],"(Q-learning,":[54],"DQN)":[55],"and":[56,89],"continuous-action":[57],"(TD3)":[58],"in":[60,98],"stochastic":[62],"simulation":[63],"environment":[64],"revealed":[65],"RL":[67],"controllers":[68],"significantly":[69],"outperform":[70],"heuristic":[71],"baselines,":[72],"achieving":[73],"near-complete":[74],"recovery.":[76],"Crucially,":[77],"TD3":[79],"agent":[80],"utilizing":[81],"continuous":[82],"dosage":[83],"demonstrated":[85],"superior":[86],"convergence":[87],"speed":[88],"stability,":[90],"underscoring":[91],"necessity":[93],"fine-grained,":[95],"proportional":[96],"actuation":[97],"dynamic":[99],"applications.":[101]},"counts_by_year":[],"updated_date":"2025-11-27T01:16:37.896743","created_date":"2025-11-27T00:00:00"}
