{"id":"https://openalex.org/W4415428444","doi":"https://doi.org/10.3233/faia251067","title":"Flexible Blood Glucose Control: Offline Reinforcement Learning from Human Feedback","display_name":"Flexible Blood Glucose Control: Offline Reinforcement Learning from Human Feedback","publication_year":2025,"publication_date":"2025-10-21","ids":{"openalex":"https://openalex.org/W4415428444","doi":"https://doi.org/10.3233/faia251067"},"language":null,"primary_location":{"id":"doi:10.3233/faia251067","is_oa":true,"landing_page_url":"https://doi.org/10.3233/faia251067","pdf_url":null,"source":{"id":"https://openalex.org/S4210201731","display_name":"Frontiers in artificial intelligence and applications","issn_l":"0922-6389","issn":["0922-6389","1879-8314"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Frontiers in Artificial Intelligence and Applications","raw_type":"book-chapter"},"type":"book-chapter","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://doi.org/10.3233/faia251067","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5011254198","display_name":"Harry Emerson","orcid":"https://orcid.org/0000-0002-5829-0261"},"institutions":[{"id":"https://openalex.org/I36234482","display_name":"University of Bristol","ror":"https://ror.org/0524sp257","country_code":"GB","type":"education","lineage":["https://openalex.org/I36234482"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Harry Emerson","raw_affiliation_strings":["University of Bristol, United Kingdom"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Bristol, United Kingdom","institution_ids":["https://openalex.org/I36234482"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5018942913","display_name":"Sam Gordon James","orcid":null},"institutions":[{"id":"https://openalex.org/I36234482","display_name":"University of Bristol","ror":"https://ror.org/0524sp257","country_code":"GB","type":"education","lineage":["https://openalex.org/I36234482"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Sam Gordon James","raw_affiliation_strings":["University of Bristol, United Kingdom"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Bristol, United Kingdom","institution_ids":["https://openalex.org/I36234482"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5067769261","display_name":"Matthew Guy","orcid":"https://orcid.org/0000-0002-6818-2010"},"institutions":[{"id":"https://openalex.org/I36234482","display_name":"University of Bristol","ror":"https://ror.org/0524sp257","country_code":"GB","type":"education","lineage":["https://openalex.org/I36234482"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Matthew Guy","raw_affiliation_strings":["University of Bristol, United Kingdom"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Bristol, United Kingdom","institution_ids":["https://openalex.org/I36234482"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5077095301","display_name":"Ryan McConville","orcid":"https://orcid.org/0000-0002-7708-3110"},"institutions":[{"id":"https://openalex.org/I36234482","display_name":"University of Bristol","ror":"https://ror.org/0524sp257","country_code":"GB","type":"education","lineage":["https://openalex.org/I36234482"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Ryan McConville","raw_affiliation_strings":["University of Bristol, United Kingdom"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Bristol, United Kingdom","institution_ids":["https://openalex.org/I36234482"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":21.5148,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.99229499,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":95,"max":96},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10560","display_name":"Diabetes Management and Research","score":0.9769999980926514,"subfield":{"id":"https://openalex.org/subfields/2712","display_name":"Endocrinology, Diabetes and Metabolism"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},"topics":[{"id":"https://openalex.org/T10560","display_name":"Diabetes Management and Research","score":0.9769999980926514,"subfield":{"id":"https://openalex.org/subfields/2712","display_name":"Endocrinology, Diabetes and Metabolism"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.7652999758720398},{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.5575000047683716},{"id":"https://openalex.org/keywords/adaptation","display_name":"Adaptation (eye)","score":0.5483999848365784},{"id":"https://openalex.org/keywords/feedback-control","display_name":"Feedback control","score":0.4481000006198883},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.44179999828338623},{"id":"https://openalex.org/keywords/annotation","display_name":"Annotation","score":0.38940000534057617},{"id":"https://openalex.org/keywords/trustworthiness","display_name":"Trustworthiness","score":0.3873000144958496},{"id":"https://openalex.org/keywords/preference","display_name":"Preference","score":0.3582000136375427},{"id":"https://openalex.org/keywords/flexibility","display_name":"Flexibility (engineering)","score":0.3538999855518341}],"concepts":[{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.7652999758720398},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6028000116348267},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.5575000047683716},{"id":"https://openalex.org/C139807058","wikidata":"https://www.wikidata.org/wiki/Q352374","display_name":"Adaptation (eye)","level":2,"score":0.5483999848365784},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5131000280380249},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.48539999127388},{"id":"https://openalex.org/C3018651601","wikidata":"https://www.wikidata.org/wiki/Q183635","display_name":"Feedback control","level":2,"score":0.4481000006198883},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.44179999828338623},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.39259999990463257},{"id":"https://openalex.org/C2776321320","wikidata":"https://www.wikidata.org/wiki/Q857525","display_name":"Annotation","level":2,"score":0.38940000534057617},{"id":"https://openalex.org/C153701036","wikidata":"https://www.wikidata.org/wiki/Q659974","display_name":"Trustworthiness","level":2,"score":0.3873000144958496},{"id":"https://openalex.org/C2781249084","wikidata":"https://www.wikidata.org/wiki/Q908656","display_name":"Preference","level":2,"score":0.3582000136375427},{"id":"https://openalex.org/C2780598303","wikidata":"https://www.wikidata.org/wiki/Q65921492","display_name":"Flexibility (engineering)","level":2,"score":0.3538999855518341},{"id":"https://openalex.org/C2779585090","wikidata":"https://www.wikidata.org/wiki/Q3457762","display_name":"Resilience (materials science)","level":2,"score":0.3140999972820282},{"id":"https://openalex.org/C67203356","wikidata":"https://www.wikidata.org/wiki/Q1321905","display_name":"Reinforcement","level":2,"score":0.2957000136375427},{"id":"https://openalex.org/C2778445095","wikidata":"https://www.wikidata.org/wiki/Q18354077","display_name":"Sample complexity","level":2,"score":0.28220000863075256},{"id":"https://openalex.org/C2779328685","wikidata":"https://www.wikidata.org/wiki/Q1475557","display_name":"Patient safety","level":3,"score":0.2752000093460083},{"id":"https://openalex.org/C175079658","wikidata":"https://www.wikidata.org/wiki/Q7312165","display_name":"Remote patient monitoring","level":2,"score":0.272599995136261},{"id":"https://openalex.org/C2780791683","wikidata":"https://www.wikidata.org/wiki/Q846785","display_name":"Action (physics)","level":2,"score":0.27129998803138733},{"id":"https://openalex.org/C52970973","wikidata":"https://www.wikidata.org/wiki/Q2497134","display_name":"Adaptive system","level":2,"score":0.2712000012397766},{"id":"https://openalex.org/C112930515","wikidata":"https://www.wikidata.org/wiki/Q4389547","display_name":"Risk analysis (engineering)","level":1,"score":0.2680000066757202},{"id":"https://openalex.org/C2779974597","wikidata":"https://www.wikidata.org/wiki/Q28448986","display_name":"Clinical Practice","level":2,"score":0.2667999863624573},{"id":"https://openalex.org/C137176749","wikidata":"https://www.wikidata.org/wiki/Q4105337","display_name":"Psychological resilience","level":2,"score":0.2648000121116638},{"id":"https://openalex.org/C107464732","wikidata":"https://www.wikidata.org/wiki/Q235781","display_name":"Adaptive control","level":3,"score":0.2621000111103058},{"id":"https://openalex.org/C2780490138","wikidata":"https://www.wikidata.org/wiki/Q7079636","display_name":"Offline learning","level":3,"score":0.26080000400543213},{"id":"https://openalex.org/C2908918659","wikidata":"https://www.wikidata.org/wiki/Q4927806","display_name":"Blood Glucose Self-Monitoring","level":5,"score":0.2583000063896179},{"id":"https://openalex.org/C125014702","wikidata":"https://www.wikidata.org/wiki/Q4680749","display_name":"Adaptive learning","level":2,"score":0.2567000091075897},{"id":"https://openalex.org/C100609095","wikidata":"https://www.wikidata.org/wiki/Q1335050","display_name":"Embodied cognition","level":2,"score":0.2502000033855438}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.3233/faia251067","is_oa":true,"landing_page_url":"https://doi.org/10.3233/faia251067","pdf_url":null,"source":{"id":"https://openalex.org/S4210201731","display_name":"Frontiers in artificial intelligence and applications","issn_l":"0922-6389","issn":["0922-6389","1879-8314"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Frontiers in Artificial Intelligence and Applications","raw_type":"book-chapter"}],"best_oa_location":{"id":"doi:10.3233/faia251067","is_oa":true,"landing_page_url":"https://doi.org/10.3233/faia251067","pdf_url":null,"source":{"id":"https://openalex.org/S4210201731","display_name":"Frontiers in artificial intelligence and applications","issn_l":"0922-6389","issn":["0922-6389","1879-8314"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Frontiers in Artificial Intelligence and Applications","raw_type":"book-chapter"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Reinforcement":[0],"learning":[1],"(RL)":[2],"holds":[3],"promise":[4],"for":[5,25,36,42,165,178],"supporting":[6,81],"personalised":[7],"decision-making":[8],"in":[9,46,172,185],"healthcare,":[10],"but":[11],"existing":[12],"approaches":[13],"often":[14],"struggle":[15],"to":[16,84,138,146],"incorporate":[17],"patient":[18,64,114,173],"expertise":[19],"and":[20,87,135,154,181],"individual":[21],"preferences,":[22],"key":[23],"components":[24],"clinically":[26],"viable":[27],"AI":[28,183],"systems.":[29],"This":[30],"work":[31],"introduces":[32],"PAINT":[33,49,112,160],"(Preference":[34],"Adaptation":[35],"Individualised":[37],"Treatment),":[38],"a":[39,99,162],"general":[40],"framework":[41],"preference-guided":[43],"offline":[44,78,170],"RL":[45,79,171],"safety-critical":[47],"settings.":[48],"combines":[50],"sketch-based":[51],"reward":[52,71],"annotation":[53,152],"with":[54,104,175],"safety-constrained":[55],"policy":[56],"optimisation,":[57],"enabling":[58,124],"fine-grained":[59],"preference":[60,85],"capture":[61],"from":[62],"historical":[63],"data":[65],"without":[66],"requiring":[67],"action":[68],"labels.":[69],"A":[70],"model":[72],"trained":[73],"on":[74],"this":[75],"feedback":[76,168],"guides":[77],"while":[80,123],"tunable":[82],"sensitivity":[83],"signals":[86],"enforcing":[88],"clinical":[89],"safety":[90],"constraints.":[91],"Using":[92],"type":[93],"1":[94],"diabetes":[95],"(T1D)":[96],"management":[97,130],"as":[98,128],"case":[100],"study,":[101],"in-silico":[102],"evaluation":[103],"the":[105],"FDA-accepted":[106],"T1D":[107],"simulator":[108],"demonstrates":[109],"that":[110],"can":[111],"reduces":[113],"risk":[115],"by":[116],"15%":[117],"over":[118],"commercial":[119],"baselines":[120],"under":[121],"guidance,":[122],"preference-driven":[125],"adaptations":[126],"such":[127],"improved":[129],"during":[131],"challenging":[132],"mealtime":[133],"events":[134],"enhanced":[136],"robustness":[137],"dosing":[139],"errors.":[140],"The":[141],"method":[142],"further":[143],"shows":[144],"resilience":[145],"real-world":[147],"challenges":[148],"including":[149],"sample":[150],"size,":[151],"noise,":[153],"inter-patient":[155],"variability.":[156],"These":[157],"findings":[158],"suggest":[159],"offers":[161],"practical":[163],"pathway":[164],"integrating":[166],"human":[167],"into":[169],"settings,":[174],"broader":[176],"implications":[177],"developing":[179],"trustworthy":[180],"adaptive":[182],"systems":[184],"healthcare.":[186]},"counts_by_year":[{"year":2025,"cited_by_count":2}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-24T00:00:00"}
