{"id":"https://openalex.org/W7138120394","doi":"https://doi.org/10.48550/arxiv.2603.13356","title":"Learning When to Trust in Contextual Bandits","display_name":"Learning When to Trust in Contextual Bandits","publication_year":2026,"publication_date":"2026-03-09","ids":{"openalex":"https://openalex.org/W7138120394","doi":"https://doi.org/10.48550/arxiv.2603.13356"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.13356","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.13356","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.13356","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5129718512","display_name":"Majid Ghasemi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ghasemi, Majid","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5129704311","display_name":"Mark Crowley","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Crowley, Mark","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":2,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.713100016117096,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.713100016117096,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12101","display_name":"Advanced Bandit Algorithms Research","score":0.17239999771118164,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.03779999911785126,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/regret","display_name":"Regret","score":0.746999979019165},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.6265000104904175},{"id":"https://openalex.org/keywords/trustworthiness","display_name":"Trustworthiness","score":0.5126000046730042},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.5041999816894531},{"id":"https://openalex.org/keywords/term","display_name":"Term (time)","score":0.4779999852180481},{"id":"https://openalex.org/keywords/sublinear-function","display_name":"Sublinear function","score":0.38100001215934753},{"id":"https://openalex.org/keywords/boundary","display_name":"Boundary (topology)","score":0.37369999289512634}],"concepts":[{"id":"https://openalex.org/C50817715","wikidata":"https://www.wikidata.org/wiki/Q79895177","display_name":"Regret","level":2,"score":0.746999979019165},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.6265000104904175},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6039999723434448},{"id":"https://openalex.org/C153701036","wikidata":"https://www.wikidata.org/wiki/Q659974","display_name":"Trustworthiness","level":2,"score":0.5126000046730042},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.5041999816894531},{"id":"https://openalex.org/C61797465","wikidata":"https://www.wikidata.org/wiki/Q1188986","display_name":"Term (time)","level":2,"score":0.4779999852180481},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4507000148296356},{"id":"https://openalex.org/C117160843","wikidata":"https://www.wikidata.org/wiki/Q338652","display_name":"Sublinear function","level":2,"score":0.38100001215934753},{"id":"https://openalex.org/C62354387","wikidata":"https://www.wikidata.org/wiki/Q875399","display_name":"Boundary (topology)","level":2,"score":0.37369999289512634},{"id":"https://openalex.org/C48677424","wikidata":"https://www.wikidata.org/wiki/Q6888088","display_name":"Mode (computer interface)","level":2,"score":0.3395000100135803},{"id":"https://openalex.org/C146849305","wikidata":"https://www.wikidata.org/wiki/Q370766","display_name":"Ground truth","level":2,"score":0.3368000090122223},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.3346000015735626},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.2992999851703644},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.2969000041484833},{"id":"https://openalex.org/C774472","wikidata":"https://www.wikidata.org/wiki/Q6760393","display_name":"Margin (machine learning)","level":2,"score":0.2800000011920929},{"id":"https://openalex.org/C71611378","wikidata":"https://www.wikidata.org/wiki/Q5165191","display_name":"Contextual design","level":3,"score":0.2703000009059906},{"id":"https://openalex.org/C37228920","wikidata":"https://www.wikidata.org/wiki/Q1307600","display_name":"Experiential learning","level":2,"score":0.2621999979019165}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.13356","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.13356","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.13356","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.13356","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Standard":[0],"approaches":[1],"to":[2],"Robust":[3],"Reinforcement":[4],"Learning":[5],"assume":[6],"that":[7,54,84],"feedback":[8],"sources":[9],"are":[10,41],"either":[11],"globally":[12,15,102],"trustworthy":[13],"or":[14],"adversarial.":[16],"In":[17],"this":[18,22,34,60],"paper,":[19],"we":[20,25,70],"challenge":[21],"assumption":[23],"and":[24],"identify":[26],"a":[27,75],"more":[28],"subtle":[29],"failure":[30],"mode.":[31],"We":[32,52,82],"term":[33],"mode":[35],"as":[36],"Contextual":[37,64],"Sycophancy,":[38],"where":[39],"evaluators":[40],"truthful":[42],"in":[43,49,59],"benign":[44],"contexts":[45],"but":[46],"strategically":[47],"biased":[48],"critical":[50],"ones.":[51],"prove":[53,83],"standard":[55],"robust":[56],"methods":[57],"fail":[58],"setting,":[61],"suffering":[62],"from":[63],"Objective":[65],"Decoupling.":[66],"To":[67],"address":[68],"this,":[69],"propose":[71],"CESA-LinUCB,":[72],"which":[73],"learns":[74],"high-dimensional":[76],"Trust":[77],"Boundary":[78],"for":[79],"each":[80],"evaluator.":[81],"CESA-LinUCB":[85],"achieves":[86],"sublinear":[87],"regret":[88],"$\\tilde{O}(\\sqrt{T})$":[89],"against":[90],"contextual":[91],"adversaries,":[92],"recovering":[93],"the":[94],"ground":[95],"truth":[96],"even":[97],"when":[98],"no":[99],"evaluator":[100],"is":[101],"reliable.":[103]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-03-18T00:00:00"}
