{"id":"https://openalex.org/W7135051460","doi":"https://doi.org/10.48550/arxiv.2603.10521","title":"IH-Challenge: A Training Dataset to Improve Instruction Hierarchy on Frontier LLMs","display_name":"IH-Challenge: A Training Dataset to Improve Instruction Hierarchy on Frontier LLMs","publication_year":2026,"publication_date":"2026-03-11","ids":{"openalex":"https://openalex.org/W7135051460","doi":"https://doi.org/10.48550/arxiv.2603.10521"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.10521","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.10521","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.10521","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5128900858","display_name":"Chuan Guo","orcid":null},"institutions":[{"id":"https://openalex.org/I4210131859","display_name":"Michael & Associates","ror":"https://ror.org/03dbx2z52","country_code":"US","type":"facility","lineage":["https://openalex.org/I4210131859"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Guo, Chuan","raw_affiliation_strings":["Michael Pokorny"],"affiliations":[{"raw_affiliation_string":"Michael Pokorny","institution_ids":["https://openalex.org/I4210131859"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128823414","display_name":"Juan Felipe Ceron Uribe","orcid":null},"institutions":[{"id":"https://openalex.org/I4210131859","display_name":"Michael & Associates","ror":"https://ror.org/03dbx2z52","country_code":"US","type":"facility","lineage":["https://openalex.org/I4210131859"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Uribe, Juan Felipe Ceron","raw_affiliation_strings":["Michael Pokorny"],"affiliations":[{"raw_affiliation_string":"Michael Pokorny","institution_ids":["https://openalex.org/I4210131859"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128819834","display_name":"Sicheng Zhu","orcid":null},"institutions":[{"id":"https://openalex.org/I4210131859","display_name":"Michael & Associates","ror":"https://ror.org/03dbx2z52","country_code":"US","type":"facility","lineage":["https://openalex.org/I4210131859"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Zhu, Sicheng","raw_affiliation_strings":["Michael Pokorny"],"affiliations":[{"raw_affiliation_string":"Michael Pokorny","institution_ids":["https://openalex.org/I4210131859"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5063017186","display_name":"Christopher A. Choquette-Choo","orcid":null},"institutions":[{"id":"https://openalex.org/I4210131859","display_name":"Michael & Associates","ror":"https://ror.org/03dbx2z52","country_code":"US","type":"facility","lineage":["https://openalex.org/I4210131859"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Choquette-Choo, Christopher A.","raw_affiliation_strings":["Michael Pokorny"],"affiliations":[{"raw_affiliation_string":"Michael Pokorny","institution_ids":["https://openalex.org/I4210131859"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128918261","display_name":"Steph Lin","orcid":null},"institutions":[{"id":"https://openalex.org/I4210131859","display_name":"Michael & Associates","ror":"https://ror.org/03dbx2z52","country_code":"US","type":"facility","lineage":["https://openalex.org/I4210131859"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Lin, Steph","raw_affiliation_strings":["Michael Pokorny"],"affiliations":[{"raw_affiliation_string":"Michael Pokorny","institution_ids":["https://openalex.org/I4210131859"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5056645742","display_name":"Nikhil Kandpal","orcid":null},"institutions":[{"id":"https://openalex.org/I4210131859","display_name":"Michael & Associates","ror":"https://ror.org/03dbx2z52","country_code":"US","type":"facility","lineage":["https://openalex.org/I4210131859"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Kandpal, Nikhil","raw_affiliation_strings":["Michael Pokorny"],"affiliations":[{"raw_affiliation_string":"Michael Pokorny","institution_ids":["https://openalex.org/I4210131859"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5059480732","display_name":"Milad Nasr","orcid":"https://orcid.org/0000-0002-1913-6157"},"institutions":[{"id":"https://openalex.org/I4210131859","display_name":"Michael & Associates","ror":"https://ror.org/03dbx2z52","country_code":"US","type":"facility","lineage":["https://openalex.org/I4210131859"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Nasr, Milad","raw_affiliation_strings":["Michael Pokorny"],"affiliations":[{"raw_affiliation_string":"Michael Pokorny","institution_ids":["https://openalex.org/I4210131859"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128890827","display_name":"Rai","orcid":null},"institutions":[{"id":"https://openalex.org/I4210131859","display_name":"Michael & Associates","ror":"https://ror.org/03dbx2z52","country_code":"US","type":"facility","lineage":["https://openalex.org/I4210131859"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Rai","raw_affiliation_strings":["Michael Pokorny"],"affiliations":[{"raw_affiliation_string":"Michael Pokorny","institution_ids":["https://openalex.org/I4210131859"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5085124552","display_name":"Sam Toyer","orcid":"https://orcid.org/0000-0002-6665-6593"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Toyer, Sam","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128861020","display_name":"Miles Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Miles","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128912018","display_name":"Yaodong Yu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yu, Yaodong","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5080988309","display_name":"Alex Beutel","orcid":"https://orcid.org/0000-0002-5917-2849"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Beutel, Alex","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5128866460","display_name":"Kai Xiao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xiao, Kai","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":13,"corresponding_author_ids":["https://openalex.org/A5128900858"],"corresponding_institution_ids":["https://openalex.org/I4210131859"],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10743","display_name":"Software Testing and Debugging Techniques","score":0.3580999970436096,"subfield":{"id":"https://openalex.org/subfields/1712","display_name":"Software"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10743","display_name":"Software Testing and Debugging Techniques","score":0.3580999970436096,"subfield":{"id":"https://openalex.org/subfields/1712","display_name":"Software"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.15279999375343323,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11241","display_name":"Advanced Malware Detection Techniques","score":0.14259999990463257,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/hierarchy","display_name":"Hierarchy","score":0.6342999935150146},{"id":"https://openalex.org/keywords/adversarial-system","display_name":"Adversarial system","score":0.6248000264167786},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.5289000272750854},{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.503600001335144},{"id":"https://openalex.org/keywords/helpfulness","display_name":"Helpfulness","score":0.4781999886035919},{"id":"https://openalex.org/keywords/frontier","display_name":"Frontier","score":0.4472000002861023},{"id":"https://openalex.org/keywords/training","display_name":"Training (meteorology)","score":0.4262999892234802}],"concepts":[{"id":"https://openalex.org/C31170391","wikidata":"https://www.wikidata.org/wiki/Q188619","display_name":"Hierarchy","level":2,"score":0.6342999935150146},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6252999901771545},{"id":"https://openalex.org/C37736160","wikidata":"https://www.wikidata.org/wiki/Q1801315","display_name":"Adversarial system","level":2,"score":0.6248000264167786},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.5289000272750854},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.503600001335144},{"id":"https://openalex.org/C2781265381","wikidata":"https://www.wikidata.org/wiki/Q5710255","display_name":"Helpfulness","level":2,"score":0.4781999886035919},{"id":"https://openalex.org/C2778571376","wikidata":"https://www.wikidata.org/wiki/Q1355821","display_name":"Frontier","level":2,"score":0.4472000002861023},{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.4262999892234802},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4230000078678131},{"id":"https://openalex.org/C2778143727","wikidata":"https://www.wikidata.org/wiki/Q1820650","display_name":"Readability","level":2,"score":0.4187999963760376},{"id":"https://openalex.org/C56739046","wikidata":"https://www.wikidata.org/wiki/Q192060","display_name":"Knowledge management","level":1,"score":0.38670000433921814},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.34769999980926514},{"id":"https://openalex.org/C168725872","wikidata":"https://www.wikidata.org/wiki/Q991663","display_name":"Sophistication","level":2,"score":0.34049999713897705},{"id":"https://openalex.org/C195094911","wikidata":"https://www.wikidata.org/wiki/Q14167904","display_name":"Process management","level":1,"score":0.32679998874664307},{"id":"https://openalex.org/C52146309","wikidata":"https://www.wikidata.org/wiki/Q7431116","display_name":"Schema (genetic algorithms)","level":2,"score":0.29829999804496765},{"id":"https://openalex.org/C2781466463","wikidata":"https://www.wikidata.org/wiki/Q621695","display_name":"Blame","level":2,"score":0.2930000126361847},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.2856000065803528},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.28060001134872437},{"id":"https://openalex.org/C112930515","wikidata":"https://www.wikidata.org/wiki/Q4389547","display_name":"Risk analysis (engineering)","level":1,"score":0.27720001339912415},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.27619999647140503}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.10521","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.10521","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.10521","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.10521","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"score":0.5860569477081299,"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Instruction":[0],"hierarchy":[1],"(IH)":[2],"defines":[3],"how":[4],"LLMs":[5],"prioritize":[6],"system,":[7],"developer,":[8],"user,":[9],"and":[10,34,58,98,119],"tool":[11],"instructions":[12],"under":[13],"conflict,":[14],"providing":[15],"a":[16,69],"concrete,":[17],"trust-ordered":[18],"policy":[19],"for":[20],"resolving":[21],"instruction":[22,144],"conflicts.":[23],"IH":[24,40,46,88],"is":[25,42],"key":[26],"to":[27,44,74,103,110,138],"defending":[28],"against":[29],"jailbreaks,":[30],"system":[31],"prompt":[32,36,125],"extractions,":[33],"agentic":[35,124],"injections.":[37],"However,":[38],"robust":[39,143],"behavior":[41,107],"difficult":[43],"train:":[45],"failures":[47],"can":[48,55,60],"be":[49,56],"confounded":[50],"with":[51,82,128],"instruction-following":[52],"failures,":[53],"conflicts":[54],"nuanced,":[57],"models":[59],"learn":[61],"shortcuts":[62],"such":[63],"as":[64],"overrefusing.":[65],"We":[66,132],"introduce":[67],"IH-Challenge,":[68],"reinforcement":[70],"learning":[71],"training":[72],"dataset,":[73],"address":[75],"these":[76],"difficulties.":[77],"Fine-tuning":[78],"GPT-5-Mini":[79],"on":[80,92,115,142],"IH-Challenge":[81,135],"online":[83],"adversarial":[84],"example":[85],"generation":[86],"improves":[87],"robustness":[89],"by":[90],"+10.0%":[91],"average":[93],"across":[94],"16":[95],"in-distribution,":[96],"out-of-distribution,":[97],"human":[99],"red-teaming":[100],"benchmarks":[101],"(84.1%":[102],"94.1%),":[104],"reduces":[105],"unsafe":[106],"from":[108],"6.6%":[109],"0.7%":[111],"while":[112],"improving":[113],"helpfulness":[114],"general":[116],"safety":[117],"evaluations,":[118],"saturates":[120],"an":[121],"internal":[122],"static":[123],"injection":[126],"evaluation,":[127],"minimal":[129],"capability":[130],"regression.":[131],"release":[133],"the":[134],"dataset":[136],"(https://huggingface.co/datasets/openai/ih-challenge)":[137],"support":[139],"future":[140],"research":[141],"hierarchy.":[145]},"counts_by_year":[],"updated_date":"2026-03-13T14:25:03.468858","created_date":"2026-03-13T00:00:00"}
