{"id":"https://openalex.org/W7130542815","doi":"https://doi.org/10.1109/fllm67465.2025.11391110","title":"Detection of LLM Deceptive Behaviour Triggered by the Poisonous Context Injection: The Problem Demonstration","display_name":"Detection of LLM Deceptive Behaviour Triggered by the Poisonous Context Injection: The Problem Demonstration","publication_year":2025,"publication_date":"2025-11-25","ids":{"openalex":"https://openalex.org/W7130542815","doi":"https://doi.org/10.1109/fllm67465.2025.11391110"},"language":null,"primary_location":{"id":"doi:10.1109/fllm67465.2025.11391110","is_oa":false,"landing_page_url":"https://doi.org/10.1109/fllm67465.2025.11391110","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 3rd International Conference on Foundation and Large Language Models (FLLM)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5036688176","display_name":"Stanislav Selitskiy","orcid":"https://orcid.org/0000-0003-1758-0171"},"institutions":[{"id":"https://openalex.org/I147554453","display_name":"University of Bedfordshire","ror":"https://ror.org/0400avk24","country_code":"GB","type":"education","lineage":["https://openalex.org/I147554453"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Stanislav Selitskiy","raw_affiliation_strings":["University of Bedfordshire,School Of Computer Science And Technology,Luton,UK"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Bedfordshire,School Of Computer Science And Technology,Luton,UK","institution_ids":["https://openalex.org/I147554453"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5045644993","display_name":"Chihiro Inoue","orcid":"https://orcid.org/0000-0003-1927-6923"},"institutions":[{"id":"https://openalex.org/I147554453","display_name":"University of Bedfordshire","ror":"https://ror.org/0400avk24","country_code":"GB","type":"education","lineage":["https://openalex.org/I147554453"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Chihiro Inoue","raw_affiliation_strings":["University of Bedfordshire,Centre For Research In English Language Learning And Assessment,Luton,UK"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Bedfordshire,Centre For Research In English Language Learning And Assessment,Luton,UK","institution_ids":["https://openalex.org/I147554453"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.61178598,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"732","last_page":"737"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11636","display_name":"Artificial Intelligence in Healthcare and Education","score":0.1979999989271164,"subfield":{"id":"https://openalex.org/subfields/2718","display_name":"Health Informatics"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},"topics":[{"id":"https://openalex.org/T11636","display_name":"Artificial Intelligence in Healthcare and Education","score":0.1979999989271164,"subfield":{"id":"https://openalex.org/subfields/2718","display_name":"Health Informatics"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.12880000472068787,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.0949999988079071,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/novelty","display_name":"Novelty","score":0.7195000052452087},{"id":"https://openalex.org/keywords/deception","display_name":"Deception","score":0.6868000030517578},{"id":"https://openalex.org/keywords/trustworthiness","display_name":"Trustworthiness","score":0.6166999936103821},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.5403000116348267},{"id":"https://openalex.org/keywords/framing","display_name":"Framing (construction)","score":0.5121999979019165},{"id":"https://openalex.org/keywords/criticism","display_name":"Criticism","score":0.47859999537467957}],"concepts":[{"id":"https://openalex.org/C2778738651","wikidata":"https://www.wikidata.org/wiki/Q16546687","display_name":"Novelty","level":2,"score":0.7195000052452087},{"id":"https://openalex.org/C2779267917","wikidata":"https://www.wikidata.org/wiki/Q170028","display_name":"Deception","level":2,"score":0.6868000030517578},{"id":"https://openalex.org/C153701036","wikidata":"https://www.wikidata.org/wiki/Q659974","display_name":"Trustworthiness","level":2,"score":0.6166999936103821},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.5403000116348267},{"id":"https://openalex.org/C169087156","wikidata":"https://www.wikidata.org/wiki/Q2131593","display_name":"Framing (construction)","level":2,"score":0.5121999979019165},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5112000107765198},{"id":"https://openalex.org/C7991579","wikidata":"https://www.wikidata.org/wiki/Q17955","display_name":"Criticism","level":2,"score":0.47859999537467957},{"id":"https://openalex.org/C17859611","wikidata":"https://www.wikidata.org/wiki/Q10797615","display_name":"Equivocation","level":2,"score":0.42149999737739563},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.37700000405311584},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.3601999878883362},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.35429999232292175},{"id":"https://openalex.org/C77805123","wikidata":"https://www.wikidata.org/wiki/Q161272","display_name":"Social psychology","level":1,"score":0.3294000029563904},{"id":"https://openalex.org/C188147891","wikidata":"https://www.wikidata.org/wiki/Q147638","display_name":"Cognitive science","level":1,"score":0.3005000054836273},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.2955000102519989},{"id":"https://openalex.org/C171268870","wikidata":"https://www.wikidata.org/wiki/Q1486676","display_name":"GRASP","level":2,"score":0.2924000024795532},{"id":"https://openalex.org/C2777293324","wikidata":"https://www.wikidata.org/wiki/Q337349","display_name":"Honesty","level":2,"score":0.2809999883174896},{"id":"https://openalex.org/C3017944768","wikidata":"https://www.wikidata.org/wiki/Q1450463","display_name":"Poison control","level":2,"score":0.27709999680519104}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/fllm67465.2025.11391110","is_oa":false,"landing_page_url":"https://doi.org/10.1109/fllm67465.2025.11391110","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 3rd International Conference on Foundation and Large Language Models (FLLM)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.4624764025211334,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":14,"referenced_works":["https://openalex.org/W4210467852","https://openalex.org/W4286665132","https://openalex.org/W4316039865","https://openalex.org/W4360957277","https://openalex.org/W4375949262","https://openalex.org/W4385567149","https://openalex.org/W4385573261","https://openalex.org/W4389523912","https://openalex.org/W4396796749","https://openalex.org/W4399320026","https://openalex.org/W4399564069","https://openalex.org/W4400700791","https://openalex.org/W4402670356","https://openalex.org/W4412158322"],"related_works":[],"abstract_inverted_index":{"This":[0,68],"paper":[1],"presents":[2],"a":[3,25,36,49,71,97,138,166],"focused":[4],"demonstration":[5],"of":[6,88,105,115,160,168,175],"deceptive":[7,130],"behaviour":[8],"in":[9,92,101,149],"Large":[10],"Language":[11],"Models":[12],"(LLMs)":[13],"arising":[14],"under":[15],"poisonous":[16,50,177],"context":[17,178],"injection.":[18],"The":[19,86,113],"case":[20],"study":[21,142],"is":[22,91],"constructed":[23],"around":[24],"Japanese":[26],"haiku,":[27],"selected":[28],"for":[29,38],"its":[30],"inherent":[31],"ambiguity,":[32],"which":[33],"serves":[34],"as":[35,96,145],"probe":[37],"LLM":[39,94],"alignment":[40],"with":[41,48,110],"the":[42,103,119,134,161,173,176,191],"humans\u2019":[43],"real-world":[44],"model.":[45],"When":[46],"presented":[47],"context,":[51],"ChatGPT":[52],"generated":[53],"translation,":[54],"interpretation,":[55],"and":[56,83,100,136,156],"literary":[57],"criticism":[58],"that":[59,78,165],"were":[60],"not":[61],"only":[62],"incorrect":[63],"but":[64],"also":[65],"internally":[66],"inconsistent.":[67],"experiment":[69],"highlights":[70],"fundamental":[72],"risk:":[73],"LLMs":[74,171],"can":[75,127],"produce":[76],"outputs":[77],"are":[79,188],"both":[80],"linguistically":[81],"convincing":[82],"semantically":[84],"deceptive.":[85],"novelty":[87],"this":[89,116,141],"work":[90,117],"framing":[93],"deception":[95],"measurable":[98],"phenomenon":[99],"articulating":[102],"feasibility":[104],"automated":[106],"detection":[107],"through":[108],"cross-verification":[109],"independent":[111],"models.":[112],"contribution":[114],"establishes":[118],"problem":[120,135],"space":[121],"by":[122],"demonstrating":[123],"how":[124],"subtle":[125],"poisoning":[126],"systematically":[128],"induce":[129],"generations.":[131],"By":[132],"formalising":[133],"identifying":[137],"methodological":[139],"direction,":[140],"positions":[143],"itself":[144],"an":[146,150],"initial":[147],"step":[148],"ongoing":[151],"research":[152],"program":[153],"on":[154],"trustworthy":[155],"self-aware":[157],"AI.":[158],"Proof":[159],"concept":[162],"experiments":[163],"demonstrated":[164],"committee":[167],"five":[169],"major":[170],"estimates":[172],"trustworthiness":[174,193],"haiku":[179,186],"interpretations":[180,187],"at":[181,190],"0.57\u00b10.33":[182],"range,":[183],"while":[184],"non-poisoned":[185],"estimated":[189],"0.86\u00b10.15":[192],"range.":[194]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-02-20T00:00:00"}
