{"id":"https://openalex.org/W7152507658","doi":"https://doi.org/10.48550/arxiv.2604.06285","title":"Harnessing Hyperbolic Geometry for Harmful Prompt Detection and Sanitization","display_name":"Harnessing Hyperbolic Geometry for Harmful Prompt Detection and Sanitization","publication_year":2026,"publication_date":"2026-04-07","ids":{"openalex":"https://openalex.org/W7152507658","doi":"https://doi.org/10.48550/arxiv.2604.06285"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.06285","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.06285","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.06285","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5115460904","display_name":"Igor Maljkovic","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Maljkovic, Igor","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133266816","display_name":"Maria Rosaria Briglia","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Briglia, Maria Rosaria","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133247840","display_name":"Iacopo Masi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Masi, Iacopo","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133245794","display_name":"Antonio Emanuele Cin\u00e0","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Cin\u00e0, Antonio Emanuele","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5133261911","display_name":"Fabio Roli","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Roli, Fabio","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.8256999850273132,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.8256999850273132,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.06419999897480011,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10883","display_name":"Ethics and Social Impacts of AI","score":0.011699999682605267,"subfield":{"id":"https://openalex.org/subfields/3311","display_name":"Safety Research"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/flexibility","display_name":"Flexibility (engineering)","score":0.5230000019073486},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.5196999907493591},{"id":"https://openalex.org/keywords/semantics","display_name":"Semantics (computer science)","score":0.48649999499320984},{"id":"https://openalex.org/keywords/anomaly-detection","display_name":"Anomaly detection","score":0.47040000557899475},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.40310001373291016},{"id":"https://openalex.org/keywords/adversarial-system","display_name":"Adversarial system","score":0.37940001487731934},{"id":"https://openalex.org/keywords/safeguarding","display_name":"Safeguarding","score":0.36239999532699585},{"id":"https://openalex.org/keywords/camouflage","display_name":"Camouflage","score":0.34950000047683716}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.684499979019165},{"id":"https://openalex.org/C2780598303","wikidata":"https://www.wikidata.org/wiki/Q65921492","display_name":"Flexibility (engineering)","level":2,"score":0.5230000019073486},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.5196999907493591},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.48649999499320984},{"id":"https://openalex.org/C739882","wikidata":"https://www.wikidata.org/wiki/Q3560506","display_name":"Anomaly detection","level":2,"score":0.47040000557899475},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.4526999890804291},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.40310001373291016},{"id":"https://openalex.org/C37736160","wikidata":"https://www.wikidata.org/wiki/Q1801315","display_name":"Adversarial system","level":2,"score":0.37940001487731934},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3765999972820282},{"id":"https://openalex.org/C2776743756","wikidata":"https://www.wikidata.org/wiki/Q5097921","display_name":"Safeguarding","level":2,"score":0.36239999532699585},{"id":"https://openalex.org/C2776196576","wikidata":"https://www.wikidata.org/wiki/Q196113","display_name":"Camouflage","level":2,"score":0.34950000047683716},{"id":"https://openalex.org/C83677898","wikidata":"https://www.wikidata.org/wiki/Q1878538","display_name":"Hyperbolic space","level":2,"score":0.3400999903678894},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.3296000063419342},{"id":"https://openalex.org/C35525427","wikidata":"https://www.wikidata.org/wiki/Q745881","display_name":"Intrusion detection system","level":2,"score":0.31520000100135803},{"id":"https://openalex.org/C2779585090","wikidata":"https://www.wikidata.org/wiki/Q3457762","display_name":"Resilience (materials science)","level":2,"score":0.2955999970436096},{"id":"https://openalex.org/C41065033","wikidata":"https://www.wikidata.org/wiki/Q2825412","display_name":"Adversary","level":2,"score":0.29510000348091125},{"id":"https://openalex.org/C2778572836","wikidata":"https://www.wikidata.org/wiki/Q380933","display_name":"Space (punctuation)","level":2,"score":0.2928999960422516},{"id":"https://openalex.org/C64357122","wikidata":"https://www.wikidata.org/wiki/Q1149766","display_name":"Causality (physics)","level":2,"score":0.287200003862381},{"id":"https://openalex.org/C541664917","wikidata":"https://www.wikidata.org/wiki/Q14001","display_name":"Malware","level":2,"score":0.2870999872684479},{"id":"https://openalex.org/C2781345505","wikidata":"https://www.wikidata.org/wiki/Q2535979","display_name":"Blacklist","level":2,"score":0.27570000290870667},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.2736000120639801},{"id":"https://openalex.org/C94915269","wikidata":"https://www.wikidata.org/wiki/Q1834857","display_name":"Detector","level":2,"score":0.27140000462532043},{"id":"https://openalex.org/C108801101","wikidata":"https://www.wikidata.org/wiki/Q15032","display_name":"Steganography","level":3,"score":0.25130000710487366},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.2508000135421753}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.06285","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.06285","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.06285","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.06285","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Vision-Language":[0],"Models":[1],"(VLMs)":[2],"have":[3],"become":[4],"essential":[5],"for":[6],"tasks":[7],"such":[8],"as":[9,110],"image":[10],"synthesis,":[11],"captioning,":[12],"and":[13,18,66,83,106,124,146,162,166,172],"retrieval":[14],"by":[15,117],"aligning":[16],"textual":[17],"visual":[19],"information":[20],"in":[21,158],"a":[22,90],"shared":[23],"embedding":[24],"space.":[25],"Yet,":[26],"this":[27,115],"flexibility":[28],"also":[29],"makes":[30],"them":[31],"vulnerable":[32],"to":[33,37,102,122,175],"malicious":[34,179],"prompts":[35,105],"designed":[36],"produce":[38],"unsafe":[39,130],"content,":[40],"raising":[41],"critical":[42],"safety":[43],"concerns.":[44],"Existing":[45],"defenses":[46,157],"either":[47],"rely":[48],"on":[49,57,114],"blacklist":[50],"filters,":[51],"which":[52,63],"are":[53,64],"easily":[54],"circumvented,":[55],"or":[56],"heavy":[58],"classifier-based":[59],"systems,":[60],"both":[61,159],"of":[62,99,137],"costly":[65],"fragile":[67],"under":[68],"embedding-level":[69],"attacks.":[70],"We":[71],"address":[72],"these":[73],"challenges":[74],"with":[75],"two":[76],"complementary":[77],"components:":[78],"Hyperbolic":[79,84],"Prompt":[80,85],"Espial":[81],"(HyPE)":[82],"Sanitization":[86],"(HyPS).":[87],"HyPE":[88,165],"is":[89],"lightweight":[91],"anomaly":[92],"detector":[93],"that":[94,151],"leverages":[95],"the":[96,134],"structured":[97],"geometry":[98],"hyperbolic":[100],"space":[101],"model":[103],"benign":[104],"detect":[107],"harmful":[108,127],"ones":[109],"outliers.":[111],"HyPS":[112,167],"builds":[113],"detection":[116,160],"applying":[118],"explainable":[119],"attribution":[120],"methods":[121],"identify":[123],"selectively":[125],"modify":[126],"words,":[128],"neutralizing":[129],"intent":[131],"while":[132],"preserving":[133],"original":[135],"semantics":[136],"user":[138],"prompts.":[139],"Through":[140],"extensive":[141],"experiments":[142],"across":[143],"multiple":[144],"datasets":[145],"adversarial":[147],"scenarios,":[148],"we":[149],"prove":[150],"our":[152],"framework":[153],"consistently":[154],"outperforms":[155],"prior":[156],"accuracy":[161],"robustness.":[163],"Together,":[164],"offer":[168],"an":[169],"efficient,":[170],"interpretable,":[171],"resilient":[173],"approach":[174],"safeguarding":[176],"VLMs":[177],"against":[178],"prompt":[180],"misuse.":[181]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-04-10T00:00:00"}
