{"id":"https://openalex.org/W7077484435","doi":"https://doi.org/10.48550/arxiv.2508.15934","title":"Strategic Sample Selection for Improved Clean-Label Backdoor Attacks in Text Classification","display_name":"Strategic Sample Selection for Improved Clean-Label Backdoor Attacks in Text Classification","publication_year":2025,"publication_date":"2025-08-21","ids":{"openalex":"https://openalex.org/W7077484435","doi":"https://doi.org/10.48550/arxiv.2508.15934"},"language":"en","primary_location":{"id":"doi:10.48550/arxiv.2508.15934","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2508.15934","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2508.15934","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Kirci, Onur Alp","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Kirci, Onur Alp","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":null,"display_name":"Gursoy, M. Emre","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gursoy, M. Emre","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":2,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":true,"primary_topic":{"id":"https://openalex.org/T12157","display_name":"Geochemistry and Geologic Mapping","score":0.631600022315979,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12157","display_name":"Geochemistry and Geologic Mapping","score":0.631600022315979,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13067","display_name":"Geological Modeling and Analysis","score":0.03180000185966492,"subfield":{"id":"https://openalex.org/subfields/1906","display_name":"Geochemistry and Petrology"},"field":{"id":"https://openalex.org/fields/19","display_name":"Earth and Planetary Sciences"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T14311","display_name":"Electrical and Electromagnetic Research","score":0.01979999989271164,"subfield":{"id":"https://openalex.org/subfields/3107","display_name":"Atomic and Molecular Physics, and Optics"},"field":{"id":"https://openalex.org/fields/31","display_name":"Physics and Astronomy"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/backdoor","display_name":"Backdoor","score":0.9937000274658203},{"id":"https://openalex.org/keywords/selection","display_name":"Selection (genetic algorithm)","score":0.6794999837875366},{"id":"https://openalex.org/keywords/sample","display_name":"Sample (material)","score":0.5519999861717224},{"id":"https://openalex.org/keywords/feature-selection","display_name":"Feature selection","score":0.38960000872612},{"id":"https://openalex.org/keywords/negative-selection","display_name":"Negative selection","score":0.33739998936653137},{"id":"https://openalex.org/keywords/state","display_name":"State (computer science)","score":0.30970001220703125}],"concepts":[{"id":"https://openalex.org/C2781045450","wikidata":"https://www.wikidata.org/wiki/Q254569","display_name":"Backdoor","level":2,"score":0.9937000274658203},{"id":"https://openalex.org/C81917197","wikidata":"https://www.wikidata.org/wiki/Q628760","display_name":"Selection (genetic algorithm)","level":2,"score":0.6794999837875366},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6470000147819519},{"id":"https://openalex.org/C198531522","wikidata":"https://www.wikidata.org/wiki/Q485146","display_name":"Sample (material)","level":2,"score":0.5519999861717224},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5153999924659729},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4223000109195709},{"id":"https://openalex.org/C148483581","wikidata":"https://www.wikidata.org/wiki/Q446488","display_name":"Feature selection","level":2,"score":0.38960000872612},{"id":"https://openalex.org/C7386963","wikidata":"https://www.wikidata.org/wiki/Q3954859","display_name":"Negative selection","level":4,"score":0.33739998936653137},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.32910001277923584},{"id":"https://openalex.org/C48103436","wikidata":"https://www.wikidata.org/wiki/Q599031","display_name":"State (computer science)","level":2,"score":0.30970001220703125},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.2924000024795532},{"id":"https://openalex.org/C2778334786","wikidata":"https://www.wikidata.org/wiki/Q1586270","display_name":"Variation (astronomy)","level":2,"score":0.27880001068115234},{"id":"https://openalex.org/C169258074","wikidata":"https://www.wikidata.org/wiki/Q245748","display_name":"Random forest","level":2,"score":0.2669000029563904},{"id":"https://openalex.org/C174333608","wikidata":"https://www.wikidata.org/wiki/Q19635","display_name":"Trojan","level":2,"score":0.2563000023365021}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2508.15934","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2508.15934","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2508.15934","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2508.15934","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Backdoor":[0],"attacks":[1,21,33,107,159],"pose":[2],"a":[3,84,166],"significant":[4],"threat":[5],"to":[6,47,82,100],"the":[7,64,88,92,132,136,141,153,169],"integrity":[8],"of":[9,103,168],"text":[10],"classification":[11],"models":[12],"used":[13],"in":[14,51,152,174],"natural":[15],"language":[16],"processing.":[17],"While":[18],"several":[19],"dirty-label":[20],"that":[22,131],"achieve":[23],"high":[24],"attack":[25,49,172],"success":[26],"rates":[27],"(ASR)":[28],"have":[29],"been":[30],"proposed,":[31],"clean-label":[32,52,101,158,171],"are":[34],"inherently":[35],"more":[36],"difficult.":[37],"In":[38],"this":[39],"paper,":[40],"we":[41,80],"propose":[42],"three":[43,116],"sample":[44,145],"selection":[45,146],"strategies":[46,59,163],"improve":[48,140],"effectiveness":[50],"scenarios:":[53],"Minimum,":[54],"Above50,":[55],"and":[56,72,91,112,121],"Below50.":[57],"Our":[58],"identify":[60],"those":[61],"samples":[62],"which":[63],"model":[65,123],"predicts":[66],"incorrectly":[67],"or":[68,149],"with":[69,147],"low":[70],"confidence,":[71],"by":[73,161],"injecting":[74],"backdoor":[75,106],"triggers":[76],"into":[77],"such":[78],"samples,":[79],"aim":[81],"induce":[83],"stronger":[85],"association":[86],"between":[87],"trigger":[89],"patterns":[90],"attacker-desired":[93],"target":[94],"label.":[95],"We":[96],"apply":[97],"our":[98,162],"methods":[99],"variants":[102],"four":[104,122],"canonical":[105],"(InsertSent,":[108],"WordInj,":[109],"StyleBkd,":[110],"SynBkd)":[111],"evaluate":[113],"them":[114],"on":[115],"datasets":[117],"(IMDB,":[118],"SST2,":[119],"HateSpeech)":[120],"types":[124],"(LSTM,":[125],"BERT,":[126],"DistilBERT,":[127],"RoBERTa).":[128],"Results":[129],"show":[130],"proposed":[133],"strategies,":[134],"particularly":[135],"Minimum":[137],"strategy,":[138],"significantly":[139],"ASR":[142],"over":[143],"random":[144],"little":[148],"no":[150],"degradation":[151],"model's":[154],"clean":[155],"accuracy.":[156],"Furthermore,":[157],"enhanced":[160],"outperform":[164],"BITE,":[165],"state":[167],"art":[170],"method,":[173],"many":[175],"configurations.":[176]},"counts_by_year":[],"updated_date":"2025-11-06T06:51:31.235846","created_date":"2025-10-10T00:00:00"}
