{"id":"https://openalex.org/W4415428669","doi":"https://doi.org/10.3233/faia250907","title":"Stealing Knowledge from Auditable Datasets","display_name":"Stealing Knowledge from Auditable Datasets","publication_year":2025,"publication_date":"2025-10-21","ids":{"openalex":"https://openalex.org/W4415428669","doi":"https://doi.org/10.3233/faia250907"},"language":null,"primary_location":{"id":"doi:10.3233/faia250907","is_oa":true,"landing_page_url":"https://doi.org/10.3233/faia250907","pdf_url":null,"source":{"id":"https://openalex.org/S4210201731","display_name":"Frontiers in artificial intelligence and applications","issn_l":"0922-6389","issn":["0922-6389","1879-8314"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Frontiers in Artificial Intelligence and Applications","raw_type":"book-chapter"},"type":"book-chapter","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://doi.org/10.3233/faia250907","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100674077","display_name":"Hongyu Zhu","orcid":"https://orcid.org/0009-0000-5993-4666"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Hongyu Zhu","raw_affiliation_strings":["School of Computer Science, Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5058729442","display_name":"Sichu Liang","orcid":"https://orcid.org/0009-0009-6798-1118"},"institutions":[{"id":"https://openalex.org/I76569877","display_name":"Southeast University","ror":"https://ror.org/04ct4d772","country_code":"CN","type":"education","lineage":["https://openalex.org/I76569877"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Sichu Liang","raw_affiliation_strings":["School of Computer Science and Engineering, Southeast University, Nanjing, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Engineering, Southeast University, Nanjing, China","institution_ids":["https://openalex.org/I76569877"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5044682092","display_name":"Wentao Hu","orcid":"https://orcid.org/0000-0001-8495-4175"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wentao Hu","raw_affiliation_strings":["School of Computer Science, Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5111924400","display_name":"Wenwen Wang","orcid":"https://orcid.org/0009-0006-0880-083X"},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Wenwen Wang","raw_affiliation_strings":["Department of Electrical and Computer Engineering, Carnegie Mellon University, Pittsburgh, USA"],"affiliations":[{"raw_affiliation_string":"Department of Electrical and Computer Engineering, Carnegie Mellon University, Pittsburgh, USA","institution_ids":["https://openalex.org/I74973139"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5023015254","display_name":"Fangqi Li","orcid":"https://orcid.org/0000-0001-7965-5170"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Fangqi Li","raw_affiliation_strings":["School of Computer Science, Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101717685","display_name":"Shilin Wang","orcid":"https://orcid.org/0000-0002-8214-6809"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shilin Wang","raw_affiliation_strings":["School of Computer Science, Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5070962435","display_name":"Zhuosheng Zhang","orcid":"https://orcid.org/0000-0002-4183-3645"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhuosheng Zhang","raw_affiliation_strings":["School of Computer Science, Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5100674077"],"corresponding_institution_ids":["https://openalex.org/I183067930"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.50780033,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12761","display_name":"Data Stream Mining Techniques","score":0.7476000189781189,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12761","display_name":"Data Stream Mining Techniques","score":0.7476000189781189,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11326","display_name":"Stock Market Forecasting Methods","score":0.6523000001907349,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/exploit","display_name":"Exploit","score":0.6299999952316284},{"id":"https://openalex.org/keywords/adversary","display_name":"Adversary","score":0.5421000123023987},{"id":"https://openalex.org/keywords/overfitting","display_name":"Overfitting","score":0.4399000108242035},{"id":"https://openalex.org/keywords/audit","display_name":"Audit","score":0.41659998893737793},{"id":"https://openalex.org/keywords/digital-watermarking","display_name":"Digital watermarking","score":0.37130001187324524},{"id":"https://openalex.org/keywords/debiasing","display_name":"Debiasing","score":0.3562000095844269},{"id":"https://openalex.org/keywords/perspective","display_name":"Perspective (graphical)","score":0.35370001196861267},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.35280001163482666},{"id":"https://openalex.org/keywords/divergence","display_name":"Divergence (linguistics)","score":0.35269999504089355}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7809000015258789},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.6299999952316284},{"id":"https://openalex.org/C41065033","wikidata":"https://www.wikidata.org/wiki/Q2825412","display_name":"Adversary","level":2,"score":0.5421000123023987},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.4560000002384186},{"id":"https://openalex.org/C22019652","wikidata":"https://www.wikidata.org/wiki/Q331309","display_name":"Overfitting","level":3,"score":0.4399000108242035},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4207000136375427},{"id":"https://openalex.org/C199521495","wikidata":"https://www.wikidata.org/wiki/Q181487","display_name":"Audit","level":2,"score":0.41659998893737793},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4153999984264374},{"id":"https://openalex.org/C150817343","wikidata":"https://www.wikidata.org/wiki/Q875932","display_name":"Digital watermarking","level":3,"score":0.37130001187324524},{"id":"https://openalex.org/C2779458634","wikidata":"https://www.wikidata.org/wiki/Q24963715","display_name":"Debiasing","level":2,"score":0.3562000095844269},{"id":"https://openalex.org/C12713177","wikidata":"https://www.wikidata.org/wiki/Q1900281","display_name":"Perspective (graphical)","level":2,"score":0.35370001196861267},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.35280001163482666},{"id":"https://openalex.org/C207390915","wikidata":"https://www.wikidata.org/wiki/Q1230525","display_name":"Divergence (linguistics)","level":2,"score":0.35269999504089355},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.35179999470710754},{"id":"https://openalex.org/C100980136","wikidata":"https://www.wikidata.org/wiki/Q4668956","display_name":"Malleability","level":4,"score":0.31859999895095825},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.3147999942302704},{"id":"https://openalex.org/C148417208","wikidata":"https://www.wikidata.org/wiki/Q4825882","display_name":"Authentication (law)","level":2,"score":0.31209999322891235},{"id":"https://openalex.org/C174839445","wikidata":"https://www.wikidata.org/wiki/Q1134386","display_name":"Lock (firearm)","level":2,"score":0.2994000017642975},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.2883000075817108},{"id":"https://openalex.org/C71745522","wikidata":"https://www.wikidata.org/wiki/Q2476929","display_name":"Confidentiality","level":2,"score":0.28760001063346863},{"id":"https://openalex.org/C2776452267","wikidata":"https://www.wikidata.org/wiki/Q1503443","display_name":"Secrecy","level":2,"score":0.2863999903202057},{"id":"https://openalex.org/C77618280","wikidata":"https://www.wikidata.org/wiki/Q1155772","display_name":"Scheme (mathematics)","level":2,"score":0.2791000008583069},{"id":"https://openalex.org/C117797892","wikidata":"https://www.wikidata.org/wiki/Q286363","display_name":"Shadow (psychology)","level":2,"score":0.2777999937534332},{"id":"https://openalex.org/C2779696439","wikidata":"https://www.wikidata.org/wiki/Q7512811","display_name":"Signature (topology)","level":2,"score":0.2768999934196472},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.2718999981880188},{"id":"https://openalex.org/C2779965156","wikidata":"https://www.wikidata.org/wiki/Q5227350","display_name":"Data sharing","level":3,"score":0.2696000039577484},{"id":"https://openalex.org/C112930515","wikidata":"https://www.wikidata.org/wiki/Q4389547","display_name":"Risk analysis (engineering)","level":1,"score":0.2667999863624573},{"id":"https://openalex.org/C2776459999","wikidata":"https://www.wikidata.org/wiki/Q2119376","display_name":"Fidelity","level":2,"score":0.26499998569488525},{"id":"https://openalex.org/C75291252","wikidata":"https://www.wikidata.org/wiki/Q1315756","display_name":"TRACE (psycholinguistics)","level":2,"score":0.2565999925136566},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.25380000472068787},{"id":"https://openalex.org/C140547941","wikidata":"https://www.wikidata.org/wiki/Q7797194","display_name":"Threat model","level":2,"score":0.2524999976158142}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.3233/faia250907","is_oa":true,"landing_page_url":"https://doi.org/10.3233/faia250907","pdf_url":null,"source":{"id":"https://openalex.org/S4210201731","display_name":"Frontiers in artificial intelligence and applications","issn_l":"0922-6389","issn":["0922-6389","1879-8314"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Frontiers in Artificial Intelligence and Applications","raw_type":"book-chapter"}],"best_oa_location":{"id":"doi:10.3233/faia250907","is_oa":true,"landing_page_url":"https://doi.org/10.3233/faia250907","pdf_url":null,"source":{"id":"https://openalex.org/S4210201731","display_name":"Frontiers in artificial intelligence and applications","issn_l":"0922-6389","issn":["0922-6389","1879-8314"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Frontiers in Artificial Intelligence and Applications","raw_type":"book-chapter"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"The":[0],"success":[1],"of":[2,12,87,138,191,227,235],"modern":[3],"deep":[4],"learning":[5,163],"hinges":[6],"on":[7,46],"vast":[8],"training":[9],"data,":[10],"much":[11],"which":[13],"is":[14],"scraped":[15],"from":[16,71,182],"the":[17,84,106,142,167,192,233],"web":[18],"and":[19,28,78,133,146,217,241],"may":[20],"include":[21],"copyrighted":[22],"or":[23,67],"private":[24],"content\u2014raising":[25],"serious":[26],"legal":[27],"ethical":[29],"concerns":[30],"when":[31],"used":[32],"without":[33],"authorization.":[34],"Dataset":[35],"provenance":[36,97,135,172],"seeks":[37],"to":[38,63,91,93,103,115,170,198,220,231],"identify":[39],"whether":[40],"a":[41,126,149,159,183,199,215,224],"model":[42,74],"has":[43],"been":[44],"trained":[45],"specific":[47],"data":[48,56],"collections,":[49],"thus":[50],"protecting":[51],"copyright":[52],"holders":[53],"while":[54,105,202],"preserving":[55],"utility.":[57],"Existing":[58],"techniques":[59],"either":[60],"watermark":[61],"datasets":[62,178],"embed":[64],"distinctive":[65],"behaviors,":[66],"directly":[68],"infer":[69],"usage":[70],"discrepancies":[72],"in":[73,118],"outputs":[75],"between":[76,144],"seen":[77,94],"unseen":[79],"samples.":[80],"These":[81],"approaches":[82],"exploit":[83],"fundamental":[85],"problem":[86],"empirical":[88],"risk":[89],"minimization":[90,213],"overfit":[92],"features.":[95],"Hence,":[96],"signals":[98],"are":[99],"considered":[100],"inherently":[101],"hard":[102],"erase,":[104],"adversary\u2019s":[107],"perspective":[108,156],"remains":[109],"largely":[110],"overlooked,":[111],"limiting":[112],"our":[113],"ability":[114],"assess":[116],"reliability":[117],"real-world":[119],"scenarios.":[120],"In":[121],"this":[122],"work,":[123],"we":[124],"present":[125],"unified":[127],"framework":[128],"that":[129,165],"interprets":[130],"both":[131],"watermarking":[132],"inference-based":[134],"as":[136,148,214],"manifestations":[137],"output":[139],"divergence,":[140],"modeling":[141],"interaction":[143],"auditor":[145],"adversary":[147,229],"min-max":[150],"game":[151],"over":[152,189],"such":[153],"divergences.":[154],"This":[155],"motivates":[157],"DivMin,":[158],"simple":[160],"yet":[161],"effective":[162],"strategy":[164],"minimizes":[166],"relevant":[168],"divergence":[169,212],"suppress":[171],"cues.":[173],"Experiments":[174],"across":[175],"diverse":[176],"image":[177],"demonstrate":[179],"that,":[180],"starting":[181],"pretrained":[184],"vision-language":[185],"model,":[186],"DivMin":[187],"retains":[188],"93%":[190],"full":[193],"fine-tuning":[194],"performance":[195],"gain":[196],"relative":[197],"zero-shot":[200],"baseline,":[201],"evading":[203],"all":[204],"six":[205],"state-of-the-art":[206],"auditing":[207,238],"methods.":[208],"Our":[209],"findings":[210],"establish":[211],"direct":[216],"practical":[218],"path":[219],"obfuscating":[221],"provenance,":[222],"offering":[223],"realistic":[225],"simulation":[226],"potential":[228],"strategies":[230],"guide":[232],"development":[234],"more":[236],"robust":[237],"techniques.":[239],"Code":[240],"Appendix":[242],"will":[243],"be":[244],"available":[245],"at":[246],"https://github.com/GradOpt/DivMin.":[247]},"counts_by_year":[],"updated_date":"2026-03-07T16:01:11.037858","created_date":"2025-10-24T00:00:00"}
