{"id":"https://openalex.org/W7130736677","doi":"https://doi.org/10.1109/tfuzz.2026.3665818","title":"Auditing Partial Dataset Usage in Large Language Models via Fuzzy Membership Aggregation","display_name":"Auditing Partial Dataset Usage in Large Language Models via Fuzzy Membership Aggregation","publication_year":2026,"publication_date":"2026-02-20","ids":{"openalex":"https://openalex.org/W7130736677","doi":"https://doi.org/10.1109/tfuzz.2026.3665818"},"language":null,"primary_location":{"id":"doi:10.1109/tfuzz.2026.3665818","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tfuzz.2026.3665818","pdf_url":null,"source":{"id":"https://openalex.org/S134177497","display_name":"IEEE Transactions on Fuzzy Systems","issn_l":"1063-6706","issn":["1063-6706","1941-0034"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Fuzzy Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5126450706","display_name":"Hongyu Zhu","orcid":null},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Hongyu Zhu","raw_affiliation_strings":["School of Computer Science, Shanghai Jiao Tong University, Shanghai, China"],"raw_orcid":"https://orcid.org/0009-0009-0570-8626","affiliations":[{"raw_affiliation_string":"School of Computer Science, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5058729442","display_name":"Sichu Liang","orcid":"https://orcid.org/0009-0009-6798-1118"},"institutions":[{"id":"https://openalex.org/I76569877","display_name":"Southeast University","ror":"https://ror.org/04ct4d772","country_code":"CN","type":"education","lineage":["https://openalex.org/I76569877"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Sichu Liang","raw_affiliation_strings":["School of Computer Science and Engineering, Southeast University, Nanjing, China"],"raw_orcid":"https://orcid.org/0009-0009-6798-1118","affiliations":[{"raw_affiliation_string":"School of Computer Science and Engineering, Southeast University, Nanjing, China","institution_ids":["https://openalex.org/I76569877"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5063857788","display_name":"Bofan Chen","orcid":null},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Bofan Chen","raw_affiliation_strings":["School of Computer Science, Shanghai Jiao Tong University, Shanghai, China"],"raw_orcid":"https://orcid.org/0009-0007-3241-6122","affiliations":[{"raw_affiliation_string":"School of Computer Science, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100994935","display_name":"Shi-Lin Wang","orcid":null},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shi-Lin Wang","raw_affiliation_strings":["School of Computer Science, Shanghai Jiao Tong University, Shanghai, China"],"raw_orcid":"https://orcid.org/0000-0002-8214-6809","affiliations":[{"raw_affiliation_string":"School of Computer Science, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5126496751","display_name":"Zhuosheng Zhang","orcid":null},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhuosheng Zhang","raw_affiliation_strings":["School of Computer Science, Shanghai Jiao Tong University, Shanghai, China"],"raw_orcid":"https://orcid.org/0000-0002-4183-3645","affiliations":[{"raw_affiliation_string":"School of Computer Science, Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5126460220","display_name":"Weiping Ding","orcid":null},"institutions":[{"id":"https://openalex.org/I199305430","display_name":"Nantong University","ror":"https://ror.org/02afcvw97","country_code":"CN","type":"education","lineage":["https://openalex.org/I199305430"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Weiping Ding","raw_affiliation_strings":["School of Artificial Intelligence and Computer Science, Nantong University, Nantong, China"],"raw_orcid":"https://orcid.org/0000-0002-3180-7347","affiliations":[{"raw_affiliation_string":"School of Artificial Intelligence and Computer Science, Nantong University, Nantong, China","institution_ids":["https://openalex.org/I199305430"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5126450706"],"corresponding_institution_ids":["https://openalex.org/I183067930"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.27620511,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"34","issue":"4","first_page":"1064","last_page":"1078"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T14347","display_name":"Big Data and Digital Economy","score":0.13760000467300415,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T14347","display_name":"Big Data and Digital Economy","score":0.13760000467300415,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.12359999865293503,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.12290000170469284,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/binary-number","display_name":"Binary number","score":0.5979999899864197},{"id":"https://openalex.org/keywords/audit","display_name":"Audit","score":0.5289000272750854},{"id":"https://openalex.org/keywords/fuzzy-set","display_name":"Fuzzy set","score":0.5277000069618225},{"id":"https://openalex.org/keywords/fuzzy-logic","display_name":"Fuzzy logic","score":0.5227000117301941},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.4846000075340271},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.4715000092983246},{"id":"https://openalex.org/keywords/binary-classification","display_name":"Binary classification","score":0.35600000619888306},{"id":"https://openalex.org/keywords/adaptive-neuro-fuzzy-inference-system","display_name":"Adaptive neuro fuzzy inference system","score":0.3476000130176544}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7289999723434448},{"id":"https://openalex.org/C48372109","wikidata":"https://www.wikidata.org/wiki/Q3913","display_name":"Binary number","level":2,"score":0.5979999899864197},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.5741999745368958},{"id":"https://openalex.org/C199521495","wikidata":"https://www.wikidata.org/wiki/Q181487","display_name":"Audit","level":2,"score":0.5289000272750854},{"id":"https://openalex.org/C42011625","wikidata":"https://www.wikidata.org/wiki/Q1055058","display_name":"Fuzzy set","level":3,"score":0.5277000069618225},{"id":"https://openalex.org/C58166","wikidata":"https://www.wikidata.org/wiki/Q224821","display_name":"Fuzzy logic","level":2,"score":0.5227000117301941},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5023000240325928},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.4846000075340271},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.4715000092983246},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.43299999833106995},{"id":"https://openalex.org/C66905080","wikidata":"https://www.wikidata.org/wiki/Q17005494","display_name":"Binary classification","level":3,"score":0.35600000619888306},{"id":"https://openalex.org/C186108316","wikidata":"https://www.wikidata.org/wiki/Q352530","display_name":"Adaptive neuro fuzzy inference system","level":4,"score":0.3476000130176544},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.3449000120162964},{"id":"https://openalex.org/C169087156","wikidata":"https://www.wikidata.org/wiki/Q2131593","display_name":"Framing (construction)","level":2,"score":0.3124000132083893},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.3089999854564667},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.3050000071525574},{"id":"https://openalex.org/C58489278","wikidata":"https://www.wikidata.org/wiki/Q1172284","display_name":"Data set","level":2,"score":0.3010999858379364},{"id":"https://openalex.org/C174348530","wikidata":"https://www.wikidata.org/wiki/Q188635","display_name":"Bridging (networking)","level":2,"score":0.28110000491142273},{"id":"https://openalex.org/C2779190172","wikidata":"https://www.wikidata.org/wiki/Q4913888","display_name":"Binary data","level":3,"score":0.2754000127315521},{"id":"https://openalex.org/C100776233","wikidata":"https://www.wikidata.org/wiki/Q2532492","display_name":"Bridge (graph theory)","level":2,"score":0.2703999876976013},{"id":"https://openalex.org/C3020028006","wikidata":"https://www.wikidata.org/wiki/Q9158","display_name":"Electronic mail","level":2,"score":0.26499998569488525},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.2565999925136566}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tfuzz.2026.3665818","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tfuzz.2026.3665818","pdf_url":null,"source":{"id":"https://openalex.org/S134177497","display_name":"IEEE Transactions on Fuzzy Systems","issn_l":"1063-6706","issn":["1063-6706","1941-0034"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Fuzzy Systems","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.7275914549827576,"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16"}],"awards":[{"id":"https://openalex.org/G1581597255","display_name":null,"funder_award_id":"62576178","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G4286351655","display_name":null,"funder_award_id":"U2433216","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G4756121660","display_name":null,"funder_award_id":"24JG0500302","funder_id":"https://openalex.org/F4320321885","funder_display_name":"Science and Technology Commission of Shanghai Municipality"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320321885","display_name":"Science and Technology Commission of Shanghai Municipality","ror":"https://ror.org/03kt66j61"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"The":[0],"remarkable":[1],"capabilities":[2],"of":[3,81,102,162,178,201,204,254],"Large":[4],"Language":[5],"Models":[6],"(LLMs)":[7],"are":[8,91,98],"fueled":[9],"by":[10,151],"massive":[11],"internet-scale":[12],"corpora.":[13],"However,":[14],"scraped":[15],"data":[16],"owners":[17],"often":[18],"do":[19],"not":[20],"consent":[21],"to":[22,43,69,165,248],"its":[23],"use":[24],"for":[25],"training,":[26],"raising":[27],"significant":[28],"legal":[29],"and":[30,35,67,146,194,257,279],"ethical":[31],"concerns":[32],"over":[33],"copyright":[34],"privacy.":[36],"<italic":[37,115,129,152,168],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[38,116,130,153,169,213,242,263],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">Data":[39],"auditing</i>":[40],"techniques":[41,271],"seek":[42],"verify":[44],"whether":[45],"a":[46,53,70,78,119,166,187,205,226,240],"protected":[47,206],"dataset":[48,120,132,207],"was":[49],"used":[50,93,208],"in":[51,86,94,128,172,180,225,272],"training":[52,88,183],"target":[54],"LLM,":[55],"typically":[56],"framing":[57],"the":[58,140,159,181,195,202],"task":[59],"as":[60,236],"membership":[61,66,164],"inference:":[62],"estimating":[63,276],"binary":[64,84,163,223,270],"sample-level":[65,191,219],"aggregating":[68],"dataset-level":[71,196,233],"decision.":[72],"In":[73],"this":[74,82],"paper,":[75],"we":[76,157],"identify":[77],"fundamental":[79],"limitation":[80],"crisp":[83,160,269],"paradigm:":[85],"realistic":[87],"pipelines,":[89],"datasets":[90],"rarely":[92],"full.":[95],"Instead,":[96],"models":[97],"trained":[99],"on":[100],"mixtures":[101],"partial":[103,250,274],"subsets":[104],"drawn":[105],"from":[106,126,222],"multiple":[107,258],"sources.":[108],"Existing":[109],"auditing":[110,259],"techniques,":[111],"built":[112],"upon":[113],"an":[114],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">all-or-none</i>":[117],"assumption\u2014declaring":[118],"either":[121],"entirely":[122],"present":[123],"or":[124],"absent":[125],"training\u2014collapse":[127],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">partial":[131],"usage</i>":[133],"scenarios.":[134],"Their":[135],"predictions":[136],"fluctuate":[137],"unpredictably":[138],"with":[139],"member":[141,234,277,282],"ratio,":[142,198],"causing":[143],"unstable":[144],"performance":[145],"high":[147],"false-negative":[148],"rates.":[149],"Inspired":[150],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">fuzzy":[154,170],"set":[155],"theory</i>,":[156],"relax":[158],"notion":[161],"continuous":[167],"membership</i>":[171],"[0,1],":[173],"quantifying":[174],"each":[175],"sample's":[176],"degree":[177],"inclusion":[179],"model's":[182],"set.":[184],"We":[185],"establish":[186],"theoretical":[188],"bridge":[189],"between":[190],"fuzzy":[192,220],"memberships":[193,221,247],"usage":[197],"facilitating":[199],"inference":[200],"proportion":[203],"during":[209],"training.":[210],"A":[211],"<bold":[212,241,262],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">neural":[214],"network":[215],"fuzzifier</b>":[216],"first":[217],"estimates":[218],"labels":[224],"reference":[227],"set,":[228],"then":[229],"refines":[230],"them":[231],"using":[232],"ratios":[235],"higher-order":[237],"supervision.":[238],"Finally,":[239],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">defuzzification</b>":[243],"stage":[244],"aggregates":[245],"calibrated":[246],"determine":[249],"usage.":[251],"Across":[252],"LLMs":[253],"varying":[255],"scales":[256],"datasets,":[260],"our":[261],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">Fuzzy":[264],"Auditor</b>":[265],"substantially":[266],"outperforms":[267],"state-of-the-art":[268],"detecting":[273],"usage,":[275],"proportions,":[278],"identifying":[280],"individual":[281],"samples.":[283]},"counts_by_year":[],"updated_date":"2026-04-02T13:48:15.688549","created_date":"2026-02-21T00:00:00"}
