{"id":"https://openalex.org/W7118442141","doi":"https://doi.org/10.1145/3779430","title":"Detecting Training Data For Large Language Models: A Survey","display_name":"Detecting Training Data For Large Language Models: A Survey","publication_year":2026,"publication_date":"2026-01-07","ids":{"openalex":"https://openalex.org/W7118442141","doi":"https://doi.org/10.1145/3779430"},"language":"en","primary_location":{"id":"doi:10.1145/3779430","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3779430","pdf_url":null,"source":{"id":"https://openalex.org/S157921468","display_name":"ACM Computing Surveys","issn_l":"0360-0300","issn":["0360-0300","1557-7341"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Computing Surveys","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://doi.org/10.1145/3779430","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5122034507","display_name":"Chen Yang","orcid":null},"institutions":[{"id":"https://openalex.org/I125839683","display_name":"Beijing Institute of Technology","ror":"https://ror.org/01skt4w74","country_code":"CN","type":"education","lineage":["https://openalex.org/I125839683","https://openalex.org/I890469752"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Chen Yang","raw_affiliation_strings":["School of Cyberspace Science and Technology, Beijing Institute of Technology"],"raw_orcid":"https://orcid.org/0000-0002-3863-5832","affiliations":[{"raw_affiliation_string":"School of Cyberspace Science and Technology, Beijing Institute of Technology","institution_ids":["https://openalex.org/I125839683"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Junyi Li","orcid":"https://orcid.org/0009-0000-9841-4217"},"institutions":[{"id":"https://openalex.org/I125839683","display_name":"Beijing Institute of Technology","ror":"https://ror.org/01skt4w74","country_code":"CN","type":"education","lineage":["https://openalex.org/I125839683","https://openalex.org/I890469752"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Junyi Li","raw_affiliation_strings":["School of Cyberspace Science and Technology, Beijing Institute of Technology"],"raw_orcid":"https://orcid.org/0009-0000-9841-4217","affiliations":[{"raw_affiliation_string":"School of Cyberspace Science and Technology, Beijing Institute of Technology","institution_ids":["https://openalex.org/I125839683"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5104148496","display_name":"Shulin LAN","orcid":null},"institutions":[{"id":"https://openalex.org/I4210165038","display_name":"University of Chinese Academy of Sciences","ror":"https://ror.org/05qbk4x57","country_code":"CN","type":"education","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210165038"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shulin Lan","raw_affiliation_strings":["School of Economics and Management, University of the Chinese Academy of Sciences"],"raw_orcid":"https://orcid.org/0000-0001-5234-0830","affiliations":[{"raw_affiliation_string":"School of Economics and Management, University of the Chinese Academy of Sciences","institution_ids":["https://openalex.org/I4210165038"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5122198424","display_name":"Yingchao Wang","orcid":null},"institutions":[{"id":"https://openalex.org/I125839683","display_name":"Beijing Institute of Technology","ror":"https://ror.org/01skt4w74","country_code":"CN","type":"education","lineage":["https://openalex.org/I125839683","https://openalex.org/I890469752"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yingchao Wang","raw_affiliation_strings":["School of Cyberspace Science and Technology, Beijing Institute of Technology"],"raw_orcid":"https://orcid.org/0000-0003-2037-7465","affiliations":[{"raw_affiliation_string":"School of Cyberspace Science and Technology, Beijing Institute of Technology","institution_ids":["https://openalex.org/I125839683"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5015890960","display_name":"Hongyang Du","orcid":null},"institutions":[{"id":"https://openalex.org/I889458895","display_name":"University of Hong Kong","ror":"https://ror.org/02zhqgq86","country_code":"HK","type":"education","lineage":["https://openalex.org/I889458895"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Hongyang Du","raw_affiliation_strings":["Department of Electrical and Electronic Engineering, The University of Hong Kong"],"raw_orcid":"https://orcid.org/0000-0002-8220-6525","affiliations":[{"raw_affiliation_string":"Department of Electrical and Electronic Engineering, The University of Hong Kong","institution_ids":["https://openalex.org/I889458895"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5050547062","display_name":"Congcheng Gong","orcid":null},"institutions":[{"id":"https://openalex.org/I125839683","display_name":"Beijing Institute of Technology","ror":"https://ror.org/01skt4w74","country_code":"CN","type":"education","lineage":["https://openalex.org/I125839683","https://openalex.org/I890469752"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Congcheng Gong","raw_affiliation_strings":["School of Cyberspace Science and Technology, Beijing Institute of Technology"],"raw_orcid":"https://orcid.org/0009-0000-0075-1195","affiliations":[{"raw_affiliation_string":"School of Cyberspace Science and Technology, Beijing Institute of Technology","institution_ids":["https://openalex.org/I125839683"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5122299512","display_name":"Xingshan Yao","orcid":null},"institutions":[{"id":"https://openalex.org/I125839683","display_name":"Beijing Institute of Technology","ror":"https://ror.org/01skt4w74","country_code":"CN","type":"education","lineage":["https://openalex.org/I125839683","https://openalex.org/I890469752"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xingshan Yao","raw_affiliation_strings":["School of Cyberspace Science and Technology, Beijing Institute of Technology"],"raw_orcid":"https://orcid.org/0009-0002-2833-6350","affiliations":[{"raw_affiliation_string":"School of Cyberspace Science and Technology, Beijing Institute of Technology","institution_ids":["https://openalex.org/I125839683"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5091266202","display_name":"Dusit Niyato","orcid":"https://orcid.org/0000-0002-7442-7416"},"institutions":[{"id":"https://openalex.org/I172675005","display_name":"Nanyang Technological University","ror":"https://ror.org/02e7b5302","country_code":"SG","type":"education","lineage":["https://openalex.org/I172675005"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Dusit (Tao) Niyato","raw_affiliation_strings":["School of Computer Science and Engineering, Nanyang Technological University"],"raw_orcid":"https://orcid.org/0000-0002-7442-7416","affiliations":[{"raw_affiliation_string":"School of Computer Science and Engineering, Nanyang Technological University","institution_ids":["https://openalex.org/I172675005"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5122133976","display_name":"Liehuang Zhu","orcid":null},"institutions":[{"id":"https://openalex.org/I125839683","display_name":"Beijing Institute of Technology","ror":"https://ror.org/01skt4w74","country_code":"CN","type":"education","lineage":["https://openalex.org/I125839683","https://openalex.org/I890469752"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Liehuang Zhu","raw_affiliation_strings":["School of Cyberspace Science and Technology, Beijing Institute of Technology"],"raw_orcid":"https://orcid.org/0000-0003-3277-3887","affiliations":[{"raw_affiliation_string":"School of Cyberspace Science and Technology, Beijing Institute of Technology","institution_ids":["https://openalex.org/I125839683"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":9,"corresponding_author_ids":["https://openalex.org/A5122034507"],"corresponding_institution_ids":["https://openalex.org/I125839683"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.03459879,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"58","issue":"9","first_page":"1","last_page":"35"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12380","display_name":"Authorship Attribution and Profiling","score":0.36559998989105225,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12380","display_name":"Authorship Attribution and Profiling","score":0.36559998989105225,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.11670000106096268,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13910","display_name":"Computational and Text Analysis Methods","score":0.05570000037550926,"subfield":{"id":"https://openalex.org/subfields/3300","display_name":"General Social Sciences"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/scope","display_name":"Scope (computer science)","score":0.6588000059127808},{"id":"https://openalex.org/keywords/training","display_name":"Training (meteorology)","score":0.5909000039100647},{"id":"https://openalex.org/keywords/training-set","display_name":"Training set","score":0.5573999881744385},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.4092999994754791},{"id":"https://openalex.org/keywords/diversity","display_name":"Diversity (politics)","score":0.3801000118255615},{"id":"https://openalex.org/keywords/labeled-data","display_name":"Labeled data","score":0.326200008392334},{"id":"https://openalex.org/keywords/data-modeling","display_name":"Data modeling","score":0.3093999922275543}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8452000021934509},{"id":"https://openalex.org/C2778012447","wikidata":"https://www.wikidata.org/wiki/Q1034415","display_name":"Scope (computer science)","level":2,"score":0.6588000059127808},{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.5909000039100647},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.5573999881744385},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.5146999955177307},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4244999885559082},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.4092999994754791},{"id":"https://openalex.org/C2781316041","wikidata":"https://www.wikidata.org/wiki/Q1230584","display_name":"Diversity (politics)","level":2,"score":0.3801000118255615},{"id":"https://openalex.org/C2776145971","wikidata":"https://www.wikidata.org/wiki/Q30673951","display_name":"Labeled data","level":2,"score":0.326200008392334},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3176000118255615},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.3093999922275543},{"id":"https://openalex.org/C123201435","wikidata":"https://www.wikidata.org/wiki/Q456632","display_name":"Information privacy","level":2,"score":0.2930000126361847},{"id":"https://openalex.org/C169093310","wikidata":"https://www.wikidata.org/wiki/Q3702971","display_name":"Personally identifiable information","level":2,"score":0.2906999886035919},{"id":"https://openalex.org/C187191949","wikidata":"https://www.wikidata.org/wiki/Q1138496","display_name":"Profiling (computer programming)","level":2,"score":0.28349998593330383},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.26350000500679016},{"id":"https://openalex.org/C56739046","wikidata":"https://www.wikidata.org/wiki/Q192060","display_name":"Knowledge management","level":1,"score":0.25189998745918274},{"id":"https://openalex.org/C69360830","wikidata":"https://www.wikidata.org/wiki/Q1172237","display_name":"Data Protection Act 1998","level":2,"score":0.25049999356269836},{"id":"https://openalex.org/C75684735","wikidata":"https://www.wikidata.org/wiki/Q858810","display_name":"Big data","level":2,"score":0.25040000677108765}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/3779430","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3779430","pdf_url":null,"source":{"id":"https://openalex.org/S157921468","display_name":"ACM Computing Surveys","issn_l":"0360-0300","issn":["0360-0300","1557-7341"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Computing Surveys","raw_type":"journal-article"},{"id":"pmh:oai:dr.ntu.edu.sg:10356/205654","is_oa":false,"landing_page_url":"https://hdl.handle.net/10356/205654","pdf_url":null,"source":{"id":"https://openalex.org/S4306402609","display_name":"DR-NTU (Nanyang Technological University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I172675005","host_organization_name":"Nanyang Technological University","host_organization_lineage":["https://openalex.org/I172675005"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"acceptedVersion","is_accepted":true,"is_published":false,"raw_source_name":null,"raw_type":"Journal Article"}],"best_oa_location":{"id":"doi:10.1145/3779430","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3779430","pdf_url":null,"source":{"id":"https://openalex.org/S157921468","display_name":"ACM Computing Surveys","issn_l":"0360-0300","issn":["0360-0300","1557-7341"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Computing Surveys","raw_type":"journal-article"},"sustainable_development_goals":[{"score":0.4157191514968872,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[{"id":"https://openalex.org/G4933354543","display_name":null,"funder_award_id":"2023CX01020","funder_id":"https://openalex.org/F4320335787","funder_display_name":"Fundamental Research Funds for the Central Universities"},{"id":"https://openalex.org/G662429793","display_name":null,"funder_award_id":"62472035, U24B20148, 72201266, 72192843, and 72192844","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320335787","display_name":"Fundamental Research Funds for the Central Universities","ror":null}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":38,"referenced_works":["https://openalex.org/W1488996941","https://openalex.org/W2535690855","https://openalex.org/W2796541946","https://openalex.org/W2800224756","https://openalex.org/W2884943453","https://openalex.org/W2952604841","https://openalex.org/W3034368386","https://openalex.org/W3035261884","https://openalex.org/W3081096564","https://openalex.org/W3081886688","https://openalex.org/W3106191616","https://openalex.org/W3138815606","https://openalex.org/W3139086469","https://openalex.org/W3154109599","https://openalex.org/W3213407400","https://openalex.org/W3215966579","https://openalex.org/W4229053728","https://openalex.org/W4255421341","https://openalex.org/W4288057780","https://openalex.org/W4367595583","https://openalex.org/W4379652910","https://openalex.org/W4384270559","https://openalex.org/W4385565597","https://openalex.org/W4385571225","https://openalex.org/W4385573569","https://openalex.org/W4385573947","https://openalex.org/W4385734176","https://openalex.org/W4386076140","https://openalex.org/W4389009541","https://openalex.org/W4389519044","https://openalex.org/W4389520705","https://openalex.org/W4391494845","https://openalex.org/W4401588680","https://openalex.org/W4402670101","https://openalex.org/W4403536772","https://openalex.org/W4404784117","https://openalex.org/W4411120469","https://openalex.org/W4412888481"],"related_works":[],"abstract_inverted_index":{"As":[0],"large":[1],"language":[2],"models":[3,88],"(LLMs)":[4],"continue":[5],"to":[6,39,55,62,128,149],"evolve,":[7],"the":[8,21,47,63,68,117,134,145,150,152,160],"scope":[9],"and":[10,158,163],"diversity":[11],"of":[12,24,65,107,109,119,136],"data":[13,34,70,82,139],"used":[14,72],"for":[15,73,140,166],"training":[16,22,81,138],"are":[17],"expanding":[18],"significantly.":[19],"However,":[20,98],"dataset":[23],"LLMs":[25,101],"may":[26],"inevitably":[27],"contain":[28],"sensitive":[29],"information":[30,148],"such":[31],"as":[32],"personal":[33],"or":[35,42,52],"copyrighted":[36],"material,":[37],"leading":[38],"privacy":[40],"leakage":[41],"copyright":[43],"infringement":[44],"risks":[45],"if":[46],"model":[48],"generates":[49],"highly":[50],"similar":[51],"identical":[53],"text":[54,69],"these":[56],"sources.":[57],"This":[58],"has":[59,89,115],"drawn":[60],"attention":[61],"issue":[64],"detecting":[66,80,137],"whether":[67],"is":[71],"LLM":[74],"training.":[75],"To":[76],"date,":[77],"research":[78,110,168],"on":[79,92,100],"usage":[83],"in":[84,112,169],"artificial":[85],"intelligence":[86],"(AI)":[87],"mainly":[90],"focused":[91],"traditional":[93],"machine":[94],"learning":[95],"(ML)":[96],"models.":[97],"studies":[99],"remain":[102],"relatively":[103],"immature.":[104],"The":[105],"lack":[106],"understanding":[108],"progress":[111],"this":[113,125,130,170],"area":[114],"hindered":[116],"development":[118],"more":[120],"effective":[121],"detection":[122,154],"methods.":[123],"Therefore,":[124],"article":[126],"aims":[127],"address":[129],"gap":[131],"by":[132],"conducting":[133],"analysis":[135],"LLM.":[141],"Specifically,":[142],"we":[143],"analyze":[144],"available":[146],"LLM\u2019s":[147],"detector,":[151],"main":[153],"methods,":[155],"determination":[156],"metrics,":[157],"discuss":[159],"technical":[161],"challenges":[162],"potential":[164],"directions":[165],"future":[167],"field.":[171]},"counts_by_year":[],"updated_date":"2026-05-21T09:19:25.381259","created_date":"2026-01-08T00:00:00"}
