{"id":"https://openalex.org/W7140088376","doi":"https://doi.org/10.18653/v1/2026.eacl-long.49","title":"Detecting Training Data of Large Language Models via Expectation Maximization","display_name":"Detecting Training Data of Large Language Models via Expectation Maximization","publication_year":2026,"publication_date":"2026-01-01","ids":{"openalex":"https://openalex.org/W7140088376","doi":"https://doi.org/10.18653/v1/2026.eacl-long.49"},"language":null,"primary_location":{"id":"doi:10.18653/v1/2026.eacl-long.49","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2026.eacl-long.49","pdf_url":"https://aclanthology.org/2026.eacl-long.49.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 19th Conference of the European Chapter of the Association for Computational Linguistics (Volume 1: Long Papers)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://aclanthology.org/2026.eacl-long.49.pdf","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5130334400","display_name":"Gyuwan Kim","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gyuwan Kim","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130399921","display_name":"Yang Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang Li","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5055466746","display_name":"Evangelia Spiliopoulou","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Evangelia Spiliopoulou","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130364636","display_name":"Jie Ma","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jie Ma","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5130330929","display_name":"William Yang Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"William Yang Wang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.39357778,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1115","last_page":"1129"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.2865999937057495,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.2865999937057495,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.1598999947309494,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13629","display_name":"Text Readability and Simplification","score":0.06669999659061432,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/training","display_name":"Training (meteorology)","score":0.5483999848365784},{"id":"https://openalex.org/keywords/training-set","display_name":"Training set","score":0.40130001306533813},{"id":"https://openalex.org/keywords/expectation\u2013maximization-algorithm","display_name":"Expectation\u2013maximization algorithm","score":0.38589999079704285},{"id":"https://openalex.org/keywords/maximization","display_name":"Maximization","score":0.3336000144481659},{"id":"https://openalex.org/keywords/data-modeling","display_name":"Data modeling","score":0.3075999915599823}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6460000276565552},{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.5483999848365784},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5465999841690063},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.42179998755455017},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.40130001306533813},{"id":"https://openalex.org/C182081679","wikidata":"https://www.wikidata.org/wiki/Q1275153","display_name":"Expectation\u2013maximization algorithm","level":3,"score":0.38589999079704285},{"id":"https://openalex.org/C2776330181","wikidata":"https://www.wikidata.org/wiki/Q18358244","display_name":"Maximization","level":2,"score":0.3336000144481659},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.3075999915599823},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.288100004196167},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.28119999170303345},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.2709999978542328},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.26489999890327454},{"id":"https://openalex.org/C114289077","wikidata":"https://www.wikidata.org/wiki/Q3284399","display_name":"Statistical model","level":2,"score":0.2578999996185303}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.18653/v1/2026.eacl-long.49","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2026.eacl-long.49","pdf_url":"https://aclanthology.org/2026.eacl-long.49.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 19th Conference of the European Chapter of the Association for Computational Linguistics (Volume 1: Long Papers)","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.18653/v1/2026.eacl-long.49","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2026.eacl-long.49","pdf_url":"https://aclanthology.org/2026.eacl-long.49.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 19th Conference of the European Chapter of the Association for Computational Linguistics (Volume 1: Long Papers)","raw_type":"proceedings-article"},"sustainable_development_goals":[{"score":0.4571631848812103,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W7140088376.pdf","grobid_xml":"https://content.openalex.org/works/W7140088376.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Gyuwan":[0],"Kim,":[1],"Yang":[2,9],"Li,":[3],"Evangelia":[4],"Spiliopoulou,":[5],"Jie":[6],"Ma,":[7],"William":[8],"Wang.":[10],"Proceedings":[11],"of":[12,16,20],"the":[13,17,21],"19th":[14],"Conference":[15],"European":[18],"Chapter":[19],"Association":[22],"for":[23],"Computational":[24],"Linguistics":[25],"(Volume":[26],"1:":[27],"Long":[28],"Papers).":[29],"2026.":[30]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-03-24T00:00:00"}
