{"id":"https://openalex.org/W4412886867","doi":"https://doi.org/10.18653/v1/2025.acl-long.60","title":"Re-identification of De-identified Documents with Autoregressive Infilling","display_name":"Re-identification of De-identified Documents with Autoregressive Infilling","publication_year":2025,"publication_date":"2025-01-01","ids":{"openalex":"https://openalex.org/W4412886867","doi":"https://doi.org/10.18653/v1/2025.acl-long.60"},"language":"en","primary_location":{"id":"doi:10.18653/v1/2025.acl-long.60","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.acl-long.60","pdf_url":"https://aclanthology.org/2025.acl-long.60.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://aclanthology.org/2025.acl-long.60.pdf","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5026075659","display_name":"Lucas Georges Gabriel Charpentier","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lucas Georges Gabriel Charpentier","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5008574874","display_name":"Pierre Lison","orcid":"https://orcid.org/0000-0002-7649-0689"},"institutions":[{"id":"https://openalex.org/I144648426","display_name":"Norwegian Computing Center","ror":"https://ror.org/02gm7te43","country_code":"NO","type":"nonprofit","lineage":["https://openalex.org/I144648426"]}],"countries":["NO"],"is_corresponding":false,"raw_author_name":"Pierre Lison","raw_affiliation_strings":["University of Oslo Language Technology Group Norwegian Computing Center (NR) , Oslo"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Oslo Language Technology Group Norwegian Computing Center (NR) , Oslo","institution_ids":["https://openalex.org/I144648426"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.9349,"has_fulltext":true,"cited_by_count":1,"citation_normalized_percentile":{"value":0.77169288,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"1192","last_page":"1209"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12357","display_name":"Digital Media Forensic Detection","score":0.9593999981880188,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12357","display_name":"Digital Media Forensic Detection","score":0.9593999981880188,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/identification","display_name":"Identification (biology)","score":0.6199003458023071},{"id":"https://openalex.org/keywords/autoregressive-model","display_name":"Autoregressive model","score":0.5770527124404907},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5624379515647888},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.32764604687690735},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.12777671217918396},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.10523754358291626}],"concepts":[{"id":"https://openalex.org/C116834253","wikidata":"https://www.wikidata.org/wiki/Q2039217","display_name":"Identification (biology)","level":2,"score":0.6199003458023071},{"id":"https://openalex.org/C159877910","wikidata":"https://www.wikidata.org/wiki/Q2202883","display_name":"Autoregressive model","level":2,"score":0.5770527124404907},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5624379515647888},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.32764604687690735},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.12777671217918396},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.10523754358291626},{"id":"https://openalex.org/C59822182","wikidata":"https://www.wikidata.org/wiki/Q441","display_name":"Botany","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.18653/v1/2025.acl-long.60","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.acl-long.60","pdf_url":"https://aclanthology.org/2025.acl-long.60.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.18653/v1/2025.acl-long.60","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.acl-long.60","pdf_url":"https://aclanthology.org/2025.acl-long.60.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 63rd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)","raw_type":"proceedings-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/10","score":0.5199999809265137,"display_name":"Reduced inequalities"}],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4412886867.pdf","grobid_xml":"https://content.openalex.org/works/W4412886867.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2171218219","https://openalex.org/W1972271943","https://openalex.org/W2150410159","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345"],"abstract_inverted_index":{"Documents":[0],"revealing":[1],"sensitive":[2],"information":[3,21],"about":[4],"individuals":[5],"must":[6],"typically":[7],"be":[8,144,165],"deidentified.This":[9],"de-identification":[10,41],"is":[11,111],"often":[12],"done":[13],"by":[14],"masking":[15],"all":[16,114],"mentions":[17],"of":[18,32,40,54,60,139,157],"personally":[19],"identifiable":[20],"(PII),":[22],"thereby":[23],"making":[24],"it":[25],"more":[26],"difficult":[27],"to":[28,97,103],"uncover":[29],"the":[30,33,38,51,74,84,91,120,149,155],"identity":[31],"person(s)":[34],"in":[35,67,77],"question.To":[36],"investigate":[37],"robustness":[39],"methods,":[42],"we":[43],"present":[44],"a":[45,58,65],"novel,":[46],"RAG-inspired":[47],"approach":[48],"that":[49,101,133],"attempts":[50],"reverse":[52],"process":[53,110],"re-identification":[55,75,121,150],"based":[56],"on":[57,122],"database":[59],"documents":[61],"representing":[62],"background":[63,85,158],"knowledge.Given":[64],"text":[66,106,141],"which":[68],"personal":[69],"identifiers":[70],"have":[71],"been":[72],"masked,":[73],"proceeds":[76],"two":[78],"steps.A":[79],"retriever":[80],"first":[81],"selects":[82],"from":[83],"knowledge":[86],"passages":[87,93],"deemed":[88],"relevant":[89],"for":[90,161],"re-identification.Those":[92],"are":[94,117],"then":[95],"provided":[96],"an":[98],"infilling":[99],"model":[100],"seeks":[102],"infer":[104],"each":[105],"span's":[107],"original":[108],"content.This":[109],"repeated":[112],"until":[113],"masked":[115],"spans":[116,142],"replaced.We":[118],"evaluate":[119],"three":[123],"datasets":[124],"(Wikipedia":[125],"biographies,":[126],"court":[127],"rulings":[128],"and":[129,147],"clinical":[130],"notes).Results":[131],"show":[132],"(1)":[134],"as":[135,137],"many":[136],"80%":[138],"de-identified":[140],"can":[143,164],"successfully":[145],"recovered":[146],"(2)":[148],"accuracy":[151],"increases":[152],"along":[153],"with":[154],"level":[156],"knowledge.The":[159],"code":[160],"this":[162],"paper":[163],"found":[166],"at:":[167],"https://github.com/ltgoslo/":[168],"re-identification-infilling.":[169]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
