{"id":"https://openalex.org/W3048742277","doi":"https://doi.org/10.1109/infocomwkshps50562.2020.9162683","title":"Using NLP and Machine Learning to Detect Data Privacy Violations","display_name":"Using NLP and Machine Learning to Detect Data Privacy Violations","publication_year":2020,"publication_date":"2020-07-01","ids":{"openalex":"https://openalex.org/W3048742277","doi":"https://doi.org/10.1109/infocomwkshps50562.2020.9162683","mag":"3048742277"},"language":"en","primary_location":{"id":"doi:10.1109/infocomwkshps50562.2020.9162683","is_oa":false,"landing_page_url":"https://doi.org/10.1109/infocomwkshps50562.2020.9162683","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE INFOCOM 2020 - IEEE Conference on Computer Communications Workshops (INFOCOM WKSHPS)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"http://hdl.handle.net/10316/95068","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100345291","display_name":"Paulo Silva","orcid":"https://orcid.org/0000-0002-2306-2242"},"institutions":[{"id":"https://openalex.org/I76903346","display_name":"University of Coimbra","ror":"https://ror.org/04z8k9a98","country_code":"PT","type":"education","lineage":["https://openalex.org/I76903346"]}],"countries":["PT"],"is_corresponding":true,"raw_author_name":"Paulo Silva","raw_affiliation_strings":["CISUC, University of Coimbra, Coimbra, Portugal"],"affiliations":[{"raw_affiliation_string":"CISUC, University of Coimbra, Coimbra, Portugal","institution_ids":["https://openalex.org/I76903346"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102820080","display_name":"Carolina Gon\u00e7alves","orcid":"https://orcid.org/0000-0001-9612-8495"},"institutions":[{"id":"https://openalex.org/I76903346","display_name":"University of Coimbra","ror":"https://ror.org/04z8k9a98","country_code":"PT","type":"education","lineage":["https://openalex.org/I76903346"]}],"countries":["PT"],"is_corresponding":false,"raw_author_name":"Carolina Goncalves","raw_affiliation_strings":["CISUC, University of Coimbra, Coimbra, Portugal"],"affiliations":[{"raw_affiliation_string":"CISUC, University of Coimbra, Coimbra, Portugal","institution_ids":["https://openalex.org/I76903346"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5055928799","display_name":"Carolina Godinho","orcid":null},"institutions":[{"id":"https://openalex.org/I76903346","display_name":"University of Coimbra","ror":"https://ror.org/04z8k9a98","country_code":"PT","type":"education","lineage":["https://openalex.org/I76903346"]}],"countries":["PT"],"is_corresponding":false,"raw_author_name":"Carolina Godinho","raw_affiliation_strings":["CISUC, University of Coimbra, Coimbra, Portugal"],"affiliations":[{"raw_affiliation_string":"CISUC, University of Coimbra, Coimbra, Portugal","institution_ids":["https://openalex.org/I76903346"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5030619096","display_name":"Nuno Antunes","orcid":"https://orcid.org/0000-0002-6044-4012"},"institutions":[{"id":"https://openalex.org/I76903346","display_name":"University of Coimbra","ror":"https://ror.org/04z8k9a98","country_code":"PT","type":"education","lineage":["https://openalex.org/I76903346"]}],"countries":["PT"],"is_corresponding":false,"raw_author_name":"Nuno Antunes","raw_affiliation_strings":["CISUC, University of Coimbra, Coimbra, Portugal"],"affiliations":[{"raw_affiliation_string":"CISUC, University of Coimbra, Coimbra, Portugal","institution_ids":["https://openalex.org/I76903346"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5022354164","display_name":"Mar\u00edlia Curado","orcid":"https://orcid.org/0000-0001-6760-4675"},"institutions":[{"id":"https://openalex.org/I76903346","display_name":"University of Coimbra","ror":"https://ror.org/04z8k9a98","country_code":"PT","type":"education","lineage":["https://openalex.org/I76903346"]}],"countries":["PT"],"is_corresponding":false,"raw_author_name":"Marilia Curado","raw_affiliation_strings":["CISUC, University of Coimbra, Coimbra, Portugal"],"affiliations":[{"raw_affiliation_string":"CISUC, University of Coimbra, Coimbra, Portugal","institution_ids":["https://openalex.org/I76903346"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5100345291"],"corresponding_institution_ids":["https://openalex.org/I76903346"],"apc_list":null,"apc_paid":null,"fwci":2.6,"has_fulltext":true,"cited_by_count":30,"citation_normalized_percentile":{"value":0.90239363,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":94,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"972","last_page":"977"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10764","display_name":"Privacy-Preserving Technologies in Data","score":0.9894999861717224,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10927","display_name":"Access Control and Trust","score":0.9721999764442444,"subfield":{"id":"https://openalex.org/subfields/3312","display_name":"Sociology and Political Science"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8435996174812317},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.6724774837493896},{"id":"https://openalex.org/keywords/general-data-protection-regulation","display_name":"General Data Protection Regulation","score":0.6155747771263123},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.5901908874511719},{"id":"https://openalex.org/keywords/information-privacy","display_name":"Information privacy","score":0.5670579075813293},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5458673238754272},{"id":"https://openalex.org/keywords/labeled-data","display_name":"Labeled data","score":0.48674654960632324},{"id":"https://openalex.org/keywords/personally-identifiable-information","display_name":"Personally identifiable information","score":0.4601927101612091},{"id":"https://openalex.org/keywords/training-set","display_name":"Training set","score":0.4509803056716919},{"id":"https://openalex.org/keywords/data-protection-act-1998","display_name":"Data Protection Act 1998","score":0.43946635723114014},{"id":"https://openalex.org/keywords/data-modeling","display_name":"Data modeling","score":0.4343223571777344},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.37706589698791504},{"id":"https://openalex.org/keywords/computer-security","display_name":"Computer security","score":0.24696195125579834},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.19811660051345825}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8435996174812317},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.6724774837493896},{"id":"https://openalex.org/C3090818","wikidata":"https://www.wikidata.org/wiki/Q1172506","display_name":"General Data Protection Regulation","level":3,"score":0.6155747771263123},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5901908874511719},{"id":"https://openalex.org/C123201435","wikidata":"https://www.wikidata.org/wiki/Q456632","display_name":"Information privacy","level":2,"score":0.5670579075813293},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5458673238754272},{"id":"https://openalex.org/C2776145971","wikidata":"https://www.wikidata.org/wiki/Q30673951","display_name":"Labeled data","level":2,"score":0.48674654960632324},{"id":"https://openalex.org/C169093310","wikidata":"https://www.wikidata.org/wiki/Q3702971","display_name":"Personally identifiable information","level":2,"score":0.4601927101612091},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.4509803056716919},{"id":"https://openalex.org/C69360830","wikidata":"https://www.wikidata.org/wiki/Q1172237","display_name":"Data Protection Act 1998","level":2,"score":0.43946635723114014},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.4343223571777344},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.37706589698791504},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.24696195125579834},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.19811660051345825},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1109/infocomwkshps50562.2020.9162683","is_oa":false,"landing_page_url":"https://doi.org/10.1109/infocomwkshps50562.2020.9162683","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE INFOCOM 2020 - IEEE Conference on Computer Communications Workshops (INFOCOM WKSHPS)","raw_type":"proceedings-article"},{"id":"pmh:oai:estudogeral.sib.uc.pt:10316/95068","is_oa":true,"landing_page_url":"http://hdl.handle.net/10316/95068","pdf_url":"http://hdl.handle.net/10316/95068","source":{"id":"https://openalex.org/S4306401208","display_name":"Estudo Geral (Universidade de Coimbra)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I2802025818","host_organization_name":"Hospitais da Universidade de Coimbra","host_organization_lineage":["https://openalex.org/I2802025818"],"host_organization_lineage_names":[],"type":"repository"},"license":"public-domain","license_id":"https://openalex.org/licenses/public-domain","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"http://purl.org/coar/resource_type/c_6501"},{"id":"pmh:oai:estudogeral.sib.uc.pt:10316/93821","is_oa":true,"landing_page_url":"http://hdl.handle.net/10316/93821","pdf_url":null,"source":{"id":"https://openalex.org/S4306401208","display_name":"Estudo Geral (Universidade de Coimbra)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I2802025818","host_organization_name":"Hospitais da Universidade de Coimbra","host_organization_lineage":["https://openalex.org/I2802025818"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"info:eu-repo/semantics/article"},{"id":"pmh:oai:estudogeral.uc.pt:10316/93821","is_oa":true,"landing_page_url":"https://hdl.handle.net/10316/93821","pdf_url":null,"source":{"id":"https://openalex.org/S7407055320","display_name":"Estudo Geral","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"info:eu-repo/semantics/article"}],"best_oa_location":{"id":"pmh:oai:estudogeral.sib.uc.pt:10316/95068","is_oa":true,"landing_page_url":"http://hdl.handle.net/10316/95068","pdf_url":"http://hdl.handle.net/10316/95068","source":{"id":"https://openalex.org/S4306401208","display_name":"Estudo Geral (Universidade de Coimbra)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I2802025818","host_organization_name":"Hospitais da Universidade de Coimbra","host_organization_lineage":["https://openalex.org/I2802025818"],"host_organization_lineage_names":[],"type":"repository"},"license":"public-domain","license_id":"https://openalex.org/licenses/public-domain","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"http://purl.org/coar/resource_type/c_6501"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1691366356","display_name":null,"funder_award_id":"786713","funder_id":"https://openalex.org/F4320332999","funder_display_name":"Horizon 2020 Framework Programme"}],"funders":[{"id":"https://openalex.org/F4320332999","display_name":"Horizon 2020 Framework Programme","ror":"https://ror.org/00k4n6c32"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3048742277.pdf","grobid_xml":"https://content.openalex.org/works/W3048742277.grobid-xml"},"referenced_works_count":13,"referenced_works":["https://openalex.org/W1521626219","https://openalex.org/W1583031633","https://openalex.org/W1969887143","https://openalex.org/W1982589161","https://openalex.org/W2004763266","https://openalex.org/W2123442489","https://openalex.org/W2147880316","https://openalex.org/W2153848201","https://openalex.org/W2250539671","https://openalex.org/W2286961399","https://openalex.org/W2514317811","https://openalex.org/W2728951081","https://openalex.org/W2857028992"],"related_works":["https://openalex.org/W2917102635","https://openalex.org/W3023256691","https://openalex.org/W2603253017","https://openalex.org/W2883729192","https://openalex.org/W2901967497","https://openalex.org/W2910484607","https://openalex.org/W2789497412","https://openalex.org/W2794700933","https://openalex.org/W3171079982","https://openalex.org/W3048759155"],"abstract_inverted_index":{"Privacy":[0,183],"concerns":[1],"are":[2,18,117],"constantly":[3],"increasing":[4],"in":[5,40,52,140],"different":[6],"sectors.":[7],"Regulations":[8],"such":[9],"as":[10,71,181,186,188],"the":[11,23,54,65,89,104,107,151,154,163,189],"EU's":[12],"General":[13],"Data":[14],"Protection":[15],"Regulation":[16],"(GDPR)":[17],"pressuring":[19],"organizations":[20,51],"to":[21,49,74],"handle":[22],"individual's":[24],"data":[25,39,56,125],"with":[26,33,109,121],"reinforced":[27],"caution.":[28],"As":[29],"information":[30],"systems":[31],"deal":[32],"increasingly":[34],"large":[35],"amounts":[36],"of":[37,47,67,88,106],"personal":[38],"essential":[41],"services,":[42],"there":[43],"is":[44],"a":[45,72,110,182],"lack":[46],"mechanisms":[48],"help":[50],"protecting":[53],"involved":[55],"subjects.":[57],"In":[58,82],"this":[59,170],"paper,":[60],"we":[61,85,102,173],"propose":[62],"and":[63,77,99,119,145,158,161,192],"evaluate":[64],"use":[66],"Named":[68],"Entity":[69],"Recognition":[70],"way":[73],"identify,":[75],"monitor":[76],"validate":[78],"Personally":[79],"Identifiable":[80],"Information.":[81],"our":[83,176],"experiments,":[84],"used":[86],"three":[87],"most":[90],"well-known":[91],"Natural":[92],"Language":[93],"Processing":[94],"tools":[95,108],"(NLTK,":[96],"Stanford":[97],"CoreNLP,":[98],"spaCy).":[100],"First,":[101],"assess":[103],"effectiveness":[105],"generic":[111,144],"dataset.":[112],"Then,":[113],"machine":[114],"learning":[115],"models":[116],"trained":[118],"evaluated":[120],"datasets":[122],"built":[123],"on":[124],"that":[126,134],"contain":[127],"personally":[128],"identifiable":[129],"information.":[130],"The":[131],"results":[132],"show":[133],"models'":[135],"performance":[136,160],"was":[137],"highly":[138],"positive":[139],"accurately":[141],"classifying":[142],"both":[143],"more":[146],"context-specific":[147],"data.":[148],"We":[149],"observe":[150],"relationship":[152],"between":[153],"datasets'":[155],"training":[156,168],"size":[157,165],"respective":[159],"estimate":[162],"appropriate":[164],"for":[166],"model":[167],"within":[169],"context.":[171],"Furthermore,":[172],"discuss":[174],"how":[175],"proposal":[177],"can":[178],"effectively":[179],"act":[180],"Enhancing":[184],"Technology":[185],"well":[187],"potential":[190],"risks":[191],"associated":[193],"impacts.":[194]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":7},{"year":2024,"cited_by_count":8},{"year":2023,"cited_by_count":6},{"year":2022,"cited_by_count":6},{"year":2021,"cited_by_count":2}],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2025-10-10T00:00:00"}
