{"id":"https://openalex.org/W4406612164","doi":"https://doi.org/10.1109/smc54092.2024.10831852","title":"Refining Sensitive Document Classification: Introducing an Enhanced Dataset Proposal","display_name":"Refining Sensitive Document Classification: Introducing an Enhanced Dataset Proposal","publication_year":2024,"publication_date":"2024-10-06","ids":{"openalex":"https://openalex.org/W4406612164","doi":"https://doi.org/10.1109/smc54092.2024.10831852"},"language":"en","primary_location":{"id":"doi:10.1109/smc54092.2024.10831852","is_oa":false,"landing_page_url":"https://doi.org/10.1109/smc54092.2024.10831852","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE International Conference on Systems, Man, and Cybernetics (SMC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5094044949","display_name":"Mouhamet Latyr Ndiaye","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Mouhamet Latyr Ndiaye","raw_affiliation_strings":["OODRIVE-Trusted Cloud Solutions,Paris,France,75010"],"affiliations":[{"raw_affiliation_string":"OODRIVE-Trusted Cloud Solutions,Paris,France,75010","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5046944392","display_name":"Ahmed Hamdi","orcid":"https://orcid.org/0000-0002-8964-2135"},"institutions":[{"id":"https://openalex.org/I78744979","display_name":"La Rochelle Universit\u00e9","ror":"https://ror.org/04mv1z119","country_code":"FR","type":"education","lineage":["https://openalex.org/I78744979"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Ahmed Hamdi","raw_affiliation_strings":["University of La Rochelle,L3i Lab,La Rochelle,France,17000"],"affiliations":[{"raw_affiliation_string":"University of La Rochelle,L3i Lab,La Rochelle,France,17000","institution_ids":["https://openalex.org/I78744979"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5024641034","display_name":"A. Mokhtari","orcid":"https://orcid.org/0000-0002-3771-8257"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Amdjed Mokhtari","raw_affiliation_strings":["OODRIVE-Trusted Cloud Solutions,Paris,France,75010"],"affiliations":[{"raw_affiliation_string":"OODRIVE-Trusted Cloud Solutions,Paris,France,75010","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5076759830","display_name":"Yacine Ghamri-Doudane","orcid":"https://orcid.org/0000-0002-7986-2476"},"institutions":[{"id":"https://openalex.org/I78744979","display_name":"La Rochelle Universit\u00e9","ror":"https://ror.org/04mv1z119","country_code":"FR","type":"education","lineage":["https://openalex.org/I78744979"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Yacine Ghamri-Doudane","raw_affiliation_strings":["University of La Rochelle,L3i Lab,La Rochelle,France,17000"],"affiliations":[{"raw_affiliation_string":"University of La Rochelle,L3i Lab,La Rochelle,France,17000","institution_ids":["https://openalex.org/I78744979"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5094044949"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.24003708,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"2053","last_page":"2058"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.7868000268936157,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.7868000268936157,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/refining","display_name":"Refining (metallurgy)","score":0.8444191813468933},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6963887810707092},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.4425398111343384},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.38927558064460754},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.3821556568145752},{"id":"https://openalex.org/keywords/materials-science","display_name":"Materials science","score":0.0786842405796051}],"concepts":[{"id":"https://openalex.org/C60044698","wikidata":"https://www.wikidata.org/wiki/Q1283324","display_name":"Refining (metallurgy)","level":2,"score":0.8444191813468933},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6963887810707092},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.4425398111343384},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.38927558064460754},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3821556568145752},{"id":"https://openalex.org/C192562407","wikidata":"https://www.wikidata.org/wiki/Q228736","display_name":"Materials science","level":0,"score":0.0786842405796051},{"id":"https://openalex.org/C191897082","wikidata":"https://www.wikidata.org/wiki/Q11467","display_name":"Metallurgy","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/smc54092.2024.10831852","is_oa":false,"landing_page_url":"https://doi.org/10.1109/smc54092.2024.10831852","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 IEEE International Conference on Systems, Man, and Cybernetics (SMC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":12,"referenced_works":["https://openalex.org/W2053154970","https://openalex.org/W2064675550","https://openalex.org/W2742330194","https://openalex.org/W2980708516","https://openalex.org/W3103912187","https://openalex.org/W4328053648","https://openalex.org/W4366549000","https://openalex.org/W4385245566","https://openalex.org/W4391845196","https://openalex.org/W4401044057","https://openalex.org/W6777615688","https://openalex.org/W6852865209"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W1595345252","https://openalex.org/W2392526918","https://openalex.org/W2362540361","https://openalex.org/W2019560916","https://openalex.org/W2361983698","https://openalex.org/W2347697528","https://openalex.org/W2354123794"],"abstract_inverted_index":{"The":[0,193],"need":[1,46],"for":[2],"document":[3,35,86],"exchange":[4],"between":[5],"people,":[6],"companies":[7],"and":[8,31,68,88,142,174],"government":[9],"increases":[10],"every":[11],"day.":[12],"Consequently,":[13],"safeguarding":[14],"documents":[15,54,138,165],"against":[16],"potential":[17],"attackers":[18],"becomes":[19],"increasingly":[20],"crucial.":[21],"Several":[22],"attacks":[23],"have":[24,64,81],"been":[25,197],"reported":[26],"over":[27,199],"the":[28,32,50,106,185],"past":[29],"years":[30],"risk":[33],"of":[34,53,109,136,164,191],"leak":[36],"is":[37],"more":[38],"present":[39],"nowadays.":[40],"To":[41,75],"prevent":[42],"data":[43],"violation,":[44],"we":[45,124,150],"tools":[47],"to":[48,58,66,69,72,99,105,115,131,159,182],"determine":[49],"sensitivity":[51,73,91,169],"degree":[52],"which":[55],"allows":[56],"us":[57],"guarantee":[59],"that":[60],"only":[61],"authorized":[62],"people":[63],"access":[65],"them":[67],"adapt":[70],"strategies":[71,154],"levels.":[74],"achieve":[76],"this,":[77],"deep":[78],"learning":[79],"techniques":[80],"shown":[82],"good":[83],"performances":[84],"in":[85,90,118,139,188],"classification":[87],"therefore":[89],"identification.":[92],"Such":[93],"approaches":[94],"require":[95],"sufficiently":[96],"large":[97],"resources":[98],"learn":[100],"robust":[101,201],"models.":[102],"However,":[103],"due":[104],"sensitive":[107],"nature":[108],"documents,":[110],"public":[111],"datasets":[112],"are":[113],"missing":[114],"conduct":[116],"research":[117],"this":[119,122],"context.":[120],"In":[121],"paper,":[123],"experiment":[125],"with":[126],"Large":[127],"Language":[128],"Models":[129],"(LLM)":[130],"generate":[132],"a":[133,146,161,189],"multi-domain":[134],"dataset":[135,163,195],"business":[137],"both":[140],"english":[141],"french":[143],"languages.":[144],"Utilizing":[145],"two-step":[147],"generation":[148],"process,":[149],"employ":[151],"several":[152],"prompting":[153],"across":[155],"six":[156],"language":[157],"models":[158],"create":[160],"first":[162],"classified":[166],"into":[167],"4":[168],"classes:":[170],"Public,":[171],"Internal,":[172],"Confidential":[173],"Restricted.":[175],"We":[176],"then":[177],"relied":[178],"on":[179],"human":[180],"experts":[181],"review":[183],"validate":[184],"annotations":[186],"generated":[187,194],"sample":[190],"documents.":[192],"has":[196],"tested":[198],"two":[200],"baselines.":[202]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
