{"id":"https://openalex.org/W7130578036","doi":"https://doi.org/10.48550/arxiv.2602.15866","title":"NLP Privacy Risk Identification in Social Media (NLP-PRISM): A Survey","display_name":"NLP Privacy Risk Identification in Social Media (NLP-PRISM): A Survey","publication_year":2026,"publication_date":"2026-01-26","ids":{"openalex":"https://openalex.org/W7130578036","doi":"https://doi.org/10.48550/arxiv.2602.15866"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2602.15866","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.15866","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2602.15866","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5083215154","display_name":"Dhiman Goswami","orcid":"https://orcid.org/0000-0002-0112-9653"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Goswami, Dhiman","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5123846397","display_name":"Jai Kruthunz Naveen Kumar","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kumar, Jai Kruthunz Naveen","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5079539401","display_name":"Sanchari Das","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Das, Sanchari","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5083215154"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12262","display_name":"Hate Speech and Cyberbullying Detection","score":0.7936999797821045,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12262","display_name":"Hate Speech and Cyberbullying Detection","score":0.7936999797821045,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11147","display_name":"Misinformation and Its Impacts","score":0.04309999942779541,"subfield":{"id":"https://openalex.org/subfields/3312","display_name":"Sociology and Political Science"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T12380","display_name":"Authorship Attribution and Profiling","score":0.029100000858306885,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/social-media","display_name":"Social media","score":0.7361000180244446},{"id":"https://openalex.org/keywords/metadata","display_name":"Metadata","score":0.6578999757766724},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.6270999908447266},{"id":"https://openalex.org/keywords/language-identification","display_name":"Language identification","score":0.5454000234603882},{"id":"https://openalex.org/keywords/identification","display_name":"Identification (biology)","score":0.5181000232696533},{"id":"https://openalex.org/keywords/sentiment-analysis","display_name":"Sentiment analysis","score":0.4334999918937683},{"id":"https://openalex.org/keywords/security-token","display_name":"Security token","score":0.4302999973297119},{"id":"https://openalex.org/keywords/offensive","display_name":"Offensive","score":0.4099999964237213}],"concepts":[{"id":"https://openalex.org/C518677369","wikidata":"https://www.wikidata.org/wiki/Q202833","display_name":"Social media","level":2,"score":0.7361000180244446},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6797999739646912},{"id":"https://openalex.org/C93518851","wikidata":"https://www.wikidata.org/wiki/Q180160","display_name":"Metadata","level":2,"score":0.6578999757766724},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.6270999908447266},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6140000224113464},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.58160001039505},{"id":"https://openalex.org/C129792486","wikidata":"https://www.wikidata.org/wiki/Q1050419","display_name":"Language identification","level":3,"score":0.5454000234603882},{"id":"https://openalex.org/C116834253","wikidata":"https://www.wikidata.org/wiki/Q2039217","display_name":"Identification (biology)","level":2,"score":0.5181000232696533},{"id":"https://openalex.org/C66402592","wikidata":"https://www.wikidata.org/wiki/Q2271421","display_name":"Sentiment analysis","level":2,"score":0.4334999918937683},{"id":"https://openalex.org/C48145219","wikidata":"https://www.wikidata.org/wiki/Q1335365","display_name":"Security token","level":2,"score":0.4302999973297119},{"id":"https://openalex.org/C176856949","wikidata":"https://www.wikidata.org/wiki/Q2001676","display_name":"Offensive","level":2,"score":0.4099999964237213},{"id":"https://openalex.org/C155092808","wikidata":"https://www.wikidata.org/wiki/Q182557","display_name":"Computational linguistics","level":2,"score":0.40610000491142273},{"id":"https://openalex.org/C108827166","wikidata":"https://www.wikidata.org/wiki/Q175975","display_name":"Internet privacy","level":1,"score":0.3921000063419342},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.3483999967575073},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.3222000002861023},{"id":"https://openalex.org/C123201435","wikidata":"https://www.wikidata.org/wiki/Q456632","display_name":"Information privacy","level":2,"score":0.31850001215934753},{"id":"https://openalex.org/C137822555","wikidata":"https://www.wikidata.org/wiki/Q2587068","display_name":"Information sensitivity","level":2,"score":0.3116999864578247},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.30149999260902405},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.2969000041484833},{"id":"https://openalex.org/C17632256","wikidata":"https://www.wikidata.org/wiki/Q1076968","display_name":"Digital media","level":2,"score":0.2867000102996826},{"id":"https://openalex.org/C79158427","wikidata":"https://www.wikidata.org/wiki/Q485396","display_name":"Analytics","level":2,"score":0.27730000019073486},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.27480000257492065},{"id":"https://openalex.org/C2778729106","wikidata":"https://www.wikidata.org/wiki/Q1140126","display_name":"Social media analytics","level":3,"score":0.27379998564720154},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.26989999413490295},{"id":"https://openalex.org/C175968658","wikidata":"https://www.wikidata.org/wiki/Q839447","display_name":"Privacy laws of the United States","level":3,"score":0.2612000107765198},{"id":"https://openalex.org/C2779135771","wikidata":"https://www.wikidata.org/wiki/Q403574","display_name":"Named-entity recognition","level":3,"score":0.25999999046325684}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2602.15866","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.15866","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2602.15866","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.15866","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"score":0.4344509243965149,"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Natural":[0],"Language":[1],"Processing":[2],"(NLP)":[3],"is":[4],"integral":[5],"to":[6,165],"social":[7,170],"media":[8,171],"analytics":[9],"but":[10,82],"often":[11],"processes":[12],"content":[13],"containing":[14],"Personally":[15],"Identifiable":[16],"Information":[17],"(PII),":[18],"behavioral":[19],"cues,":[20],"and":[21,30,43,68,119,162],"metadata":[22],"raising":[23],"privacy":[24,96,127],"risks":[25],"such":[26],"as":[27],"surveillance,":[28],"profiling,":[29],"targeted":[31],"advertising.":[32],"To":[33],"systematically":[34],"assess":[35],"these":[36],"risks,":[37],"we":[38,94,155],"review":[39],"203":[40],"peer-reviewed":[41],"papers":[42],"propose":[44],"the":[45],"NLP":[46,100,168],"Privacy":[47],"Risk":[48],"Identification":[49],"in":[50,98,126,139,169],"Social":[51],"Media":[52],"(NLP-PRISM)":[53],"framework,":[54],"which":[55],"evaluates":[56],"vulnerabilities":[57],"across":[58],"six":[59,99],"dimensions:":[60],"data":[61],"collection,":[62],"preprocessing,":[63],"visibility,":[64],"fairness,":[65],"computational":[66],"risk,":[67],"regulatory":[69],"compliance.":[70],"Our":[71],"analysis":[72,103],"shows":[73],"that":[74],"transformer":[75],"models":[76],"achieve":[77],"F1-scores":[78],"ranging":[79],"from":[80],"0.58-0.84,":[81],"incur":[83],"a":[84,132],"1%":[85],"-":[86,136],"23%":[87],"drop":[88],"under":[89],"privacy-preserving":[90],"fine-tuning.":[91],"Using":[92],"NLP-PRISM,":[93],"examine":[95],"coverage":[97],"tasks:":[101],"sentiment":[102],"(16),":[104],"emotion":[105],"detection":[106,121],"(14),":[107],"offensive":[108],"language":[109,116],"identification":[110,117],"(19),":[111],"code-mixed":[112],"processing":[113],"(39),":[114],"native":[115],"(29),":[118],"dialect":[120],"(24)":[122],"revealing":[123],"substantial":[124],"gaps":[125],"research.":[128],"We":[129],"further":[130],"found":[131],"(reduced":[133],"by":[134],"2%":[135],"9%)":[137],"trade-off":[138],"model":[140],"utility,":[141],"MIA":[142],"AUC":[143],"(membership":[144],"inference":[145,152],"attacks)":[146],"0.81,":[147],"AIA":[148],"accuracy":[149],"0.75":[150],"(attribute":[151],"attacks).":[153],"Finally,":[154],"advocate":[156],"for":[157],"stronger":[158],"anonymization,":[159],"privacy-aware":[160],"learning,":[161],"fairness-driven":[163],"training":[164],"enable":[166],"ethical":[167],"contexts.":[172]},"counts_by_year":[],"updated_date":"2026-02-20T06:18:38.638704","created_date":"2026-02-20T00:00:00"}
