{"id":"https://openalex.org/W4417132638","doi":"https://doi.org/10.1109/saner-c66551.2025.00016","title":"Malicious and Unintentional Disclosure Risks in Large Language Models for Code Generation","display_name":"Malicious and Unintentional Disclosure Risks in Large Language Models for Code Generation","publication_year":2025,"publication_date":"2025-03-04","ids":{"openalex":"https://openalex.org/W4417132638","doi":"https://doi.org/10.1109/saner-c66551.2025.00016"},"language":null,"primary_location":{"id":"doi:10.1109/saner-c66551.2025.00016","is_oa":false,"landing_page_url":"https://doi.org/10.1109/saner-c66551.2025.00016","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Conference on Software Analysis, Evolution and Reengineering - Companion (SANER-C)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5112554318","display_name":"Rafiqul Islam Rabin","orcid":null},"institutions":[{"id":"https://openalex.org/I1293631320","display_name":"American Institutes for Research","ror":"https://ror.org/00490n048","country_code":"US","type":"funder","lineage":["https://openalex.org/I1293631320"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Rafiqul Rabin","raw_affiliation_strings":["UL Research Institutes,Digital Safety Research Institute"],"affiliations":[{"raw_affiliation_string":"UL Research Institutes,Digital Safety Research Institute","institution_ids":["https://openalex.org/I1293631320"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5030293975","display_name":"Sean McGregor","orcid":"https://orcid.org/0000-0001-5803-4981"},"institutions":[{"id":"https://openalex.org/I1293631320","display_name":"American Institutes for Research","ror":"https://ror.org/00490n048","country_code":"US","type":"funder","lineage":["https://openalex.org/I1293631320"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Sean McGregor","raw_affiliation_strings":["UL Research Institutes,Digital Safety Research Institute"],"affiliations":[{"raw_affiliation_string":"UL Research Institutes,Digital Safety Research Institute","institution_ids":["https://openalex.org/I1293631320"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5085552254","display_name":"Nick Judd","orcid":null},"institutions":[{"id":"https://openalex.org/I1293631320","display_name":"American Institutes for Research","ror":"https://ror.org/00490n048","country_code":"US","type":"funder","lineage":["https://openalex.org/I1293631320"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Nick Judd","raw_affiliation_strings":["UL Research Institutes,Digital Safety Research Institute"],"affiliations":[{"raw_affiliation_string":"UL Research Institutes,Digital Safety Research Institute","institution_ids":["https://openalex.org/I1293631320"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5112554318"],"corresponding_institution_ids":["https://openalex.org/I1293631320"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.51059619,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"61","last_page":"68"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.2289000004529953,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.2289000004529953,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11241","display_name":"Advanced Malware Detection Techniques","score":0.17139999568462372,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11986","display_name":"Scientific Computing and Data Management","score":0.08489999920129776,"subfield":{"id":"https://openalex.org/subfields/1802","display_name":"Information Systems and Management"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.513700008392334},{"id":"https://openalex.org/keywords/memorization","display_name":"Memorization","score":0.45969998836517334},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.4377000033855438},{"id":"https://openalex.org/keywords/unintended-consequences","display_name":"Unintended consequences","score":0.43290001153945923},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.39640000462532043},{"id":"https://openalex.org/keywords/information-sensitivity","display_name":"Information sensitivity","score":0.39579999446868896},{"id":"https://openalex.org/keywords/risk-assessment","display_name":"Risk assessment","score":0.3919000029563904},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.3774999976158142},{"id":"https://openalex.org/keywords/encryption","display_name":"Encryption","score":0.37630000710487366}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7098000049591064},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.5361999869346619},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.513700008392334},{"id":"https://openalex.org/C30038468","wikidata":"https://www.wikidata.org/wiki/Q4354775","display_name":"Memorization","level":2,"score":0.45969998836517334},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.4377000033855438},{"id":"https://openalex.org/C2776889888","wikidata":"https://www.wikidata.org/wiki/Q1135789","display_name":"Unintended consequences","level":2,"score":0.43290001153945923},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.39640000462532043},{"id":"https://openalex.org/C137822555","wikidata":"https://www.wikidata.org/wiki/Q2587068","display_name":"Information sensitivity","level":2,"score":0.39579999446868896},{"id":"https://openalex.org/C12174686","wikidata":"https://www.wikidata.org/wiki/Q1058438","display_name":"Risk assessment","level":2,"score":0.3919000029563904},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.3774999976158142},{"id":"https://openalex.org/C148730421","wikidata":"https://www.wikidata.org/wiki/Q141090","display_name":"Encryption","level":2,"score":0.37630000710487366},{"id":"https://openalex.org/C18762648","wikidata":"https://www.wikidata.org/wiki/Q42213","display_name":"Work (physics)","level":2,"score":0.34709998965263367},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.3450999855995178},{"id":"https://openalex.org/C165609540","wikidata":"https://www.wikidata.org/wiki/Q1172486","display_name":"Data breach","level":2,"score":0.329800009727478},{"id":"https://openalex.org/C2777267654","wikidata":"https://www.wikidata.org/wiki/Q3519023","display_name":"Test (biology)","level":2,"score":0.32519999146461487},{"id":"https://openalex.org/C108827166","wikidata":"https://www.wikidata.org/wiki/Q175975","display_name":"Internet privacy","level":1,"score":0.31349998712539673},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.30809998512268066},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.30000001192092896},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.29120001196861267},{"id":"https://openalex.org/C32896092","wikidata":"https://www.wikidata.org/wiki/Q189447","display_name":"Risk management","level":2,"score":0.28220000863075256},{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.2816999852657318},{"id":"https://openalex.org/C9652623","wikidata":"https://www.wikidata.org/wiki/Q190109","display_name":"Field (mathematics)","level":2,"score":0.26840001344680786},{"id":"https://openalex.org/C99221444","wikidata":"https://www.wikidata.org/wiki/Q1532069","display_name":"Private information retrieval","level":2,"score":0.267300009727478},{"id":"https://openalex.org/C107101626","wikidata":"https://www.wikidata.org/wiki/Q842234","display_name":"Full disclosure","level":2,"score":0.2655999958515167},{"id":"https://openalex.org/C2776187449","wikidata":"https://www.wikidata.org/wiki/Q1513879","display_name":"Natural language generation","level":3,"score":0.2535000145435333},{"id":"https://openalex.org/C184356942","wikidata":"https://www.wikidata.org/wiki/Q830382","display_name":"Best practice","level":2,"score":0.2517000138759613},{"id":"https://openalex.org/C47487241","wikidata":"https://www.wikidata.org/wiki/Q5227230","display_name":"Data access","level":2,"score":0.2515000104904175}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/saner-c66551.2025.00016","is_oa":false,"landing_page_url":"https://doi.org/10.1109/saner-c66551.2025.00016","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Conference on Software Analysis, Evolution and Reengineering - Companion (SANER-C)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320317052","display_name":"Allen Institute for Artificial Intelligence","ror":"https://ror.org/05w520734"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":20,"referenced_works":["https://openalex.org/W3035261884","https://openalex.org/W3170672407","https://openalex.org/W4252326246","https://openalex.org/W4284664377","https://openalex.org/W4295646147","https://openalex.org/W4384345631","https://openalex.org/W4385571225","https://openalex.org/W4385573569","https://openalex.org/W4385679821","https://openalex.org/W4391591654","https://openalex.org/W4393161682","https://openalex.org/W4394744221","https://openalex.org/W4394745382","https://openalex.org/W4399577216","https://openalex.org/W4400582422","https://openalex.org/W4402671236","https://openalex.org/W4402671286","https://openalex.org/W4404518445","https://openalex.org/W4404781336","https://openalex.org/W4409360930"],"related_works":[],"abstract_inverted_index":{"This":[0],"paper":[1],"explores":[2],"the":[3,39,57,78,125,160,176,194,214],"risk":[4,169,177],"that":[5,24,83,142,159,175],"a":[6,95],"large":[7],"language":[8],"model":[9],"(LLM)":[10],"trained":[11],"for":[12,213],"code":[13],"generation":[14],"on":[15,202],"data":[16,145,203,217],"mined":[17],"from":[18],"software":[19],"repositories":[20],"will":[21],"generate":[22],"content":[23],"discloses":[25],"sensitive":[26,180,197],"information":[27,181],"included":[28],"in":[29,38,144,154],"its":[30,134],"training":[31,79,112,136,216],"data.":[32],"We":[33,81,97,116],"decompose":[34],"this":[35],"risk,":[36],"known":[37],"literature":[40],"as":[41],"\"unintended":[42],"memorization,\"":[43],"into":[44],"two":[45],"components:":[46],"unintentional":[47,91,102],"disclosure":[48,64,92,105],"(where":[49,65],"an":[50,66,71,121],"LLM":[51,67,215],"presents":[52,68],"secrets":[53,69],"to":[54,70,100,205],"users":[55],"without":[56],"user":[58],"seeking":[59],"them":[60],"out)":[61],"and":[62,103,114,133,147,209],"malicious":[63,89,104],"attacker":[72],"equipped":[73],"with":[74,151],"partial":[75],"knowledge":[76],"of":[77,111,124,131,163,178,196],"data).":[80],"observe":[82],"while":[84,170],"existing":[85],"work":[86],"mostly":[87],"anticipates":[88],"disclosure,":[90],"is":[93],"also":[94,192],"concern.":[96],"describe":[98],"methods":[99,119],"assess":[101],"risks":[106],"side-by-side":[107],"across":[108],"different":[109],"releases":[110],"datasets":[113,190],"models.":[115],"demonstrate":[117],"these":[118],"through":[120],"independent":[122],"assessment":[123],"Open":[126],"Language":[127],"Model":[128],"(OLMo)":[129],"family":[130],"models":[132],"Dolma":[135],"datasets.":[137],"Our":[138],"results":[139],"show,":[140],"first,":[141],"changes":[143,153,165],"source":[146],"processing":[148],"are":[149],"associated":[150],"substantial":[152],"unintended":[155],"memorization":[156],"risk;":[157],"second,":[158],"same":[161],"set":[162],"operational":[164],"may":[166],"increase":[167],"one":[168],"mitigating":[171],"another;":[172],"and,":[173],"third,":[174],"disclosing":[179],"varies":[182],"not":[183],"only":[184],"by":[185,193],"prompt":[186],"strategies":[187],"or":[188],"test":[189],"but":[191],"types":[195],"information.":[198],"These":[199],"contributions":[200],"rely":[201],"mining":[204],"enable":[206],"greater":[207],"privacy":[208],"security":[210],"testing":[211],"required":[212],"supply":[218],"chain.":[219]},"counts_by_year":[],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-12-08T00:00:00"}
