{"id":"https://openalex.org/W4409657193","doi":"https://doi.org/10.1145/3696410.3714703","title":"Self-Comparison for Dataset-Level Membership Inference in Large (Vision-)Language Model","display_name":"Self-Comparison for Dataset-Level Membership Inference in Large (Vision-)Language Model","publication_year":2025,"publication_date":"2025-04-22","ids":{"openalex":"https://openalex.org/W4409657193","doi":"https://doi.org/10.1145/3696410.3714703"},"language":"en","primary_location":{"id":"doi:10.1145/3696410.3714703","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3696410.3714703","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3696410.3714703","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM on Web Conference 2025","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3696410.3714703","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5103893731","display_name":"Jie Ren","orcid":"https://orcid.org/0000-0003-2663-6405"},"institutions":[{"id":"https://openalex.org/I87216513","display_name":"Michigan State University","ror":"https://ror.org/05hs6h993","country_code":"US","type":"education","lineage":["https://openalex.org/I87216513"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Jie Ren","raw_affiliation_strings":["Michigan State University, East Lansing, Michigan, USA"],"affiliations":[{"raw_affiliation_string":"Michigan State University, East Lansing, Michigan, USA","institution_ids":["https://openalex.org/I87216513"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5013645188","display_name":"Kangrui Chen","orcid":null},"institutions":[{"id":"https://openalex.org/I87216513","display_name":"Michigan State University","ror":"https://ror.org/05hs6h993","country_code":"US","type":"education","lineage":["https://openalex.org/I87216513"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Kangrui Chen","raw_affiliation_strings":["Michigan State University, East Lansing, Michigan, USA"],"affiliations":[{"raw_affiliation_string":"Michigan State University, East Lansing, Michigan, USA","institution_ids":["https://openalex.org/I87216513"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5114973667","display_name":"Chen Chen","orcid":"https://orcid.org/0000-0001-7359-8515"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen Chen","raw_affiliation_strings":["Sony AI, Zurich, Switzerland"],"affiliations":[{"raw_affiliation_string":"Sony AI, Zurich, Switzerland","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5011437254","display_name":"Vikash Sehwag","orcid":"https://orcid.org/0000-0001-7160-8556"},"institutions":[{"id":"https://openalex.org/I2800278093","display_name":"Sony Corporation (United States)","ror":"https://ror.org/05k91zb11","country_code":"US","type":"company","lineage":["https://openalex.org/I2800278093"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Vikash Sehwag","raw_affiliation_strings":["Sony AI, New York City, New York, USA"],"affiliations":[{"raw_affiliation_string":"Sony AI, New York City, New York, USA","institution_ids":["https://openalex.org/I2800278093"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5041044132","display_name":"Yue Xing","orcid":"https://orcid.org/0000-0001-7723-0048"},"institutions":[{"id":"https://openalex.org/I87216513","display_name":"Michigan State University","ror":"https://ror.org/05hs6h993","country_code":"US","type":"education","lineage":["https://openalex.org/I87216513"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yue Xing","raw_affiliation_strings":["Michigan State University, East Lansing, Michigan, USA"],"affiliations":[{"raw_affiliation_string":"Michigan State University, East Lansing, Michigan, USA","institution_ids":["https://openalex.org/I87216513"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5040639891","display_name":"Jiliang Tang","orcid":"https://orcid.org/0000-0001-7125-3898"},"institutions":[{"id":"https://openalex.org/I87216513","display_name":"Michigan State University","ror":"https://ror.org/05hs6h993","country_code":"US","type":"education","lineage":["https://openalex.org/I87216513"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jiliang Tang","raw_affiliation_strings":["Michigan State University, East Lansing, Michigan, USA"],"affiliations":[{"raw_affiliation_string":"Michigan State University, East Lansing, Michigan, USA","institution_ids":["https://openalex.org/I87216513"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5052577882","display_name":"Lingjuan Lyu","orcid":"https://orcid.org/0000-0003-3170-4994"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lingjuan Lyu","raw_affiliation_strings":["Sony AI, Zurich, Switzerland"],"affiliations":[{"raw_affiliation_string":"Sony AI, Zurich, Switzerland","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5103893731"],"corresponding_institution_ids":["https://openalex.org/I87216513"],"apc_list":null,"apc_paid":null,"fwci":2.4268,"has_fulltext":true,"cited_by_count":2,"citation_normalized_percentile":{"value":0.88294525,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"910","last_page":"920"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7597233057022095},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.6843781471252441},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5166052579879761},{"id":"https://openalex.org/keywords/data-modeling","display_name":"Data modeling","score":0.4342080056667328},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.43098723888397217},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.10304462909698486}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7597233057022095},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.6843781471252441},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5166052579879761},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.4342080056667328},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.43098723888397217},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.10304462909698486}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3696410.3714703","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3696410.3714703","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3696410.3714703","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM on Web Conference 2025","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3696410.3714703","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3696410.3714703","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3696410.3714703","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM on Web Conference 2025","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1032364338","display_name":"Interactive, Individualized Professional Learning for Elementary School Teachers: Enhancing Content and Pedagogical Content Knowledge as a Basis for Improving Practice","funder_award_id":"2405483","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G1432373144","display_name":null,"funder_award_id":"W911NF-21-1-","funder_id":"https://openalex.org/F4320338281","funder_display_name":"Army Research Office"},{"id":"https://openalex.org/G2652453830","display_name":"NeTS: Small: Exploring the Non-Standardized Polices, Operations, and Requirements for 5G Cellular Networks and Beyond: Advancing the Modeling, Tools, and Evaluation","funder_award_id":"2321416","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G3695851104","display_name":"III:Medium:Computation and Communication Efficient Distributed Learning","funder_award_id":"2212032","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G4659635499","display_name":"Collaborative Research: SaTC: CORE: Medium: Safeguarding Next-Generation Emergency Services (NG-9-1-1) over Cellular Networks: From Design to Practice","funder_award_id":"2246050","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G5349860205","display_name":"Intelligent, Adaptive Program with Just-in-time Feedback for Preservice Teachers","funder_award_id":"2234015","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G545420438","display_name":"Collaborative Research: III: Medium: Graph Neural Networks for Heterophilous Data: Advancing the Theory, Models, and Applications","funder_award_id":"2212144","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G5808857907","display_name":null,"funder_award_id":"DRL2405483","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G5921281487","display_name":null,"funder_award_id":"number","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G6113845086","display_name":null,"funder_award_id":"2035472","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G647087074","display_name":null,"funder_award_id":"IOS2107215","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G7402679956","display_name":null,"funder_award_id":"IOS2035472","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G7452299184","display_name":null,"funder_award_id":"W911NF","funder_id":"https://openalex.org/F4320338281","funder_display_name":"Army Research Office"},{"id":"https://openalex.org/G7794432752","display_name":null,"funder_award_id":"IIS2212144","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G8083687766","display_name":null,"funder_award_id":"CNS2246050","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G848032724","display_name":null,"funder_award_id":"Science","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G8657258523","display_name":"TRTech-PGR: Connecting sequences to functions within and between species through computational modeling and experimental studies","funder_award_id":"2107215","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G877152271","display_name":null,"funder_award_id":"W911NF-21-1-0198","funder_id":"https://openalex.org/F4320338281","funder_display_name":"Army Research Office"},{"id":"https://openalex.org/G8998121839","display_name":null,"funder_award_id":"911NF","funder_id":"https://openalex.org/F4320338281","funder_display_name":"Army Research Office"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320338281","display_name":"Army Research Office","ror":"https://ror.org/05epdh915"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4409657193.pdf","grobid_xml":"https://content.openalex.org/works/W4409657193.grobid-xml"},"referenced_works_count":21,"referenced_works":["https://openalex.org/W244947329","https://openalex.org/W2019759670","https://openalex.org/W2185175083","https://openalex.org/W2242479605","https://openalex.org/W2277195237","https://openalex.org/W2535690855","https://openalex.org/W2896348597","https://openalex.org/W2904565150","https://openalex.org/W2930926105","https://openalex.org/W2979382951","https://openalex.org/W2981852735","https://openalex.org/W3089472875","https://openalex.org/W3103245149","https://openalex.org/W3104279398","https://openalex.org/W4288089799","https://openalex.org/W4306820534","https://openalex.org/W4381598461","https://openalex.org/W4385571053","https://openalex.org/W4388254181","https://openalex.org/W6600669965","https://openalex.org/W7035904466"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W3204019825"],"abstract_inverted_index":{"Large":[0],"Language":[1],"Models":[2,6],"(LLMs)":[3],"and":[4,20,48,61,67,80,96,162,175,182,218,225],"Vision-Language":[5],"(VLMs)":[7],"have":[8,38],"made":[9],"significant":[10],"advancements":[11],"in":[12,33,84,201],"a":[13,30,123,135,140,145],"wide":[14],"range":[15],"of":[16,45,158,172],"natural":[17],"language":[18],"processing":[19],"vision-language":[21],"tasks.":[22],"Access":[23],"to":[24,78,94,168,194],"large":[25],"web-scale":[26],"datasets":[27,224],"has":[28],"been":[29,39],"key":[31],"factor":[32],"their":[34],"success.":[35],"However,":[36],"concerns":[37],"raised":[40],"about":[41],"the":[42,72,100,111,115,151,169,173,178],"unauthorized":[43],"use":[44],"copyrighted":[46],"materials":[47],"potential":[49],"copyright":[50],"infringement.":[51],"Existing":[52],"methods,":[53],"such":[54,98],"as":[55,99,114],"sample-level":[56],"Membership":[57],"Inference":[58],"Attacks":[59],"(MIA)":[60],"distribution-based":[62],"dataset,":[63],"inference":[64,127,220],"distinguish":[65],"member":[66,85,104,136,146,161,196],"non-member":[68,107,141,163,199],"data":[69,105,108,197,200],"by":[70,139],"leveraging":[71],"common":[73],"observation":[74],"that":[75,109,134,211],"models":[76],"tend":[77],"memorize":[79],"show":[81],"greater":[82],"confidence":[83],"data.":[86,117,156],"Nevertheless,":[87],"these":[88],"methods":[89],"face":[90],"challenges":[91],"when":[92],"applied":[93],"LLMs":[95],"VLMs,":[97],"requirement":[101],"for":[102],"ground-truth":[103,195],"or":[106,198],"shares":[110],"same":[112],"distribution":[113],"test":[116],"In":[118],"this":[119],"paper,":[120],"we":[121,165],"propose":[122],"novel":[124],"dataset-level":[125],"membership":[126],"method":[128,189,214],"based":[129],"on":[130,154],"Self-Comparison.":[131],"We":[132],"find":[133],"prefix":[137],"followed":[138],"suffix":[142],"(paraphrased":[143],"from":[144],"suffix)":[147],"can":[148],"further":[149],"trigger":[150],"model's":[152],"memorization":[153],"training":[155],"Instead":[157],"directly":[159],"comparing":[160],"data,":[164],"introduce":[166],"paraphrasing":[167],"second":[170],"half":[171],"sequence":[174],"evaluate":[176],"how":[177],"likelihood":[179],"changes":[180],"before":[181],"after":[183],"paraphrasing.":[184],"Unlike":[185],"prior":[186],"approaches,":[187],"our":[188,212],"does":[190],"not":[191],"require":[192],"access":[193],"identical":[202],"distribution,":[203],"making":[204],"it":[205],"more":[206],"practical.":[207],"Extensive":[208],"experiments":[209],"demonstrate":[210],"proposed":[213],"outperforms":[215],"traditional":[216],"MIA":[217],"dataset":[219],"techniques":[221],"across":[222],"various":[223],"models,":[226],"including":[227],"GPT-4o.":[228]},"counts_by_year":[{"year":2025,"cited_by_count":2}],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2025-10-10T00:00:00"}
