{"id":"https://openalex.org/W7155179997","doi":"https://doi.org/10.48550/arxiv.2604.19219","title":"Sherpa.ai Privacy-Preserving Multi-Party Entity Alignment without Intersection Disclosure for Noisy Identifiers","display_name":"Sherpa.ai Privacy-Preserving Multi-Party Entity Alignment without Intersection Disclosure for Noisy Identifiers","publication_year":2026,"publication_date":"2026-04-21","ids":{"openalex":"https://openalex.org/W7155179997","doi":"https://doi.org/10.48550/arxiv.2604.19219"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.19219","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.19219","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.19219","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5134247515","display_name":"Daniel M. Jimenez-Gutierrez","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Jimenez-Gutierrez, Daniel M.","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5088702266","display_name":"Enrique Zuazua","orcid":"https://orcid.org/0000-0002-1377-0958"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zuazua, Enrique","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5064604690","display_name":"Georgios Kellaris","orcid":"https://orcid.org/0000-0001-7558-5398"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kellaris, Georgios","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134245273","display_name":"Joaquin Del Rio","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Del Rio, Joaquin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5120362707","display_name":"Oleksii Sliusarenko","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sliusarenko, Oleksii","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5120319798","display_name":"Xabi Uribe-Etxebarria","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Uribe-Etxebarria, Xabi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5134247515"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10764","display_name":"Privacy-Preserving Technologies in Data","score":0.9107000231742859,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10764","display_name":"Privacy-Preserving Technologies in Data","score":0.9107000231742859,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.04569999873638153,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11273","display_name":"Advanced Graph Neural Networks","score":0.006800000090152025,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/correctness","display_name":"Correctness","score":0.8208000063896179},{"id":"https://openalex.org/keywords/intersection","display_name":"Intersection (aeronautics)","score":0.8047000169754028},{"id":"https://openalex.org/keywords/identifier","display_name":"Identifier","score":0.6682999730110168},{"id":"https://openalex.org/keywords/protocol","display_name":"Protocol (science)","score":0.5947999954223633},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.5320000052452087},{"id":"https://openalex.org/keywords/index","display_name":"Index (typography)","score":0.4794999957084656},{"id":"https://openalex.org/keywords/overhead","display_name":"Overhead (engineering)","score":0.4706999957561493},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.4404999911785126}],"concepts":[{"id":"https://openalex.org/C55439883","wikidata":"https://www.wikidata.org/wiki/Q360812","display_name":"Correctness","level":2,"score":0.8208000063896179},{"id":"https://openalex.org/C64543145","wikidata":"https://www.wikidata.org/wiki/Q162942","display_name":"Intersection (aeronautics)","level":2,"score":0.8047000169754028},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7368999719619751},{"id":"https://openalex.org/C154504017","wikidata":"https://www.wikidata.org/wiki/Q853614","display_name":"Identifier","level":2,"score":0.6682999730110168},{"id":"https://openalex.org/C2780385302","wikidata":"https://www.wikidata.org/wiki/Q367158","display_name":"Protocol (science)","level":3,"score":0.5947999954223633},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.5320000052452087},{"id":"https://openalex.org/C2777382242","wikidata":"https://www.wikidata.org/wiki/Q6017816","display_name":"Index (typography)","level":2,"score":0.4794999957084656},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.47290000319480896},{"id":"https://openalex.org/C2779960059","wikidata":"https://www.wikidata.org/wiki/Q7113681","display_name":"Overhead (engineering)","level":2,"score":0.4706999957561493},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.4404999911785126},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.3928999900817871},{"id":"https://openalex.org/C2777168461","wikidata":"https://www.wikidata.org/wiki/Q42196253","display_name":"Set operations","level":3,"score":0.3846000134944916},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3450999855995178},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.34310001134872437},{"id":"https://openalex.org/C45357846","wikidata":"https://www.wikidata.org/wiki/Q2001982","display_name":"Notation","level":2,"score":0.33399999141693115},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.3203999996185303},{"id":"https://openalex.org/C83665646","wikidata":"https://www.wikidata.org/wiki/Q42139305","display_name":"Feature vector","level":2,"score":0.3149000108242035},{"id":"https://openalex.org/C2778572836","wikidata":"https://www.wikidata.org/wiki/Q380933","display_name":"Space (punctuation)","level":2,"score":0.313400000333786},{"id":"https://openalex.org/C75165309","wikidata":"https://www.wikidata.org/wiki/Q2258979","display_name":"Search engine indexing","level":2,"score":0.3086000084877014},{"id":"https://openalex.org/C116834253","wikidata":"https://www.wikidata.org/wiki/Q2039217","display_name":"Identification (biology)","level":2,"score":0.30059999227523804},{"id":"https://openalex.org/C119839945","wikidata":"https://www.wikidata.org/wiki/Q6545185","display_name":"Unique identifier","level":3,"score":0.2971000075340271},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.2793999910354614},{"id":"https://openalex.org/C28944840","wikidata":"https://www.wikidata.org/wiki/Q1256545","display_name":"Synchro","level":2,"score":0.2757999897003174},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.2718999981880188}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.19219","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.19219","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.19219","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.19219","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16","score":0.7624902129173279}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Federated":[0],"Learning":[1],"(FL)":[2],"enables":[3,150],"collaborative":[4,234],"model":[5],"training":[6,55],"among":[7],"multiple":[8,162],"parties":[9,41,69,123,163],"without":[10,71],"centralizing":[11],"raw":[12],"data.":[13],"There":[14],"are":[15,75,118],"two":[16,122,170],"main":[17],"paradigms":[18],"in":[19,224],"FL:":[20],"Horizontal":[21],"FL":[22,38],"(HFL),":[23],"where":[24,40],"all":[25],"participants":[26],"share":[27],"the":[28,46,107,113,135],"same":[29,47],"feature":[30],"space":[31],"but":[32,86],"hold":[33],"different":[34],"samples,":[35],"and":[36,149,153,168,178,185,191,195,199,239,241,247],"Vertical":[37],"(VFL),":[39],"possess":[42],"complementary":[43],"features":[44],"for":[45,53,127,140,175,222],"set":[48,81,98],"of":[49,66,109],"samples.":[50],"A":[51],"prerequisite":[52],"VFL":[54,226],"is":[56],"privacy-preserving":[57],"entity":[58],"alignment":[59,85,177],"(PPEA),":[60],"which":[61,73],"establishes":[62],"a":[63,142,201,209,217],"common":[64],"index":[65,203,211],"samples":[67,74],"across":[68],"(alignment)":[70],"revealing":[72],"shared":[76,210],"between":[77,93,237,245],"them.":[78],"Conventional":[79],"private":[80,97],"intersection":[82,88,147,252],"(PSI)":[83],"achieves":[84],"leaks":[87],"membership,":[89],"exposing":[90],"sensitive":[91],"relationships":[92],"datasets.":[94],"The":[95,156],"standard":[96],"union":[99,108],"(PSU)":[100],"mitigates":[101],"this":[102,131],"risk":[103,235],"by":[104],"aligning":[105],"on":[106],"identifiers":[110],"rather":[111],"than":[112],"intersection.":[114],"However,":[115],"existing":[116],"approaches":[117,160],"often":[119],"limited":[120],"to":[121,161,183,208],"or":[124],"lack":[125],"support":[126],"typo-tolerant":[128],"matching.":[129,155],"In":[130],"paper,":[132],"we":[133],"introduce":[134],"Sherpa.ai":[136],"multi-party":[137,214],"PSU":[138,215],"protocol":[139,157,221],"VFL,":[141],"PPEA":[143,223],"method":[144],"that":[145],"hides":[146],"membership":[148],"both":[151],"exact":[152,176],"noisy":[154],"generalizes":[158],"two-party":[159],"with":[164],"low":[165],"communication":[166,194],"overhead":[167],"offers":[169,216],"variants:":[171],"an":[172,179],"order-preserving":[173],"version":[174,181],"unordered":[180],"tolerant":[182],"typographical":[184],"formatting":[186],"discrepancies.":[187],"We":[188],"prove":[189],"correctness":[190],"privacy,":[192],"analyze":[193],"computational":[196],"(exponentiation)":[197],"complexity,":[198],"formalize":[200],"universal":[202],"mapping":[204],"from":[205],"local":[206],"records":[207],"space.":[212],"This":[213],"scalable,":[218],"mathematically":[219],"grounded":[220],"real-world":[225],"deployments,":[227],"such":[228],"as":[229],"multi-institutional":[230],"healthcare":[231],"disease":[232],"detection,":[233],"modeling":[236],"banks":[238],"insurers,":[240],"cross-domain":[242],"fraud":[243],"detection":[244],"telecommunications":[246],"financial":[248],"institutions,":[249],"while":[250],"preserving":[251],"privacy.":[253]},"counts_by_year":[],"updated_date":"2026-04-23T06:20:18.424754","created_date":"2026-04-23T00:00:00"}
