{"id":"https://openalex.org/W1967297847","doi":"https://doi.org/10.1145/1651449.1651453","title":"Record linkage performance for large data sets","display_name":"Record linkage performance for large data sets","publication_year":2009,"publication_date":"2009-11-06","ids":{"openalex":"https://openalex.org/W1967297847","doi":"https://doi.org/10.1145/1651449.1651453","mag":"1967297847"},"language":"en","primary_location":{"id":"doi:10.1145/1651449.1651453","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1651449.1651453","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM first international workshop on Privacy and anonymity for very large databases","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5052986694","display_name":"Jordi G\u00f3mez-Bao","orcid":null},"institutions":[{"id":"https://openalex.org/I9617848","display_name":"Universitat Polit\u00e8cnica de Catalunya","ror":"https://ror.org/03mb6wj31","country_code":"ES","type":"education","lineage":["https://openalex.org/I9617848"]}],"countries":["ES"],"is_corresponding":true,"raw_author_name":"Jordi G\u00f3mez-Bao","raw_affiliation_strings":["Universitat Polit\u00e8cnica de Catalunya, Barcelona, Spain"],"affiliations":[{"raw_affiliation_string":"Universitat Polit\u00e8cnica de Catalunya, Barcelona, Spain","institution_ids":["https://openalex.org/I9617848"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5062580164","display_name":"Josep-L. Larriba-Pey","orcid":"https://orcid.org/0000-0002-7070-9256"},"institutions":[{"id":"https://openalex.org/I9617848","display_name":"Universitat Polit\u00e8cnica de Catalunya","ror":"https://ror.org/03mb6wj31","country_code":"ES","type":"education","lineage":["https://openalex.org/I9617848"]}],"countries":["ES"],"is_corresponding":false,"raw_author_name":"Josep-L. Larriba-Pey","raw_affiliation_strings":["Universitat Polit\u00e8cnica de Catalunya, Barcelona, Spain"],"affiliations":[{"raw_affiliation_string":"Universitat Polit\u00e8cnica de Catalunya, Barcelona, Spain","institution_ids":["https://openalex.org/I9617848"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5086597227","display_name":"Josepa Ribes Puig","orcid":null},"institutions":[{"id":"https://openalex.org/I4210107147","display_name":"Institut Catal\u00e0 d'Oncologia","ror":"https://ror.org/01j1eb875","country_code":"ES","type":"government","lineage":["https://openalex.org/I4210107147"]}],"countries":["ES"],"is_corresponding":false,"raw_author_name":"Josepa Ribes Puig","raw_affiliation_strings":["Pla Director d'Oncologia de Catalunya, l'Hospitalet de Llogregat, Spain"],"affiliations":[{"raw_affiliation_string":"Pla Director d'Oncologia de Catalunya, l'Hospitalet de Llogregat, Spain","institution_ids":["https://openalex.org/I4210107147"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5052986694"],"corresponding_institution_ids":["https://openalex.org/I9617848"],"apc_list":null,"apc_paid":null,"fwci":0.4017,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.66413155,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"9","last_page":"16"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10764","display_name":"Privacy-Preserving Technologies in Data","score":0.9613000154495239,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11819","display_name":"Data-Driven Disease Surveillance","score":0.942799985408783,"subfield":{"id":"https://openalex.org/subfields/2713","display_name":"Epidemiology"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/record-linkage","display_name":"Record linkage","score":0.7558908462524414},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7135641574859619},{"id":"https://openalex.org/keywords/string","display_name":"String (physics)","score":0.688424825668335},{"id":"https://openalex.org/keywords/linkage","display_name":"Linkage (software)","score":0.5812039375305176},{"id":"https://openalex.org/keywords/blocking","display_name":"Blocking (statistics)","score":0.5146559476852417},{"id":"https://openalex.org/keywords/value","display_name":"Value (mathematics)","score":0.4526668190956116},{"id":"https://openalex.org/keywords/sliding-window-protocol","display_name":"Sliding window protocol","score":0.4500451982021332},{"id":"https://openalex.org/keywords/window","display_name":"Window (computing)","score":0.4432257413864136},{"id":"https://openalex.org/keywords/recall","display_name":"Recall","score":0.4368230998516083},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.40314823389053345},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.3749869465827942},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.20751222968101501},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.10918691754341125}],"concepts":[{"id":"https://openalex.org/C142210648","wikidata":"https://www.wikidata.org/wiki/Q1266546","display_name":"Record linkage","level":3,"score":0.7558908462524414},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7135641574859619},{"id":"https://openalex.org/C157486923","wikidata":"https://www.wikidata.org/wiki/Q1376436","display_name":"String (physics)","level":2,"score":0.688424825668335},{"id":"https://openalex.org/C31266012","wikidata":"https://www.wikidata.org/wiki/Q6554340","display_name":"Linkage (software)","level":3,"score":0.5812039375305176},{"id":"https://openalex.org/C144745244","wikidata":"https://www.wikidata.org/wiki/Q4927286","display_name":"Blocking (statistics)","level":2,"score":0.5146559476852417},{"id":"https://openalex.org/C2776291640","wikidata":"https://www.wikidata.org/wiki/Q2912517","display_name":"Value (mathematics)","level":2,"score":0.4526668190956116},{"id":"https://openalex.org/C102392041","wikidata":"https://www.wikidata.org/wiki/Q592860","display_name":"Sliding window protocol","level":3,"score":0.4500451982021332},{"id":"https://openalex.org/C2778751112","wikidata":"https://www.wikidata.org/wiki/Q835016","display_name":"Window (computing)","level":2,"score":0.4432257413864136},{"id":"https://openalex.org/C100660578","wikidata":"https://www.wikidata.org/wiki/Q18733","display_name":"Recall","level":2,"score":0.4368230998516083},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.40314823389053345},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3749869465827942},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.20751222968101501},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.10918691754341125},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C2908647359","wikidata":"https://www.wikidata.org/wiki/Q2625603","display_name":"Population","level":2,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0},{"id":"https://openalex.org/C149923435","wikidata":"https://www.wikidata.org/wiki/Q37732","display_name":"Demography","level":1,"score":0.0},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C37914503","wikidata":"https://www.wikidata.org/wiki/Q156495","display_name":"Mathematical physics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/1651449.1651453","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1651449.1651453","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM first international workshop on Privacy and anonymity for very large databases","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320321505","display_name":"Generalitat de Catalunya","ror":"https://ror.org/01bg62x04"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":21,"referenced_works":["https://openalex.org/W25706487","https://openalex.org/W1525914043","https://openalex.org/W1647671624","https://openalex.org/W2024386211","https://openalex.org/W2024770506","https://openalex.org/W2034190452","https://openalex.org/W2049633694","https://openalex.org/W2053062910","https://openalex.org/W2063408112","https://openalex.org/W2073471108","https://openalex.org/W2102763740","https://openalex.org/W2105423800","https://openalex.org/W2110783067","https://openalex.org/W2112912553","https://openalex.org/W2116544254","https://openalex.org/W2123561513","https://openalex.org/W2154785834","https://openalex.org/W4214671568","https://openalex.org/W6669040500","https://openalex.org/W6676669657","https://openalex.org/W7048738093"],"related_works":["https://openalex.org/W2487032012","https://openalex.org/W2211355040","https://openalex.org/W3014558862","https://openalex.org/W2808916796","https://openalex.org/W3161390536","https://openalex.org/W2176311362","https://openalex.org/W3012491082","https://openalex.org/W3211905090","https://openalex.org/W1540269031","https://openalex.org/W1936317645"],"abstract_inverted_index":{"We":[0],"propose":[1],"new":[2],"data":[3],"structures":[4],"to":[5],"speed":[6,33],"up":[7],"Record":[8],"Linkage":[9],"that":[10],"take":[11],"advantage":[12],"of":[13,17,38,60],"the":[14,31,53,58,69],"value":[15],"distribution":[16],"usual":[18],"string":[19,71],"attributes,":[20],"like":[21,63],"name":[22],"or":[23,43,65],"surname.":[24],"Using":[25],"some":[26],"additional":[27],"memory,":[28],"we":[29],"increase":[30],"processing":[32],"by":[34],"almost":[35],"an":[36],"order":[37],"magnitude":[39],"without":[40],"losing":[41],"recall":[42],"precision":[44],"at":[45],"all.":[46],"The":[47],"improvement":[48],"achieved":[49],"is":[50],"independent":[51],"from":[52],"methods":[54],"used":[55],"for":[56],"reducing":[57],"number":[59],"record":[61],"comparisons,":[62],"Blocking":[64],"Sliding":[66],"Window,":[67],"and":[68],"specific":[70],"comparison":[72],"functions.":[73]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2020,"cited_by_count":1},{"year":2012,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
