{"id":"https://openalex.org/W2792495104","doi":"https://doi.org/10.1109/jbhi.2018.2796941","title":"On the Accuracy and Scalability of Probabilistic Data Linkage Over the Brazilian 114 Million Cohort","display_name":"On the Accuracy and Scalability of Probabilistic Data Linkage Over the Brazilian 114 Million Cohort","publication_year":2018,"publication_date":"2018-02-16","ids":{"openalex":"https://openalex.org/W2792495104","doi":"https://doi.org/10.1109/jbhi.2018.2796941","mag":"2792495104","pmid":"https://pubmed.ncbi.nlm.nih.gov/29505402"},"language":"en","primary_location":{"id":"doi:10.1109/jbhi.2018.2796941","is_oa":true,"landing_page_url":"https://doi.org/10.1109/jbhi.2018.2796941","pdf_url":null,"source":{"id":"https://openalex.org/S2495854775","display_name":"IEEE Journal of Biomedical and Health Informatics","issn_l":"2168-2194","issn":["2168-2194","2168-2208"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Journal of Biomedical and Health Informatics","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://doi.org/10.1109/jbhi.2018.2796941","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5000150014","display_name":"Robespierre Pita","orcid":"https://orcid.org/0000-0002-0616-620X"},"institutions":[{"id":"https://openalex.org/I126158947","display_name":"Universidade Federal da Bahia","ror":"https://ror.org/03k3p7647","country_code":"BR","type":"education","lineage":["https://openalex.org/I126158947"]}],"countries":["BR"],"is_corresponding":true,"raw_author_name":"Robespierre Pita","raw_affiliation_strings":["Institute of Mathematics and Statistics, Computer Science Department, Federal University of Bahia, Salvador, Brazil"],"raw_orcid":"https://orcid.org/0000-0002-0616-620X","affiliations":[{"raw_affiliation_string":"Institute of Mathematics and Statistics, Computer Science Department, Federal University of Bahia, Salvador, Brazil","institution_ids":["https://openalex.org/I126158947"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5072907443","display_name":"Cl\u00edcia Pinto","orcid":null},"institutions":[{"id":"https://openalex.org/I126158947","display_name":"Universidade Federal da Bahia","ror":"https://ror.org/03k3p7647","country_code":"BR","type":"education","lineage":["https://openalex.org/I126158947"]}],"countries":["BR"],"is_corresponding":false,"raw_author_name":"Cl\u00edcia Pinto","raw_affiliation_strings":["Institute of Mathematics and Statistics, Computer Science Department, Federal University of Bahia, Salvador, Brazil"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Institute of Mathematics and Statistics, Computer Science Department, Federal University of Bahia, Salvador, Brazil","institution_ids":["https://openalex.org/I126158947"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5112297895","display_name":"Samila Sena","orcid":null},"institutions":[{"id":"https://openalex.org/I126158947","display_name":"Universidade Federal da Bahia","ror":"https://ror.org/03k3p7647","country_code":"BR","type":"education","lineage":["https://openalex.org/I126158947"]}],"countries":["BR"],"is_corresponding":false,"raw_author_name":"Samila Sena","raw_affiliation_strings":["Institute of Mathematics and Statistics, Department of Statistics, Federal University of Bahia, Salvador, Brazil"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Institute of Mathematics and Statistics, Department of Statistics, Federal University of Bahia, Salvador, Brazil","institution_ids":["https://openalex.org/I126158947"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5009811249","display_name":"Rosemeire Leovigildo Fiaccone","orcid":"https://orcid.org/0000-0001-5439-1551"},"institutions":[{"id":"https://openalex.org/I126158947","display_name":"Universidade Federal da Bahia","ror":"https://ror.org/03k3p7647","country_code":"BR","type":"education","lineage":["https://openalex.org/I126158947"]}],"countries":["BR"],"is_corresponding":false,"raw_author_name":"Rosemeire Fiaccone","raw_affiliation_strings":["Institute of Mathematics and Statistics, Department of Statistics, Federal University of Bahia, Salvador, Brazil"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Institute of Mathematics and Statistics, Department of Statistics, Federal University of Bahia, Salvador, Brazil","institution_ids":["https://openalex.org/I126158947"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5026083338","display_name":"Leila Denise Alves Ferreira Amorim","orcid":"https://orcid.org/0000-0002-1112-2332"},"institutions":[{"id":"https://openalex.org/I126158947","display_name":"Universidade Federal da Bahia","ror":"https://ror.org/03k3p7647","country_code":"BR","type":"education","lineage":["https://openalex.org/I126158947"]}],"countries":["BR"],"is_corresponding":false,"raw_author_name":"Leila Amorim","raw_affiliation_strings":["Institute of Mathematics and Statistics, Department of Statistics, Federal University of Bahia, Salvador, Brazil"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Institute of Mathematics and Statistics, Department of Statistics, Federal University of Bahia, Salvador, Brazil","institution_ids":["https://openalex.org/I126158947"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5089501135","display_name":"Sandra Reis","orcid":"https://orcid.org/0000-0003-2092-3307"},"institutions":[{"id":"https://openalex.org/I52325","display_name":"Funda\u00e7\u00e3o Oswaldo Cruz","ror":"https://ror.org/04jhswv08","country_code":"BR","type":"facility","lineage":["https://openalex.org/I52325"]}],"countries":["BR"],"is_corresponding":false,"raw_author_name":"Sandra Reis","raw_affiliation_strings":["Centre for Data and Knowledge Integration for Health (CIDACS), Oswaldo Cruz Foundation, Salvador, Brazil"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Centre for Data and Knowledge Integration for Health (CIDACS), Oswaldo Cruz Foundation, Salvador, Brazil","institution_ids":["https://openalex.org/I52325"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5042413273","display_name":"Maur\u00edcio L. Barreto","orcid":"https://orcid.org/0000-0002-0215-4930"},"institutions":[{"id":"https://openalex.org/I126158947","display_name":"Universidade Federal da Bahia","ror":"https://ror.org/03k3p7647","country_code":"BR","type":"education","lineage":["https://openalex.org/I126158947"]},{"id":"https://openalex.org/I52325","display_name":"Funda\u00e7\u00e3o Oswaldo Cruz","ror":"https://ror.org/04jhswv08","country_code":"BR","type":"facility","lineage":["https://openalex.org/I52325"]}],"countries":["BR"],"is_corresponding":false,"raw_author_name":"Mauricio L. Barreto","raw_affiliation_strings":["Centre for Data and Knowledge Integration for Health (CIDACS), Oswaldo Cruz Foundation, Salvador, Brazil","Institute of Mathematics and Statistics, Computer Science Department, Federal University of Bahia, Salvador, Brazil"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Centre for Data and Knowledge Integration for Health (CIDACS), Oswaldo Cruz Foundation, Salvador, Brazil","institution_ids":["https://openalex.org/I52325"]},{"raw_affiliation_string":"Institute of Mathematics and Statistics, Computer Science Department, Federal University of Bahia, Salvador, Brazil","institution_ids":["https://openalex.org/I126158947"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5073010916","display_name":"Spiros Denaxas","orcid":"https://orcid.org/0000-0001-9612-7791"},"institutions":[{"id":"https://openalex.org/I45129253","display_name":"University College London","ror":"https://ror.org/02jx3x895","country_code":"GB","type":"education","lineage":["https://openalex.org/I124357947","https://openalex.org/I45129253"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Spiros Denaxas","raw_affiliation_strings":["Institute of Health Informatics, University College London, London, U.K"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Institute of Health Informatics, University College London, London, U.K","institution_ids":["https://openalex.org/I45129253"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5000123273","display_name":"Marcos Barreto","orcid":"https://orcid.org/0000-0002-7818-1855"},"institutions":[{"id":"https://openalex.org/I126158947","display_name":"Universidade Federal da Bahia","ror":"https://ror.org/03k3p7647","country_code":"BR","type":"education","lineage":["https://openalex.org/I126158947"]},{"id":"https://openalex.org/I45129253","display_name":"University College London","ror":"https://ror.org/02jx3x895","country_code":"GB","type":"education","lineage":["https://openalex.org/I124357947","https://openalex.org/I45129253"]},{"id":"https://openalex.org/I52325","display_name":"Funda\u00e7\u00e3o Oswaldo Cruz","ror":"https://ror.org/04jhswv08","country_code":"BR","type":"facility","lineage":["https://openalex.org/I52325"]}],"countries":["BR","GB"],"is_corresponding":false,"raw_author_name":"Marcos Ennes Barreto","raw_affiliation_strings":["Centre for Data and Knowledge Integration for Health (CIDACS), Oswaldo Cruz Foundation, Salvador, Brazil","Institute of Mathematics and Statistics, Computer Science Department, Federal University of Bahia, Salvador, Brazil","Institute of Health Informatics, University College London, London, U.K"],"raw_orcid":"https://orcid.org/0000-0002-7818-1855","affiliations":[{"raw_affiliation_string":"Centre for Data and Knowledge Integration for Health (CIDACS), Oswaldo Cruz Foundation, Salvador, Brazil","institution_ids":["https://openalex.org/I52325"]},{"raw_affiliation_string":"Institute of Mathematics and Statistics, Computer Science Department, Federal University of Bahia, Salvador, Brazil","institution_ids":["https://openalex.org/I126158947"]},{"raw_affiliation_string":"Institute of Health Informatics, University College London, London, U.K","institution_ids":["https://openalex.org/I45129253"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":9,"corresponding_author_ids":["https://openalex.org/A5000150014"],"corresponding_institution_ids":["https://openalex.org/I126158947"],"apc_list":null,"apc_paid":{"value":1581,"currency":"EUR","value_usd":1705},"fwci":4.6336,"has_fulltext":false,"cited_by_count":51,"citation_normalized_percentile":{"value":0.94735565,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":99},"biblio":{"volume":"22","issue":"2","first_page":"346","last_page":"353"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11819","display_name":"Data-Driven Disease Surveillance","score":0.9354000091552734,"subfield":{"id":"https://openalex.org/subfields/2713","display_name":"Epidemiology"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},{"id":"https://openalex.org/T11396","display_name":"Artificial Intelligence in Healthcare","score":0.9126999974250793,"subfield":{"id":"https://openalex.org/subfields/3605","display_name":"Health Information Management"},"field":{"id":"https://openalex.org/fields/36","display_name":"Health Professions"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/probabilistic-logic","display_name":"Probabilistic logic","score":0.7672996520996094},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7500556707382202},{"id":"https://openalex.org/keywords/record-linkage","display_name":"Record linkage","score":0.7413390874862671},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.6842525005340576},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.6427154541015625},{"id":"https://openalex.org/keywords/linkage","display_name":"Linkage (software)","score":0.5929315686225891},{"id":"https://openalex.org/keywords/population","display_name":"Population","score":0.4564284682273865},{"id":"https://openalex.org/keywords/data-set","display_name":"Data set","score":0.41542795300483704},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.2730949819087982},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.20037227869033813},{"id":"https://openalex.org/keywords/medicine","display_name":"Medicine","score":0.16298213601112366}],"concepts":[{"id":"https://openalex.org/C49937458","wikidata":"https://www.wikidata.org/wiki/Q2599292","display_name":"Probabilistic logic","level":2,"score":0.7672996520996094},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7500556707382202},{"id":"https://openalex.org/C142210648","wikidata":"https://www.wikidata.org/wiki/Q1266546","display_name":"Record linkage","level":3,"score":0.7413390874862671},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.6842525005340576},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.6427154541015625},{"id":"https://openalex.org/C31266012","wikidata":"https://www.wikidata.org/wiki/Q6554340","display_name":"Linkage (software)","level":3,"score":0.5929315686225891},{"id":"https://openalex.org/C2908647359","wikidata":"https://www.wikidata.org/wiki/Q2625603","display_name":"Population","level":2,"score":0.4564284682273865},{"id":"https://openalex.org/C58489278","wikidata":"https://www.wikidata.org/wiki/Q1172284","display_name":"Data set","level":2,"score":0.41542795300483704},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2730949819087982},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.20037227869033813},{"id":"https://openalex.org/C71924100","wikidata":"https://www.wikidata.org/wiki/Q11190","display_name":"Medicine","level":0,"score":0.16298213601112366},{"id":"https://openalex.org/C99454951","wikidata":"https://www.wikidata.org/wiki/Q932068","display_name":"Environmental health","level":1,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0}],"mesh":[{"descriptor_ui":"D001938","descriptor_name":"Brazil","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D001938","descriptor_name":"Brazil","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D001938","descriptor_name":"Brazil","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D006801","descriptor_name":"Humans","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D006801","descriptor_name":"Humans","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D006801","descriptor_name":"Humans","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D015331","descriptor_name":"Cohort Studies","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D015331","descriptor_name":"Cohort Studies","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D015331","descriptor_name":"Cohort Studies","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D016208","descriptor_name":"Databases, Factual","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D016208","descriptor_name":"Databases, Factual","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D016208","descriptor_name":"Databases, Factual","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D016247","descriptor_name":"Information Storage and Retrieval","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D016247","descriptor_name":"Information Storage and Retrieval","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D016247","descriptor_name":"Information Storage and Retrieval","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D057286","descriptor_name":"Electronic Health Records","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D057286","descriptor_name":"Electronic Health Records","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D057286","descriptor_name":"Electronic Health Records","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true}],"locations_count":6,"locations":[{"id":"doi:10.1109/jbhi.2018.2796941","is_oa":true,"landing_page_url":"https://doi.org/10.1109/jbhi.2018.2796941","pdf_url":null,"source":{"id":"https://openalex.org/S2495854775","display_name":"IEEE Journal of Biomedical and Health Informatics","issn_l":"2168-2194","issn":["2168-2194","2168-2208"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Journal of Biomedical and Health Informatics","raw_type":"journal-article"},{"id":"pmid:29505402","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/29505402","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE journal of biomedical and health informatics","raw_type":null},{"id":"pmh:oai:www.arca.fiocruz.br:icict/26425","is_oa":true,"landing_page_url":"https://www.arca.fiocruz.br/handle/icict/26425","pdf_url":null,"source":{"id":"https://openalex.org/S4306402641","display_name":"LA Referencia (Red Federada de Repositorios Institucionales de Publicaciones Cient\u00edficas)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4383465926","host_organization_name":"LA Referencia","host_organization_lineage":["https://openalex.org/I4383465926"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"reponame:Reposit\u00f3rio Institucional da FIOCRUZ (ARCA)","raw_type":"info:eu-repo/semantics/publishedVersion"},{"id":"pmh:oai:arca.fiocruz.br:icict/26425","is_oa":true,"landing_page_url":"https://arca.fiocruz.br/handle/icict/26425","pdf_url":null,"source":{"id":"https://openalex.org/S7407055450","display_name":"Arca - Reposit\u00f3rio Institucional da Fiocruz","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"journal article"},{"id":"pmh:oai:eprints.ucl.ac.uk.OAI2:10044322","is_oa":false,"landing_page_url":"https://discovery.ucl.ac.uk/id/eprint/10044322/","pdf_url":null,"source":{"id":"https://openalex.org/S4306400024","display_name":"UCL Discovery (University College London)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I45129253","host_organization_name":"University College London","host_organization_lineage":["https://openalex.org/I45129253"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"   IEEE Journal of Biomedical and Health Informatics       (2018)     (In press).  ","raw_type":"Article"},{"id":"pmh:oai:pubmedcentral.nih.gov:7198121","is_oa":true,"landing_page_url":"https://www.ncbi.nlm.nih.gov/pmc/articles/7198121","pdf_url":null,"source":{"id":"https://openalex.org/S2764455111","display_name":"PubMed Central","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"IEEE J Biomed Health Inform","raw_type":"Text"}],"best_oa_location":{"id":"doi:10.1109/jbhi.2018.2796941","is_oa":true,"landing_page_url":"https://doi.org/10.1109/jbhi.2018.2796941","pdf_url":null,"source":{"id":"https://openalex.org/S2495854775","display_name":"IEEE Journal of Biomedical and Health Informatics","issn_l":"2168-2194","issn":["2168-2194","2168-2208"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Journal of Biomedical and Health Informatics","raw_type":"journal-article"},"sustainable_development_goals":[{"score":0.7400000095367432,"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions"}],"awards":[{"id":"https://openalex.org/G1877862647","display_name":null,"funder_award_id":"MR/K006584/1","funder_id":"https://openalex.org/F4320311106","funder_display_name":"National Institute for Social Care and Health Research"},{"id":"https://openalex.org/G205952512","display_name":"Centre for Health service and Academic Partnership in Translational eHealth Research (CHAPTER)","funder_award_id":"MR/K006584/1","funder_id":"https://openalex.org/F4320334626","funder_display_name":"Medical Research Council"},{"id":"https://openalex.org/G2107972360","display_name":null,"funder_award_id":"MR/K006584/1","funder_id":"https://openalex.org/F4320334626","funder_display_name":"Medical Research Council"},{"id":"https://openalex.org/G3335867693","display_name":null,"funder_award_id":"MR/K006584/1","funder_id":"https://openalex.org/F4320311904","funder_display_name":"Wellcome Trust"},{"id":"https://openalex.org/G4457630332","display_name":null,"funder_award_id":"K006584/1","funder_id":"https://openalex.org/F4320334626","funder_display_name":"Medical Research Council"},{"id":"https://openalex.org/G4842289958","display_name":null,"funder_award_id":"RP-PG-040710314","funder_id":"https://openalex.org/F4320319990","funder_display_name":"National Institute for Health and Care Research"},{"id":"https://openalex.org/G5071957832","display_name":null,"funder_award_id":"086091","funder_id":"https://openalex.org/F4320311904","funder_display_name":"Wellcome Trust"},{"id":"https://openalex.org/G7426647716","display_name":null,"funder_award_id":"OPP1161996","funder_id":"https://openalex.org/F4320306137","funder_display_name":"Bill and Melinda Gates Foundation"},{"id":"https://openalex.org/G7643588946","display_name":null,"funder_award_id":"NF160879","funder_id":"https://openalex.org/F4320320006","funder_display_name":"Royal Society"},{"id":"https://openalex.org/G8974814185","display_name":null,"funder_award_id":"086091/Z/08/Z","funder_id":"https://openalex.org/F4320311904","funder_display_name":"Wellcome Trust"}],"funders":[{"id":"https://openalex.org/F4320306137","display_name":"Bill and Melinda Gates Foundation","ror":"https://ror.org/0456r8d26"},{"id":"https://openalex.org/F4320311106","display_name":"National Institute for Social Care and Health Research","ror":"https://ror.org/03w4jzj90"},{"id":"https://openalex.org/F4320311904","display_name":"Wellcome Trust","ror":"https://ror.org/029chgv08"},{"id":"https://openalex.org/F4320319990","display_name":"National Institute for Health and Care Research","ror":"https://ror.org/0187kwz08"},{"id":"https://openalex.org/F4320319992","display_name":"British Heart Foundation","ror":"https://ror.org/02wdwnk04"},{"id":"https://openalex.org/F4320320006","display_name":"Royal Society","ror":"https://ror.org/03wnrjx87"},{"id":"https://openalex.org/F4320322025","display_name":"Conselho Nacional de Desenvolvimento Cient\u00edfico e Tecnol\u00f3gico","ror":"https://ror.org/03swz6y49"},{"id":"https://openalex.org/F4320322904","display_name":"Financiadora de Estudos e Projetos","ror":"https://ror.org/030w99567"},{"id":"https://openalex.org/F4320323681","display_name":"Funda\u00e7\u00e3o de Amparo \u00e0 Pesquisa do Estado da Bahia","ror":"https://ror.org/0473khm44"},{"id":"https://openalex.org/F4320334626","display_name":"Medical Research Council","ror":"https://ror.org/03x94j517"},{"id":"https://openalex.org/F4320334627","display_name":"Engineering and Physical Sciences Research Council","ror":"https://ror.org/0439y7842"},{"id":"https://openalex.org/F4320334630","display_name":"Economic and Social Research Council","ror":"https://ror.org/03n0ht308"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":40,"referenced_works":["https://openalex.org/W46659105","https://openalex.org/W198971967","https://openalex.org/W286941779","https://openalex.org/W1570596343","https://openalex.org/W1597164057","https://openalex.org/W1787332437","https://openalex.org/W1870305865","https://openalex.org/W1946416073","https://openalex.org/W1975356361","https://openalex.org/W1977843657","https://openalex.org/W1997927541","https://openalex.org/W2021535356","https://openalex.org/W2022475295","https://openalex.org/W2067314597","https://openalex.org/W2090431676","https://openalex.org/W2098554292","https://openalex.org/W2099846538","https://openalex.org/W2109834209","https://openalex.org/W2112599594","https://openalex.org/W2123845384","https://openalex.org/W2135803349","https://openalex.org/W2140926830","https://openalex.org/W2141965543","https://openalex.org/W2158016453","https://openalex.org/W2192710790","https://openalex.org/W2395205829","https://openalex.org/W2395782743","https://openalex.org/W2397871834","https://openalex.org/W2427930862","https://openalex.org/W2482683773","https://openalex.org/W2499987439","https://openalex.org/W2559870814","https://openalex.org/W2565966619","https://openalex.org/W2597291760","https://openalex.org/W2605123956","https://openalex.org/W2739969158","https://openalex.org/W6608130435","https://openalex.org/W6633928172","https://openalex.org/W6712249156","https://openalex.org/W6712521301"],"related_works":["https://openalex.org/W2784199898","https://openalex.org/W2487032012","https://openalex.org/W2211355040","https://openalex.org/W2808916796","https://openalex.org/W2176311362","https://openalex.org/W3012491082","https://openalex.org/W2024018837","https://openalex.org/W3211905090","https://openalex.org/W1936317645","https://openalex.org/W4310568775"],"abstract_inverted_index":{"Data":[0],"linkage":[1,69,166],"refers":[2],"to":[3,13,73,98,129,208,238,253],"the":[4,14,112,115,130,135,137,144,180,240],"process":[5],"of":[6,53,60,79,87,104,114,134,139,154,202,225,232],"identifying":[7],"and":[8,39,45,51,95,109,122,132,143,172,187,190,211,218,250],"linking":[9,121,197],"records":[10,35,75,105,256],"that":[11],"refer":[12],"same":[15],"entity":[16],"across":[17,28,66],"multiple":[18],"heterogeneous":[19,262],"data":[20,176],"sources.":[21],"This":[22],"method":[23],"has":[24],"been":[25],"widely":[26],"utilized":[27],"scientific":[29],"domains,":[30],"including":[31],"public":[32,54,209],"health":[33,210],"where":[34],"from":[36,196],"clinical,":[37],"administrative,":[38],"other":[40],"surveillance":[41],"databases":[42,213],"are":[43,71],"aggregated":[44],"used":[46,72],"for":[47,110,169,214],"research,":[48],"decision":[49],"making,":[50],"assessment":[52],"policies.":[55],"When":[56],"a":[57,77,84,101,140,150,163,198],"common":[58],"set":[59],"unique":[61],"identifiers":[62],"does":[63],"not":[64,108],"exist":[65],"sources,":[67],"probabilistic":[68,165,188],"approaches":[70],"link":[74,239],"using":[76,248],"combination":[78],"attributes.":[80],"These":[81],"methods":[82],"require":[83],"careful":[85],"choice":[86],"comparison":[88],"attributes":[89],"as":[90,92],"well":[91],"similarity":[93],"metrics":[94],"cutoff":[96],"values":[97],"decide":[99],"if":[100],"given":[102],"pair":[103],"matches":[106],"or":[107],"assessing":[111,123],"accuracy":[113,124,171,191,224],"results.":[116],"In":[117,157,216,230],"large,":[118],"complex":[119],"datasets,":[120],"can":[125],"be":[126],"challenging":[127],"due":[128],"volume":[131],"complexity":[133],"data,":[136],"absence":[138],"gold":[141],"standard,":[142],"challenges":[145],"associated":[146],"with":[147],"manually":[148],"reviewing":[149],"very":[151],"large":[152,199],"number":[153],"record":[155],"matches.":[156,229],"this":[158],"paper,":[159],"we":[160,221,234],"present":[161,194,235],"AtyImo,":[162],"hybrid":[164],"tool":[167],"optimized":[168],"high":[170,223],"scalability":[173],"in":[174,206,243,257],"massive":[175],"sets.":[177],"We":[178,193],"describe":[179],"implementation":[181],"details":[182],"around":[183],"anonymization,":[184],"blocking,":[185],"deterministic":[186],"linkage,":[189],"assessment.":[192],"results":[195],"population-based":[200],"cohort":[201,242],"114":[203],"million":[204,255],"individuals":[205],"Brazil":[207],"administrative":[212],"research.":[215],"controlled":[217],"real":[219],"scenarios,":[220],"observed":[222],"results:":[226],"93%-97%":[227],"true":[228],"terms":[231],"scalability,":[233],"AtyImo's":[236],"ability":[237],"entire":[241],"less":[244,258],"than":[245,259],"nine":[246],"days":[247],"Spark":[249],"scaling":[251],"up":[252],"20":[254],"12s":[260],"over":[261],"(CPU+GPU)":[263],"architectures.":[264]},"counts_by_year":[{"year":2025,"cited_by_count":7},{"year":2024,"cited_by_count":7},{"year":2023,"cited_by_count":3},{"year":2022,"cited_by_count":13},{"year":2021,"cited_by_count":5},{"year":2020,"cited_by_count":9},{"year":2019,"cited_by_count":4},{"year":2018,"cited_by_count":3}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
