{"id":"https://openalex.org/W2794286494","doi":"https://doi.org/10.32614/rj-2010-017","title":"The RecordLinkage Package: Detecting Errors in Data","display_name":"The RecordLinkage Package: Detecting Errors in Data","publication_year":2010,"publication_date":"2010-01-01","ids":{"openalex":"https://openalex.org/W2794286494","doi":"https://doi.org/10.32614/rj-2010-017","mag":"2794286494"},"language":"en","primary_location":{"id":"doi:10.32614/rj-2010-017","is_oa":true,"landing_page_url":"https://doi.org/10.32614/rj-2010-017","pdf_url":"https://journal.r-project.org/archive/2010/RJ-2010-017/RJ-2010-017.pdf","source":{"id":"https://openalex.org/S2489169438","display_name":"The R Journal","issn_l":"2073-4859","issn":["2073-4859"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"The R Journal","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"bronze","oa_url":"https://journal.r-project.org/archive/2010/RJ-2010-017/RJ-2010-017.pdf","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5021788298","display_name":"Murat Sariyar","orcid":"https://orcid.org/0000-0002-5595-689X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Murat Sariyar","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5014815043","display_name":"Andreas Borg","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Andreas Borg","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":2,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":2.8104,"has_fulltext":true,"cited_by_count":105,"citation_normalized_percentile":{"value":0.91261638,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":100},"biblio":{"volume":"2","issue":"2","first_page":"61","last_page":"61"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10764","display_name":"Privacy-Preserving Technologies in Data","score":0.9764999747276306,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12034","display_name":"Digital and Cyber Forensics","score":0.9696000218391418,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/r-package","display_name":"R package","score":0.7288455367088318},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5372398495674133},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.18644949793815613}],"concepts":[{"id":"https://openalex.org/C2984074130","wikidata":"https://www.wikidata.org/wiki/Q73539779","display_name":"R package","level":2,"score":0.7288455367088318},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5372398495674133},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.18644949793815613}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.32614/rj-2010-017","is_oa":true,"landing_page_url":"https://doi.org/10.32614/rj-2010-017","pdf_url":"https://journal.r-project.org/archive/2010/RJ-2010-017/RJ-2010-017.pdf","source":{"id":"https://openalex.org/S2489169438","display_name":"The R Journal","issn_l":"2073-4859","issn":["2073-4859"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"The R Journal","raw_type":"journal-article"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.343.8437","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.343.8437","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://journal.r-project.org/archive/2010-2/RJournal_2010-2_Sariyar+Borg.pdf","raw_type":"text"},{"id":"pmh:oai:digitalcommons.unl.edu:r-journal-1306","is_oa":false,"landing_page_url":"https://digitalcommons.unl.edu/r-journal/303","pdf_url":null,"source":{"id":"https://openalex.org/S4306400577","display_name":"Lincoln (University of Nebraska)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I114395901","host_organization_name":"University of Nebraska\u2013Lincoln","host_organization_lineage":["https://openalex.org/I114395901"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"The R Journal","raw_type":"text"}],"best_oa_location":{"id":"doi:10.32614/rj-2010-017","is_oa":true,"landing_page_url":"https://doi.org/10.32614/rj-2010-017","pdf_url":"https://journal.r-project.org/archive/2010/RJ-2010-017/RJ-2010-017.pdf","source":{"id":"https://openalex.org/S2489169438","display_name":"The R Journal","issn_l":"2073-4859","issn":["2073-4859"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"The R Journal","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2794286494.pdf","grobid_xml":"https://content.openalex.org/works/W2794286494.grobid-xml"},"referenced_works_count":4,"referenced_works":["https://openalex.org/W41404523","https://openalex.org/W2072510577","https://openalex.org/W4319054059","https://openalex.org/W4399576890"],"related_works":["https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W2358668433","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W2382290278","https://openalex.org/W2478288626","https://openalex.org/W2350741829","https://openalex.org/W2530322880","https://openalex.org/W1596801655"],"abstract_inverted_index":{"Record":[0],"linkage":[1,22],"deals":[2],"with":[3],"detecting":[4],"homonyms":[5],"and":[6,18,73,83,114,127,132],"mainly":[7],"synonyms":[8],"in":[9,42],"data.":[10],"The":[11,36,78],"package":[12],"RecordLinkage":[13],"provides":[14],"means":[15],"to":[16,99,110],"perform":[17],"evaluate":[19],"different":[20],"record":[21,81],"methods.":[23],"A":[24],"stochastic":[25],"framework":[26],"is":[27],"implemented":[28],"which":[29],"calculates":[30],"weights":[31],"through":[32],"an":[33],"EM":[34],"algorithm.":[35],"determination":[37],"of":[38,50,80],"the":[39],"necessary":[40],"thresholds":[41],"this":[43],"model":[44],"can":[45,96,118],"be":[46,97,100,119],"achieved":[47],"by":[48],"tools":[49],"extreme":[51],"value":[52],"theory.":[53],"Furthermore,":[54],"machine":[55],"learning":[56],"methods":[57],"are":[58,90],"utilized,":[59],"including":[60],"decision":[61],"trees":[62],"(rpart),":[63],"bootstrap":[64],"aggregating":[65],"(bagging),":[66],"ada":[67],"boost":[68],"(ada),":[69],"neural":[70],"nets":[71],"(nnet)":[72],"support":[74],"vector":[75],"machines":[76],"(svm).":[77],"generation":[79],"pairs":[82],"comparison":[84],"patterns":[85,95],"from":[86],"single":[87],"data":[88],"items":[89],"provided":[91],"as":[92],"well.":[93],"Comparison":[94],"chosen":[98],"binary":[101],"or":[102],"based":[103],"on":[104,125],"some":[105],"string":[106],"metrics.":[107],"In":[108],"order":[109],"reduce":[111],"computation":[112],"time":[113],"memory":[115],"usage,":[116],"blocking":[117],"used.":[120],"Future":[121],"development":[122],"will":[123],"concentrate":[124],"additional":[126],"refined":[128],"methods,":[129],"performance":[130],"improvements":[131],"input/output":[133],"facilities":[134],"needed":[135],"for":[136],"real-world":[137],"application.":[138]},"counts_by_year":[{"year":2026,"cited_by_count":7},{"year":2025,"cited_by_count":8},{"year":2024,"cited_by_count":8},{"year":2023,"cited_by_count":13},{"year":2022,"cited_by_count":15},{"year":2021,"cited_by_count":12},{"year":2020,"cited_by_count":13},{"year":2019,"cited_by_count":7},{"year":2018,"cited_by_count":2},{"year":2017,"cited_by_count":7},{"year":2016,"cited_by_count":2},{"year":2015,"cited_by_count":1},{"year":2014,"cited_by_count":4},{"year":2013,"cited_by_count":2},{"year":2012,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
