{"id":"https://openalex.org/W2138091453","doi":"https://doi.org/10.1109/ssdm.2003.1214942","title":"Entity level data integration by statistical methods","display_name":"Entity level data integration by statistical methods","publication_year":2005,"publication_date":"2005-04-12","ids":{"openalex":"https://openalex.org/W2138091453","doi":"https://doi.org/10.1109/ssdm.2003.1214942","mag":"2138091453"},"language":"en","primary_location":{"id":"doi:10.1109/ssdm.2003.1214942","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ssdm.2003.1214942","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"15th International Conference on Scientific and Statistical Database Management, 2003.","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5111945003","display_name":"Hans\u2010J. Lenz","orcid":null},"institutions":[{"id":"https://openalex.org/I75951250","display_name":"Freie Universit\u00e4t Berlin","ror":"https://ror.org/046ak2485","country_code":"DE","type":"education","lineage":["https://openalex.org/I75951250"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Hans-J. Lenz","raw_affiliation_strings":["Free University of Amsterdam, Berlin, Germany","Free University, Berlin,"],"affiliations":[{"raw_affiliation_string":"Free University of Amsterdam, Berlin, Germany","institution_ids":["https://openalex.org/I75951250"]},{"raw_affiliation_string":"Free University, Berlin,","institution_ids":["https://openalex.org/I75951250"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":["https://openalex.org/A5111945003"],"corresponding_institution_ids":["https://openalex.org/I75951250"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.20147316,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"3","last_page":"2"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10538","display_name":"Data Mining Algorithms and Applications","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11106","display_name":"Data Management and Algorithms","score":0.9980999827384949,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/identifier","display_name":"Identifier","score":0.8128291368484497},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7523117661476135},{"id":"https://openalex.org/keywords/record-linkage","display_name":"Record linkage","score":0.7037662267684937},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.6584500670433044},{"id":"https://openalex.org/keywords/identification","display_name":"Identification (biology)","score":0.6053646206855774},{"id":"https://openalex.org/keywords/range","display_name":"Range (aeronautics)","score":0.5150241851806641},{"id":"https://openalex.org/keywords/function","display_name":"Function (biology)","score":0.4670001268386841},{"id":"https://openalex.org/keywords/unique-identifier","display_name":"Unique identifier","score":0.44082680344581604},{"id":"https://openalex.org/keywords/association-rule-learning","display_name":"Association rule learning","score":0.4331223964691162},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.42908474802970886},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3999215066432953},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.328193336725235},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3210332989692688}],"concepts":[{"id":"https://openalex.org/C154504017","wikidata":"https://www.wikidata.org/wiki/Q853614","display_name":"Identifier","level":2,"score":0.8128291368484497},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7523117661476135},{"id":"https://openalex.org/C142210648","wikidata":"https://www.wikidata.org/wiki/Q1266546","display_name":"Record linkage","level":3,"score":0.7037662267684937},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.6584500670433044},{"id":"https://openalex.org/C116834253","wikidata":"https://www.wikidata.org/wiki/Q2039217","display_name":"Identification (biology)","level":2,"score":0.6053646206855774},{"id":"https://openalex.org/C204323151","wikidata":"https://www.wikidata.org/wiki/Q905424","display_name":"Range (aeronautics)","level":2,"score":0.5150241851806641},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.4670001268386841},{"id":"https://openalex.org/C119839945","wikidata":"https://www.wikidata.org/wiki/Q6545185","display_name":"Unique identifier","level":3,"score":0.44082680344581604},{"id":"https://openalex.org/C193524817","wikidata":"https://www.wikidata.org/wiki/Q386780","display_name":"Association rule learning","level":2,"score":0.4331223964691162},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.42908474802970886},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3999215066432953},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.328193336725235},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3210332989692688},{"id":"https://openalex.org/C159985019","wikidata":"https://www.wikidata.org/wiki/Q181790","display_name":"Composite material","level":1,"score":0.0},{"id":"https://openalex.org/C59822182","wikidata":"https://www.wikidata.org/wiki/Q441","display_name":"Botany","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C192562407","wikidata":"https://www.wikidata.org/wiki/Q228736","display_name":"Materials science","level":0,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C2908647359","wikidata":"https://www.wikidata.org/wiki/Q2625603","display_name":"Population","level":2,"score":0.0},{"id":"https://openalex.org/C78458016","wikidata":"https://www.wikidata.org/wiki/Q840400","display_name":"Evolutionary biology","level":1,"score":0.0},{"id":"https://openalex.org/C149923435","wikidata":"https://www.wikidata.org/wiki/Q37732","display_name":"Demography","level":1,"score":0.0},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/ssdm.2003.1214942","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ssdm.2003.1214942","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"15th International Conference on Scientific and Statistical Database Management, 2003.","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":9,"referenced_works":["https://openalex.org/W1532364676","https://openalex.org/W1594031697","https://openalex.org/W2073471108","https://openalex.org/W2166559705","https://openalex.org/W2767905780","https://openalex.org/W3085162807","https://openalex.org/W4230502578","https://openalex.org/W4388782171","https://openalex.org/W6631843378"],"related_works":["https://openalex.org/W2413568490","https://openalex.org/W1913624564","https://openalex.org/W3130054399","https://openalex.org/W4301062032","https://openalex.org/W2319837024","https://openalex.org/W2548183822","https://openalex.org/W4294243532","https://openalex.org/W2911599090","https://openalex.org/W2989796854","https://openalex.org/W2141965543"],"abstract_inverted_index":{"In":[0],"most":[1],"cases":[2],"unique":[3,15],"identifiers":[4],"are":[5,17,103,149],"required":[6],"to":[7,41,47,51,58,82,95,105,172],"join":[8],"data":[9,24,89,187],"from":[10,26,188,193],"different":[11,27,92],"databases.":[12],"If":[13],"global":[14],"keys":[16],"absent":[18],"or":[19,54,138],"corrupted":[20],"the":[21,98,101,110,119,146,174,178,194],"supplement":[22],"of":[23,72,88,100,121,145,177,205],"extracted":[25],"sources":[28],"becomes":[29],"difficult.":[30],"The":[31,78,142,181],"main":[32],"question":[33],"is:":[34],"Does":[35],"a":[36,59,75,203,206],"given":[37],"record":[38,198],"is":[39,45,81,112],"related":[40],"an":[42,48,115,122],"entity":[43,49],"which":[44,200],"identical":[46,67],"corresponding":[50],"another":[52],"record,":[53],"not?":[55],"This":[56],"leads":[57],"classification":[60,111,126,147],"problem":[61],"with":[62],"at":[63],"least":[64],"two":[65,189],"classes:":[66],"and":[68,153,192],"not":[69],"identical.Classifying":[70],"pairs":[71],"records":[73,102],"needs":[74],"three-step":[76],"procedure.":[77],"first":[79],"step":[80],"define":[83],"suitable":[84],"common":[85,107],"properties":[86],"(attributes)":[87],"for":[90,162,186],"all":[91],"sources.":[93],"Secondly,":[94],"allow":[96],"comparisons":[97],"values":[99],"transformed":[104],"this":[106],"properties.":[108],"Finally,":[109],"performed":[113],"on":[114],"almost":[116],"finite":[117],"subset,":[118],"range":[120],"appropriate":[123],"comparison":[124],"function.Different":[125],"techniques":[127],"can":[128,159],"be":[129,160,170,184],"applied":[130],"like":[131],"Association":[132],"Rules,":[133],"Classification":[134],"Trees,":[135],"Neural":[136],"networks":[137],"Record":[139],"Linkage":[140],"techniques.":[141],"unknown":[143],"parameters":[144],"rules":[148],"computed":[150],"by":[151,164],"sampling":[152],"supervised":[154],"learning.":[155],"Unbiased":[156],"error":[157],"rates":[158],"estimated":[161],"instance":[163],"cross":[165],"validation.":[166],"Special":[167],"attention":[168],"must":[169],"paid":[171],"control":[173],"computing":[175],"complexity":[176],"identification":[179],"process.":[180],"approach":[182],"will":[183,201],"illustrated":[185],"library":[190],"databases":[191],"planned":[195],"German":[196],"administrative":[197],"census,":[199],"become":[202],"substitute":[204],"regular":[207],"census.":[208]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
