{"id":"https://openalex.org/W2106922943","doi":"https://doi.org/10.1145/1077501.1077503","title":"Handling data quality in entity resolution","display_name":"Handling data quality in entity resolution","publication_year":2005,"publication_date":"2005-06-17","ids":{"openalex":"https://openalex.org/W2106922943","doi":"https://doi.org/10.1145/1077501.1077503","mag":"2106922943"},"language":"en","primary_location":{"id":"doi:10.1145/1077501.1077503","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1077501.1077503","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2nd international workshop on Information quality in information systems","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5055883336","display_name":"H\u00e9ctor Garc\u00eda-Molina","orcid":null},"institutions":[{"id":"https://openalex.org/I97018004","display_name":"Stanford University","ror":"https://ror.org/00f54p054","country_code":"US","type":"education","lineage":["https://openalex.org/I97018004"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Hector Garcia-Molina","raw_affiliation_strings":["Stanford University, California","#N# Stanford University, California"],"affiliations":[{"raw_affiliation_string":"Stanford University, California","institution_ids":["https://openalex.org/I97018004"]},{"raw_affiliation_string":"#N# Stanford University, California","institution_ids":["https://openalex.org/I97018004"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":["https://openalex.org/A5055883336"],"corresponding_institution_ids":["https://openalex.org/I97018004"],"apc_list":null,"apc_paid":null,"fwci":0.435,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.69631105,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"1"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11106","display_name":"Data Management and Algorithms","score":0.9854000210762024,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10317","display_name":"Advanced Database Systems and Queries","score":0.9763000011444092,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.741515576839447},{"id":"https://openalex.org/keywords/unique-identifier","display_name":"Unique identifier","score":0.6800864934921265},{"id":"https://openalex.org/keywords/identifier","display_name":"Identifier","score":0.6455448865890503},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.6234368681907654},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.6122795343399048},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.5861663818359375},{"id":"https://openalex.org/keywords/data-quality","display_name":"Data quality","score":0.560418963432312},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.5559242963790894},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.5322574973106384},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.4377310276031494},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.3353920578956604},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.07745975255966187},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.07352292537689209}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.741515576839447},{"id":"https://openalex.org/C119839945","wikidata":"https://www.wikidata.org/wiki/Q6545185","display_name":"Unique identifier","level":3,"score":0.6800864934921265},{"id":"https://openalex.org/C154504017","wikidata":"https://www.wikidata.org/wiki/Q853614","display_name":"Identifier","level":2,"score":0.6455448865890503},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.6234368681907654},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.6122795343399048},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.5861663818359375},{"id":"https://openalex.org/C24756922","wikidata":"https://www.wikidata.org/wiki/Q1757694","display_name":"Data quality","level":3,"score":0.560418963432312},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.5559242963790894},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.5322574973106384},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.4377310276031494},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.3353920578956604},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.07745975255966187},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.07352292537689209},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/1077501.1077503","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1077501.1077503","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2nd international workshop on Information quality in information systems","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2413568490","https://openalex.org/W1913624564","https://openalex.org/W3130054399","https://openalex.org/W4301062032","https://openalex.org/W2319837024","https://openalex.org/W2343500026","https://openalex.org/W2390627310","https://openalex.org/W2413664130","https://openalex.org/W2051731675","https://openalex.org/W4304208754"],"abstract_inverted_index":{"Entity":[0],"resolution":[1],"(ER)":[2],"is":[3,159,181],"a":[4,156],"problem":[5],"that":[6,35,133,158],"arises":[7],"in":[8,46,152,155],"many":[9,103],"information":[10],"integration":[11],"scenarios:":[12],"We":[13],"have":[14,57,64],"two":[15,134],"or":[16,112],"more":[17],"sources":[18,85],"containing":[19],"records":[20,39,51,82,98,136],"on":[21,171],"the":[22,47,50,53,65,80,90,96,106,119,140,150,162],"same":[23,54,91,141],"set":[24],"of":[25],"real-world":[26,92,142],"entities":[27],"(e.g.,":[28],"customers).However,":[29],"there":[30],"are":[31],"no":[32],"unique":[33],"identifiers":[34],"tell":[36],"us":[37],"what":[38],"from":[40,83],"one":[41,61],"source":[42],"correspond":[43,138],"to":[44,78,89,139,173],"those":[45,87],"other":[48],"sources.Furthermore,":[49],"representing":[52,153],"entity":[55],"may":[56,63,70,128],"differing":[58],"information,":[59],"e.g.,":[60,126],"record":[62,69],"address":[66],"misspelled,":[67],"another":[68],"be":[71,130],"missing":[72],"some":[73,168],"fields.An":[74],"ER":[75,104,120,163,175],"algorithm":[76],"attempts":[77],"identify":[79],"matching":[81,97],"multiple":[84],"(i.e.,":[86],"corresponding":[88],"entity),":[93],"and":[94,190],"merges":[95],"as":[99],"best":[100],"it":[101],"can.In":[102],"applications":[105],"input":[107],"data":[108,110],"has":[109],"quality":[111],"uncertainty":[113],"values":[114],"associated":[115],"with":[116,176,183],"it.":[117],"Furthermore,":[118],"process":[121],"itself":[122],"introduces":[123],"additional":[124],"uncertainties,":[125],"we":[127],"only":[129],"90%":[131],"confident":[132],"given":[135],"actually":[137],"entity.In":[143],"this":[144],"talk":[145],"Hector":[146],"Garcia-Molina":[147],"will":[148,165],"discuss":[149],"challenges":[151],"quality/uncertainty/confidences":[154],"way":[157],"useful":[160],"for":[161],"process.He":[164],"also":[166],"present":[167],"preliminary":[169],"ideas":[170],"how":[172],"perform":[174],"uncertain":[177],"data.":[178],"(This":[179],"work":[180],"joint":[182],"Omar":[184],"Benjelloun,":[185],"David":[186],"Menestrina,":[187],"Qi":[188],"Su,":[189],"Jennifer":[191],"Widom).":[192]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
