{"id":"https://openalex.org/W2073545563","doi":"https://doi.org/10.14778/1687627.1687690","title":"Integrating conflicting data","display_name":"Integrating conflicting data","publication_year":2009,"publication_date":"2009-08-01","ids":{"openalex":"https://openalex.org/W2073545563","doi":"https://doi.org/10.14778/1687627.1687690","mag":"2073545563"},"language":"en","primary_location":{"id":"doi:10.14778/1687627.1687690","is_oa":false,"landing_page_url":"https://doi.org/10.14778/1687627.1687690","pdf_url":null,"source":{"id":"https://openalex.org/S4210226185","display_name":"Proceedings of the VLDB Endowment","issn_l":"2150-8097","issn":["2150-8097"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the VLDB Endowment","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://inria.hal.science/hal-01855870","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5001402526","display_name":"Xin Luna Dong","orcid":"https://orcid.org/0009-0000-8667-322X"},"institutions":[{"id":"https://openalex.org/I1283103587","display_name":"AT&T (United States)","ror":"https://ror.org/02bbd5539","country_code":"US","type":"company","lineage":["https://openalex.org/I1283103587"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Xin Luna Dong","raw_affiliation_strings":["AT&amp;T Labs--Research, Florham Park, NJ"],"affiliations":[{"raw_affiliation_string":"AT&amp;T Labs--Research, Florham Park, NJ","institution_ids":["https://openalex.org/I1283103587"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5091872345","display_name":"Laure Berti\u2010\u00c9quille","orcid":"https://orcid.org/0000-0002-8046-0570"},"institutions":[{"id":"https://openalex.org/I56067802","display_name":"Universit\u00e9 de Rennes","ror":"https://ror.org/015m7wh34","country_code":"FR","type":"education","lineage":["https://openalex.org/I56067802"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Laure Berti-Equille","raw_affiliation_strings":["Universit\u00e9 de Rennes, Rennes cedex, France"],"affiliations":[{"raw_affiliation_string":"Universit\u00e9 de Rennes, Rennes cedex, France","institution_ids":["https://openalex.org/I56067802"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5088315797","display_name":"Divesh Srivastava","orcid":"https://orcid.org/0000-0002-7609-9217"},"institutions":[{"id":"https://openalex.org/I1283103587","display_name":"AT&T (United States)","ror":"https://ror.org/02bbd5539","country_code":"US","type":"company","lineage":["https://openalex.org/I1283103587"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Divesh Srivastava","raw_affiliation_strings":["AT&amp;T Labs--Research, Florham Park, NJ"],"affiliations":[{"raw_affiliation_string":"AT&amp;T Labs--Research, Florham Park, NJ","institution_ids":["https://openalex.org/I1283103587"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5001402526"],"corresponding_institution_ids":["https://openalex.org/I1283103587"],"apc_list":null,"apc_paid":null,"fwci":27.3168,"has_fulltext":false,"cited_by_count":422,"citation_normalized_percentile":{"value":0.9975334,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":97,"max":100},"biblio":{"volume":"2","issue":"1","first_page":"550","last_page":"561"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9977999925613403,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11986","display_name":"Scientific Computing and Data Management","score":0.9973000288009644,"subfield":{"id":"https://openalex.org/subfields/1802","display_name":"Information Systems and Management"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7546716928482056},{"id":"https://openalex.org/keywords/data-quality","display_name":"Data quality","score":0.5713597536087036},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.5581868290901184},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.4980008602142334},{"id":"https://openalex.org/keywords/copying","display_name":"Copying","score":0.4886329770088196},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.4841654896736145},{"id":"https://openalex.org/keywords/data-set","display_name":"Data set","score":0.4341817796230316},{"id":"https://openalex.org/keywords/value","display_name":"Value (mathematics)","score":0.4211995601654053},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.38379037380218506},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.3535459041595459},{"id":"https://openalex.org/keywords/metric","display_name":"Metric (unit)","score":0.1576952040195465},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.14634215831756592},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.1418621838092804},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.10925567150115967}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7546716928482056},{"id":"https://openalex.org/C24756922","wikidata":"https://www.wikidata.org/wiki/Q1757694","display_name":"Data quality","level":3,"score":0.5713597536087036},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.5581868290901184},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.4980008602142334},{"id":"https://openalex.org/C2779151265","wikidata":"https://www.wikidata.org/wiki/Q1156791","display_name":"Copying","level":2,"score":0.4886329770088196},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.4841654896736145},{"id":"https://openalex.org/C58489278","wikidata":"https://www.wikidata.org/wiki/Q1172284","display_name":"Data set","level":2,"score":0.4341817796230316},{"id":"https://openalex.org/C2776291640","wikidata":"https://www.wikidata.org/wiki/Q2912517","display_name":"Value (mathematics)","level":2,"score":0.4211995601654053},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.38379037380218506},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.3535459041595459},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.1576952040195465},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.14634215831756592},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.1418621838092804},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.10925567150115967},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.14778/1687627.1687690","is_oa":false,"landing_page_url":"https://doi.org/10.14778/1687627.1687690","pdf_url":null,"source":{"id":"https://openalex.org/S4210226185","display_name":"Proceedings of the VLDB Endowment","issn_l":"2150-8097","issn":["2150-8097"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the VLDB Endowment","raw_type":"journal-article"},{"id":"pmh:oai:HAL:hal-01855870v1","is_oa":true,"landing_page_url":"https://inria.hal.science/hal-01855870","pdf_url":null,"source":{"id":"https://openalex.org/S4306402512","display_name":"HAL (Le Centre pour la Communication Scientifique Directe)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1294671590","host_organization_name":"Centre National de la Recherche Scientifique","host_organization_lineage":["https://openalex.org/I1294671590"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Proceedings of the VLDB Endowment, 35th conference on Very Large Databases, Aug 2009, Lyon, France. pp.550 - 561, &#x27E8;10.14778/1687627.1687690&#x27E9;","raw_type":"Conference papers"}],"best_oa_location":{"id":"pmh:oai:HAL:hal-01855870v1","is_oa":true,"landing_page_url":"https://inria.hal.science/hal-01855870","pdf_url":null,"source":{"id":"https://openalex.org/S4306402512","display_name":"HAL (Le Centre pour la Communication Scientifique Directe)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1294671590","host_organization_name":"Centre National de la Recherche Scientifique","host_organization_lineage":["https://openalex.org/I1294671590"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Proceedings of the VLDB Endowment, 35th conference on Very Large Databases, Aug 2009, Lyon, France. pp.550 - 561, &#x27E8;10.14778/1687627.1687690&#x27E9;","raw_type":"Conference papers"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":15,"referenced_works":["https://openalex.org/W111905706","https://openalex.org/W136059505","https://openalex.org/W1725819718","https://openalex.org/W1981202432","https://openalex.org/W2061204063","https://openalex.org/W2066636486","https://openalex.org/W2111289574","https://openalex.org/W2111295912","https://openalex.org/W2118388899","https://openalex.org/W2120763591","https://openalex.org/W2142003896","https://openalex.org/W2155160033","https://openalex.org/W2156523427","https://openalex.org/W2293938906","https://openalex.org/W6605561650"],"related_works":["https://openalex.org/W4308771405","https://openalex.org/W2355873265","https://openalex.org/W2963669501","https://openalex.org/W3080197661","https://openalex.org/W4318471783","https://openalex.org/W2760667490","https://openalex.org/W2991781269","https://openalex.org/W775724729","https://openalex.org/W2137489486","https://openalex.org/W2094985717"],"abstract_inverted_index":{"Many":[0],"data":[1,22,46,53,149,157,225,235,240,262],"management":[2],"applications,":[3],"such":[4],"as":[5,93,236,238],"setting":[6],"up":[7],"Web":[8],"portals,":[9],"managing":[10,13],"enterprise":[11],"data,":[12,15,19],"community":[14],"and":[16,35,59,105,166,202,210,227,252],"sharing":[17],"scientific":[18],"require":[20],"integrating":[21],"from":[23,122,138,190,213],"multiple":[24],"sources.":[25,263],"Each":[26],"of":[27,33,90,131,163,168,224,249,261],"these":[28,169],"sources":[29,37,74,92,150,158,176,201,226],"provides":[30],"a":[31,66,97,128,142,160,258],"set":[32],"values":[34,121,165,170],"different":[36],"can":[38,56,82,100,245],"often":[39],"provide":[40,159],"conflicting":[41,123,214],"values.":[42,62,230],"To":[43],"present":[44,141],"quality":[45],"to":[47,69,118,197],"users,":[48],"it":[49,183],"is":[50,184,253],"critical":[51],"that":[52,106,145,187,206,242],"integration":[54],"systems":[55],"resolve":[57],"conflicts":[58],"discover":[60],"true":[61,67,120],"Typically,":[63],"we":[64,81,115],"expect":[65],"value":[68,85,99],"be":[70,101],"provided":[71,86,173],"by":[72,87,174,221],"more":[73],"than":[75],"any":[76],"particular":[77,180],"false":[78,98,181],"one,":[79],"so":[80],"take":[83],"the":[84,88,91,94,191],"majority":[89],"truth.":[95],"Unfortunately,":[96],"spread":[102],"through":[103],"copying":[104],"makes":[107],"truth":[108,152,212,250],"discovery":[109,251],"extremely":[110],"tricky.":[111],"In":[112],"this":[113],"paper,":[114],"consider":[116],"how":[117],"find":[119],"information":[124],"when":[125,255],"there":[126,256],"are":[127,171,257],"large":[129,161,259],"number":[130,162,260],"sources,":[132],"among":[133],"which":[134],"some":[135],"may":[136],"copy":[137],"others.":[139],"We":[140,193,216],"novel":[143],"approach":[144],"considers":[146],"dependence":[147,199,209],"between":[148,200,229],"in":[151],"discovery.":[153],"Intuitively,":[154],"if":[155],"two":[156],"common":[164],"many":[167],"rarely":[172],"other":[175],"(":[177],"e.g.":[178],",":[179],"values),":[182],"very":[185],"likely":[186],"one":[188],"copies":[189],"other.":[192],"apply":[194],"Bayesian":[195],"analysis":[196],"decide":[198],"design":[203],"an":[204],"algorithm":[205,244],"iteratively":[207],"detects":[208],"discovers":[211],"information.":[215],"also":[217],"extend":[218],"our":[219,243],"model":[220],"considering":[222],"accuracy":[223,248],"similarity":[228],"Our":[231],"experiments":[232],"on":[233],"synthetic":[234],"well":[237],"real-world":[239],"show":[241],"significantly":[246],"improve":[247],"scalable":[254]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":8},{"year":2024,"cited_by_count":15},{"year":2023,"cited_by_count":11},{"year":2022,"cited_by_count":14},{"year":2021,"cited_by_count":12},{"year":2020,"cited_by_count":30},{"year":2019,"cited_by_count":48},{"year":2018,"cited_by_count":48},{"year":2017,"cited_by_count":46},{"year":2016,"cited_by_count":33},{"year":2015,"cited_by_count":44},{"year":2014,"cited_by_count":22},{"year":2013,"cited_by_count":22},{"year":2012,"cited_by_count":26}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
