{"id":"https://openalex.org/W2060413568","doi":"https://doi.org/10.1145/2623330.2623685","title":"Entity profiling with varying source reliabilities","display_name":"Entity profiling with varying source reliabilities","publication_year":2014,"publication_date":"2014-08-22","ids":{"openalex":"https://openalex.org/W2060413568","doi":"https://doi.org/10.1145/2623330.2623685","mag":"2060413568"},"language":"en","primary_location":{"id":"doi:10.1145/2623330.2623685","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2623330.2623685","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 20th ACM SIGKDD international conference on Knowledge discovery and data mining","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100632551","display_name":"Furong Li","orcid":"https://orcid.org/0000-0002-0606-8861"},"institutions":[{"id":"https://openalex.org/I165932596","display_name":"National University of Singapore","ror":"https://ror.org/01tgyzw49","country_code":"SG","type":"education","lineage":["https://openalex.org/I165932596"]}],"countries":["SG"],"is_corresponding":true,"raw_author_name":"Furong Li","raw_affiliation_strings":["National University of Singapore, Singapore, Singapore"],"affiliations":[{"raw_affiliation_string":"National University of Singapore, Singapore, Singapore","institution_ids":["https://openalex.org/I165932596"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5019988958","display_name":"Mong Li Lee","orcid":"https://orcid.org/0000-0002-9636-388X"},"institutions":[{"id":"https://openalex.org/I165932596","display_name":"National University of Singapore","ror":"https://ror.org/01tgyzw49","country_code":"SG","type":"education","lineage":["https://openalex.org/I165932596"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Mong Li Lee","raw_affiliation_strings":["National University of Singapore, Singapore, Singapore"],"affiliations":[{"raw_affiliation_string":"National University of Singapore, Singapore, Singapore","institution_ids":["https://openalex.org/I165932596"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5051209739","display_name":"Wynne Hsu","orcid":"https://orcid.org/0000-0002-4142-8893"},"institutions":[{"id":"https://openalex.org/I165932596","display_name":"National University of Singapore","ror":"https://ror.org/01tgyzw49","country_code":"SG","type":"education","lineage":["https://openalex.org/I165932596"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Wynne Hsu","raw_affiliation_strings":["National University of Singapore, Singapore, Singapore"],"affiliations":[{"raw_affiliation_string":"National University of Singapore, Singapore, Singapore","institution_ids":["https://openalex.org/I165932596"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5100632551"],"corresponding_institution_ids":["https://openalex.org/I165932596"],"apc_list":null,"apc_paid":null,"fwci":3.3124,"has_fulltext":false,"cited_by_count":22,"citation_normalized_percentile":{"value":0.92020769,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":91,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"1146","last_page":"1155"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9883000254631042,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9818000197410583,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8279106616973877},{"id":"https://openalex.org/keywords/ambiguity","display_name":"Ambiguity","score":0.7602936625480652},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.5803543329238892},{"id":"https://openalex.org/keywords/profiling","display_name":"Profiling (computer programming)","score":0.5572973489761353},{"id":"https://openalex.org/keywords/record-linkage","display_name":"Record linkage","score":0.5286158919334412},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.5072709918022156},{"id":"https://openalex.org/keywords/reliability","display_name":"Reliability (semiconductor)","score":0.499025821685791},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.497590571641922},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.13382908701896667}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8279106616973877},{"id":"https://openalex.org/C2780522230","wikidata":"https://www.wikidata.org/wiki/Q1140419","display_name":"Ambiguity","level":2,"score":0.7602936625480652},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.5803543329238892},{"id":"https://openalex.org/C187191949","wikidata":"https://www.wikidata.org/wiki/Q1138496","display_name":"Profiling (computer programming)","level":2,"score":0.5572973489761353},{"id":"https://openalex.org/C142210648","wikidata":"https://www.wikidata.org/wiki/Q1266546","display_name":"Record linkage","level":3,"score":0.5286158919334412},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.5072709918022156},{"id":"https://openalex.org/C43214815","wikidata":"https://www.wikidata.org/wiki/Q7310987","display_name":"Reliability (semiconductor)","level":3,"score":0.499025821685791},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.497590571641922},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.13382908701896667},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C149923435","wikidata":"https://www.wikidata.org/wiki/Q37732","display_name":"Demography","level":1,"score":0.0},{"id":"https://openalex.org/C163258240","wikidata":"https://www.wikidata.org/wiki/Q25342","display_name":"Power (physics)","level":2,"score":0.0},{"id":"https://openalex.org/C2908647359","wikidata":"https://www.wikidata.org/wiki/Q2625603","display_name":"Population","level":2,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/2623330.2623685","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2623330.2623685","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 20th ACM SIGKDD international conference on Knowledge discovery and data mining","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/10","score":0.6499999761581421,"display_name":"Reduced inequalities"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":24,"referenced_works":["https://openalex.org/W1521736627","https://openalex.org/W1563742761","https://openalex.org/W1569646394","https://openalex.org/W1713409046","https://openalex.org/W1979698145","https://openalex.org/W1981590391","https://openalex.org/W1996505782","https://openalex.org/W2026147624","https://openalex.org/W2040925009","https://openalex.org/W2043072004","https://openalex.org/W2052698082","https://openalex.org/W2069728146","https://openalex.org/W2073471108","https://openalex.org/W2073545563","https://openalex.org/W2082424685","https://openalex.org/W2108991785","https://openalex.org/W2123561513","https://openalex.org/W2134964692","https://openalex.org/W2148019918","https://openalex.org/W2148524305","https://openalex.org/W2155160033","https://openalex.org/W2169940602","https://openalex.org/W6637487752","https://openalex.org/W6669040500"],"related_works":["https://openalex.org/W2353179089","https://openalex.org/W2923538289","https://openalex.org/W2353125546","https://openalex.org/W2470643824","https://openalex.org/W2349635380","https://openalex.org/W4353089801","https://openalex.org/W2353819554","https://openalex.org/W2359488321","https://openalex.org/W4280518517","https://openalex.org/W3111878056"],"abstract_inverted_index":{"The":[0,137],"rapid":[1],"growth":[2],"of":[3,13,103,150,164],"information":[4],"sources":[5,28],"on":[6,134],"the":[7,11,18,61,67,104,131,162,173],"Web":[8],"has":[9],"intensified":[10],"problem":[12],"data":[14,62,91,105],"quality.":[15],"In":[16,42,112],"particular,":[17],"same":[19],"real":[20,54,186],"world":[21,55,187],"entity":[22],"may":[23],"be":[24],"described":[25],"by":[26],"different":[27],"in":[29,148],"various":[30,135],"ways":[31],"with":[32,99,125],"overlapping":[33],"information,":[34],"and":[35,49,106,152,176,182],"possibly":[36],"conflicting":[37],"or":[38],"even":[39],"erroneous":[40,74,87,165],"values.":[41,75,166],"order":[43],"to":[44,59,66,145,160,179],"obtain":[45],"a":[46,53,100,117],"more":[47],"complete":[48,181],"accurate":[50,183],"picture":[51],"for":[52,109,185],"entity,":[56,68],"we":[57,115],"need":[58],"collate":[60],"records":[63,96,147],"that":[64,78,121,170],"refer":[65],"as":[69,71],"well":[70],"correct":[72],"any":[73],"We":[76],"observe":[77],"these":[79],"two":[80],"tasks":[81],"are":[82],"often":[83],"tightly":[84],"coupled:":[85],"rectifying":[86],"values":[88],"will":[89],"facilitate":[90],"collation,":[92],"while":[93],"linking":[94],"similar":[95],"provides":[97],"us":[98],"clearer":[101],"view":[102],"additional":[107],"evidence":[108],"error":[110,126],"correction.":[111],"this":[113],"paper,":[114],"present":[116],"framework":[118,139],"called":[119],"Comet":[120,171],"interleaves":[122],"record":[123],"linkage":[124],"correction,":[127],"taking":[128],"into":[129],"consideration":[130],"source":[132,153],"reliabilities":[133],"attributes.":[136],"proposed":[138],"first":[140],"utilizes":[141],"confidence":[142],"based":[143],"matching":[144,159],"discriminate":[146],"terms":[149],"ambiguity":[151],"reliability.":[154],"Then":[155],"it":[156],"performs":[157],"adaptive":[158],"reduce":[161],"impact":[163],"Experiment":[167],"results":[168],"demonstrate":[169],"outperforms":[172],"state-of-the-art":[174],"techniques":[175],"is":[177],"able":[178],"build":[180],"profiles":[184],"entities.":[188]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":2},{"year":2021,"cited_by_count":2},{"year":2020,"cited_by_count":3},{"year":2019,"cited_by_count":4},{"year":2018,"cited_by_count":2},{"year":2017,"cited_by_count":3},{"year":2016,"cited_by_count":3},{"year":2015,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
