{"id":"https://openalex.org/W2516893773","doi":"https://doi.org/10.1109/tkde.2016.2606399","title":"Linking Heterogeneous Data in the Semantic Web Using Scalable and Domain-Independent Candidate Selection","display_name":"Linking Heterogeneous Data in the Semantic Web Using Scalable and Domain-Independent Candidate Selection","publication_year":2016,"publication_date":"2016-09-07","ids":{"openalex":"https://openalex.org/W2516893773","doi":"https://doi.org/10.1109/tkde.2016.2606399","mag":"2516893773"},"language":"en","primary_location":{"id":"doi:10.1109/tkde.2016.2606399","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tkde.2016.2606399","pdf_url":null,"source":{"id":"https://openalex.org/S30698027","display_name":"IEEE Transactions on Knowledge and Data Engineering","issn_l":"1041-4347","issn":["1041-4347","1558-2191","2326-3865"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320439","host_organization_name":"IEEE Computer Society","host_organization_lineage":["https://openalex.org/P4310320439","https://openalex.org/P4310319808"],"host_organization_lineage_names":["IEEE Computer Society","Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Knowledge and Data Engineering","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5082272720","display_name":"Dezhao Song","orcid":"https://orcid.org/0000-0002-2553-3108"},"institutions":[{"id":"https://openalex.org/I68384125","display_name":"Thomson Reuters (United States)","ror":"https://ror.org/00m7gt169","country_code":"US","type":"company","lineage":["https://openalex.org/I68384125"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Dezhao Song","raw_affiliation_strings":["Research and Development, Thomson Reuters, 610 Opperman Drive, Eagan, MN"],"affiliations":[{"raw_affiliation_string":"Research and Development, Thomson Reuters, 610 Opperman Drive, Eagan, MN","institution_ids":["https://openalex.org/I68384125"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5069358812","display_name":"Yi Luo","orcid":"https://orcid.org/0000-0001-7051-219X"},"institutions":[{"id":"https://openalex.org/I1311688040","display_name":"Amazon (United States)","ror":"https://ror.org/04mv4n011","country_code":"US","type":"company","lineage":["https://openalex.org/I1311688040"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yi Luo","raw_affiliation_strings":["Amazon.com, Seattle, WA"],"affiliations":[{"raw_affiliation_string":"Amazon.com, Seattle, WA","institution_ids":["https://openalex.org/I1311688040"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5065113414","display_name":"Jeff Heflin","orcid":"https://orcid.org/0000-0002-7290-1495"},"institutions":[{"id":"https://openalex.org/I186143895","display_name":"Lehigh University","ror":"https://ror.org/012afjb06","country_code":"US","type":"education","lineage":["https://openalex.org/I186143895"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jeff Heflin","raw_affiliation_strings":["Department of Computer Science and Engineering, Lehigh University, Bethlehem, PA"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, Lehigh University, Bethlehem, PA","institution_ids":["https://openalex.org/I186143895"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5082272720"],"corresponding_institution_ids":["https://openalex.org/I68384125"],"apc_list":null,"apc_paid":null,"fwci":4.3086,"has_fulltext":false,"cited_by_count":24,"citation_normalized_percentile":{"value":0.94370513,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":91,"max":99},"biblio":{"volume":"29","issue":"1","first_page":"143","last_page":"156"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10215","display_name":"Semantic Web and Ontologies","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10679","display_name":"Service-Oriented Architecture and Web Services","score":0.9937000274658203,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8738583326339722},{"id":"https://openalex.org/keywords/selection","display_name":"Selection (genetic algorithm)","score":0.5517024993896484},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.5312196016311646},{"id":"https://openalex.org/keywords/semantic-web","display_name":"Semantic Web","score":0.530525803565979},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.457723468542099},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.35435736179351807},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.3460039794445038},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.21238276362419128},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.20905211567878723}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8738583326339722},{"id":"https://openalex.org/C81917197","wikidata":"https://www.wikidata.org/wiki/Q628760","display_name":"Selection (genetic algorithm)","level":2,"score":0.5517024993896484},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.5312196016311646},{"id":"https://openalex.org/C2129575","wikidata":"https://www.wikidata.org/wiki/Q54837","display_name":"Semantic Web","level":2,"score":0.530525803565979},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.457723468542099},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.35435736179351807},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.3460039794445038},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.21238276362419128},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.20905211567878723}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tkde.2016.2606399","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tkde.2016.2606399","pdf_url":null,"source":{"id":"https://openalex.org/S30698027","display_name":"IEEE Transactions on Knowledge and Data Engineering","issn_l":"1041-4347","issn":["1041-4347","1558-2191","2326-3865"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320439","host_organization_name":"IEEE Computer Society","host_organization_lineage":["https://openalex.org/P4310320439","https://openalex.org/P4310319808"],"host_organization_lineage_names":["IEEE Computer Society","Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Knowledge and Data Engineering","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Reduced inequalities","score":0.6499999761581421,"id":"https://metadata.un.org/sdg/10"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":50,"referenced_works":["https://openalex.org/W20190141","https://openalex.org/W44468461","https://openalex.org/W95330148","https://openalex.org/W154872207","https://openalex.org/W192652968","https://openalex.org/W1491268609","https://openalex.org/W1520740445","https://openalex.org/W1526669747","https://openalex.org/W1535469659","https://openalex.org/W1552847225","https://openalex.org/W1568705965","https://openalex.org/W1597082186","https://openalex.org/W1612155886","https://openalex.org/W1802829841","https://openalex.org/W1913703133","https://openalex.org/W1973659993","https://openalex.org/W2013093146","https://openalex.org/W2014743412","https://openalex.org/W2015191210","https://openalex.org/W2019226543","https://openalex.org/W2047029075","https://openalex.org/W2059139185","https://openalex.org/W2065259291","https://openalex.org/W2096742765","https://openalex.org/W2100548092","https://openalex.org/W2100684772","https://openalex.org/W2103734666","https://openalex.org/W2107799451","https://openalex.org/W2108991785","https://openalex.org/W2110219039","https://openalex.org/W2118100588","https://openalex.org/W2132658991","https://openalex.org/W2138745488","https://openalex.org/W2150916025","https://openalex.org/W2157896838","https://openalex.org/W2164456230","https://openalex.org/W2166988329","https://openalex.org/W2170478076","https://openalex.org/W2171450492","https://openalex.org/W2280050730","https://openalex.org/W2291215925","https://openalex.org/W2400360463","https://openalex.org/W2917757754","https://openalex.org/W3146259567","https://openalex.org/W4214588406","https://openalex.org/W6600815541","https://openalex.org/W6604099797","https://openalex.org/W6607889218","https://openalex.org/W6639731090","https://openalex.org/W6759494113"],"related_works":["https://openalex.org/W2351790455","https://openalex.org/W1561729373","https://openalex.org/W2800975405","https://openalex.org/W2384888906","https://openalex.org/W2355894329","https://openalex.org/W2570974996","https://openalex.org/W2352490706","https://openalex.org/W2357241418","https://openalex.org/W1526116433","https://openalex.org/W1598771521"],"abstract_inverted_index":{"Due":[0],"to":[1,30,46,54,66,75,89,111,120,135,179,189,192],"the":[2,6,9,37,42,76,91,105,109,121,138,174],"decentralized":[3],"nature":[4],"of":[5,44,93,98,108,124,200],"Semantic":[7,38,224],"Web,":[8,39],"same":[10,77,122],"real-world":[11,78],"entity":[12,94],"may":[13],"be":[14,190],"described":[15],"in":[16,36,187],"various":[17],"data":[18,32],"sources":[19],"with":[20,196],"different":[21],"ontologies":[22],"and":[23,34,227,239],"assigned":[24],"syntactically":[25],"distinct":[26],"identifiers.":[27],"In":[28,80],"order":[29,188],"facilitate":[31],"utilization":[33],"consumption":[35],"without":[40],"compromising":[41],"freedom":[43],"people":[45],"publish":[47],"their":[48],"data,":[49],"one":[50],"critical":[51],"problem":[52],"is":[53,63,133,209],"appropriately":[55],"interlink":[56],"such":[57,141],"heterogeneous":[58,194],"data.":[59],"This":[60],"interlinking":[61],"process":[62],"sometimes":[64],"referred":[65],"as":[67],"Entity":[68],"Matching,":[69],"i.e.,":[70],"finding":[71],"which":[72],"identifiers":[73],"refer":[74],"entity.":[79],"this":[81],"paper,":[82],"we":[83,100,145],"propose":[84,101,146],"two":[85,214],"candidate":[86,150,215],"selection":[87,216],"algorithms":[88,217,232],"improve":[90],"scalability":[92],"matching":[95,106],"systems.":[96],"First":[97],"all,":[99],"HistSim":[102],"that":[103,115,148,163,229],"utilizes":[104],"histories":[107],"instances":[110],"prune":[112],"instance":[113,151],"pairs":[114,152],"are":[116,164,171],"not":[117],"sufficiently":[118],"similar":[119,184],"pool":[123],"other":[125],"instances.":[126,185],"A":[127],"sigmoid":[128],"function":[129],"based":[130],"thresholding":[131],"method":[132],"proposed":[134,231],"automatically":[136,205],"adjust":[137],"threshold":[139],"for":[140,183,204],"commonality":[142],"on-the-fly.":[143],"Furthermore,":[144],"DisNGram":[147],"selects":[149],"by":[153],"computing":[154],"a":[155,197,202],"character-level":[156],"similarity":[157],"metric":[158],"on":[159,173,222,237],"discriminating":[160],"literal":[161,177],"values":[162,178],"chosen":[165,175],"using":[166],"domain-independent":[167],"unsupervised":[168],"learning.":[169],"Instances":[170],"indexed":[172],"predicates'":[176],"enable":[180],"efficient":[181],"look-up":[182],"Finally,":[186],"able":[191],"handle":[193],"datasets":[195],"large":[198],"number":[199],"predicates,":[201],"mechanism":[203],"determining":[206],"predicate":[207],"comparability":[208],"proposed.":[210],"We":[211],"evaluate":[212],"our":[213,230],"against":[218],"six":[219],"state-of-the-art":[220,235],"systems":[221,236],"three":[223],"Web":[225],"datasets,":[226],"demonstrate":[228],"frequently":[233],"outperform":[234],"F1-score":[238],"runtime.":[240]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":3},{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":2},{"year":2020,"cited_by_count":3},{"year":2019,"cited_by_count":5},{"year":2018,"cited_by_count":4},{"year":2017,"cited_by_count":3}],"updated_date":"2026-02-25T08:12:03.925757","created_date":"2025-10-10T00:00:00"}
