{"id":"https://openalex.org/W2032110807","doi":"https://doi.org/10.1145/2487575.2487662","title":"Exploiting user clicks for automatic seed set generation for entity matching","display_name":"Exploiting user clicks for automatic seed set generation for entity matching","publication_year":2013,"publication_date":"2013-08-11","ids":{"openalex":"https://openalex.org/W2032110807","doi":"https://doi.org/10.1145/2487575.2487662","mag":"2032110807"},"language":"en","primary_location":{"id":"doi:10.1145/2487575.2487662","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2487575.2487662","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 19th ACM SIGKDD international conference on Knowledge discovery and data mining","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101581308","display_name":"Xiao Bai","orcid":"https://orcid.org/0000-0002-7491-2454"},"institutions":[{"id":"https://openalex.org/I2800095910","display_name":"Yahoo (Spain)","ror":"https://ror.org/03gq8sg42","country_code":"ES","type":"company","lineage":["https://openalex.org/I2800095910","https://openalex.org/I4210134091"]}],"countries":["ES"],"is_corresponding":true,"raw_author_name":"Xiao Bai","raw_affiliation_strings":["Yahoo! Research, Barcelona, Spain"],"affiliations":[{"raw_affiliation_string":"Yahoo! Research, Barcelona, Spain","institution_ids":["https://openalex.org/I2800095910"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5039618797","display_name":"Flavio Junqueira","orcid":"https://orcid.org/0009-0000-6789-5505"},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]},{"id":"https://openalex.org/I4210164937","display_name":"Microsoft Research (United Kingdom)","ror":"https://ror.org/05k87vq12","country_code":"GB","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210164937"]}],"countries":["GB","US"],"is_corresponding":false,"raw_author_name":"Flavio P. Junqueira","raw_affiliation_strings":["Microsoft Research, Cambridge, United Kingdom","Microsoft Research, Cambridge, United Kingdom ("],"affiliations":[{"raw_affiliation_string":"Microsoft Research, Cambridge, United Kingdom","institution_ids":["https://openalex.org/I4210164937"]},{"raw_affiliation_string":"Microsoft Research, Cambridge, United Kingdom (","institution_ids":["https://openalex.org/I1290206253"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5038858575","display_name":"Srinivasan H. Sengamedu","orcid":"https://orcid.org/0000-0003-1847-8398"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Srinivasan H. Sengamedu","raw_affiliation_strings":["Komli Labs, Bangalore, India"],"affiliations":[{"raw_affiliation_string":"Komli Labs, Bangalore, India","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5101581308"],"corresponding_institution_ids":["https://openalex.org/I2800095910"],"apc_list":null,"apc_paid":null,"fwci":0.8219,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.77210333,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"980","last_page":"988"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9962000250816345,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11273","display_name":"Advanced Graph Neural Networks","score":0.989300012588501,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8672165870666504},{"id":"https://openalex.org/keywords/exploit","display_name":"Exploit","score":0.6456003189086914},{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.6443002820014954},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.6386431455612183},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.6089274883270264},{"id":"https://openalex.org/keywords/schema-matching","display_name":"Schema matching","score":0.5920634865760803},{"id":"https://openalex.org/keywords/web-page","display_name":"Web page","score":0.5202711224555969},{"id":"https://openalex.org/keywords/classifier","display_name":"Classifier (UML)","score":0.5028805136680603},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.46804046630859375},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.4660215377807617},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.427891343832016},{"id":"https://openalex.org/keywords/data-integration","display_name":"Data integration","score":0.3093368113040924},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.28588181734085083},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.2819783389568329},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.24548736214637756}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8672165870666504},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.6456003189086914},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.6443002820014954},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.6386431455612183},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.6089274883270264},{"id":"https://openalex.org/C2777327318","wikidata":"https://www.wikidata.org/wiki/Q1408390","display_name":"Schema matching","level":3,"score":0.5920634865760803},{"id":"https://openalex.org/C21959979","wikidata":"https://www.wikidata.org/wiki/Q36774","display_name":"Web page","level":2,"score":0.5202711224555969},{"id":"https://openalex.org/C95623464","wikidata":"https://www.wikidata.org/wiki/Q1096149","display_name":"Classifier (UML)","level":2,"score":0.5028805136680603},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.46804046630859375},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.4660215377807617},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.427891343832016},{"id":"https://openalex.org/C72634772","wikidata":"https://www.wikidata.org/wiki/Q386824","display_name":"Data integration","level":2,"score":0.3093368113040924},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.28588181734085083},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.2819783389568329},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.24548736214637756},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/2487575.2487662","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2487575.2487662","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 19th ACM SIGKDD international conference on Knowledge discovery and data mining","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320322930","display_name":"Ministerio de Ciencia e Innovaci\u00f3n","ror":"https://ror.org/034900433"},{"id":"https://openalex.org/F4320338080","display_name":"European Social Fund","ror":"https://ror.org/00k4n6c32"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":29,"referenced_works":["https://openalex.org/W1593560629","https://openalex.org/W1982287794","https://openalex.org/W1994714765","https://openalex.org/W1995561370","https://openalex.org/W2001383125","https://openalex.org/W2026080185","https://openalex.org/W2029295509","https://openalex.org/W2035907850","https://openalex.org/W2041439319","https://openalex.org/W2058201162","https://openalex.org/W2063049279","https://openalex.org/W2086378526","https://openalex.org/W2100679666","https://openalex.org/W2104511295","https://openalex.org/W2111116800","https://openalex.org/W2133299088","https://openalex.org/W2133576408","https://openalex.org/W2160555926","https://openalex.org/W2163676312","https://openalex.org/W2164456230","https://openalex.org/W2170616854","https://openalex.org/W2170902582","https://openalex.org/W2171743956","https://openalex.org/W2753693129","https://openalex.org/W3145128584","https://openalex.org/W4231154391","https://openalex.org/W4285719527","https://openalex.org/W6981613111","https://openalex.org/W7029321148"],"related_works":["https://openalex.org/W1128683088","https://openalex.org/W2372910313","https://openalex.org/W3138074544","https://openalex.org/W4310041472","https://openalex.org/W2938811602","https://openalex.org/W2036644834","https://openalex.org/W2408969024","https://openalex.org/W2016611314","https://openalex.org/W2243208152","https://openalex.org/W2080890385"],"abstract_inverted_index":{"Matching":[0],"entities":[1],"from":[2,122,152],"different":[3],"information":[4,28],"sources":[5,29],"is":[6,70,161],"a":[7,76,153],"very":[8],"important":[9],"problem":[10],"in":[11],"data":[12,15,60,95,160,169],"analysis":[13],"and":[14,25,31,103,106,173],"integration.":[16],"It":[17],"is,":[18],"however,":[19],"challenging":[20],"due":[21],"to":[22,37,56,81,93,100,110,136],"the":[23,32,84,137,157,163],"number":[24],"diversity":[26],"of":[27,67,144,149,167],"involved,":[30],"significant":[33],"editorial":[34,168],"efforts":[35],"required":[36],"collect":[38],"sufficient":[39],"training":[40,59],"data.":[41],"In":[42],"this":[43],"paper,":[44],"we":[45,127],"present":[46],"an":[47,141],"approach":[48,69],"that":[49,71,134,166],"leverages":[50],"user":[51],"clicks":[52],"during":[53],"Web":[54,72,104],"search":[55],"automatically":[57],"generate":[58],"for":[61,75],"entity":[62],"matching.":[63],"The":[64,147],"key":[65],"insight":[66],"our":[68],"pages":[73,121],"clicked":[74],"given":[77],"query":[78],"are":[79],"likely":[80],"be":[82],"about":[83],"same":[85,138],"entity.":[86],"We":[87],"use":[88],"random":[89],"walk":[90],"with":[91,140,175],"restart":[92],"reduce":[94],"sparseness,":[96],"rely":[97],"on":[98,156],"co-clustering":[99],"group":[101],"queries":[102],"pages,":[105],"exploit":[107],"page":[108],"similarity":[109],"improve":[111],"matching":[112,150],"precision.":[113],"Experimental":[114],"results":[115],"show":[116],"that:":[117],"(i)":[118],"With":[119],"360K":[120],"6":[123],"major":[124],"travel":[125],"websites,":[126],"obtain":[128],"84K":[129],"matchings":[130],"(of":[131],"179K":[132],"pages)":[133],"refer":[135],"entities,":[139],"average":[142],"precision":[143],"0.826;":[145],"(ii)":[146],"quality":[148],"obtained":[151],"classifier":[154],"trained":[155],"resulted":[158],"seed":[159],"promising:":[162],"performance":[164],"matches":[165],"at":[170],"small":[171],"size":[172],"improves":[174],"size.":[176]},"counts_by_year":[{"year":2015,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
