{"id":"https://openalex.org/W2063451677","doi":"https://doi.org/10.1145/2187980.2188165","title":"Combining classification with clustering for web person disambiguation","display_name":"Combining classification with clustering for web person disambiguation","publication_year":2012,"publication_date":"2012-04-16","ids":{"openalex":"https://openalex.org/W2063451677","doi":"https://doi.org/10.1145/2187980.2188165","mag":"2063451677"},"language":"en","primary_location":{"id":"doi:10.1145/2187980.2188165","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2187980.2188165","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 21st International Conference on World Wide Web","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101549939","display_name":"Jian Xu","orcid":null},"institutions":[{"id":"https://openalex.org/I14243506","display_name":"Hong Kong Polytechnic University","ror":"https://ror.org/0030zas98","country_code":"HK","type":"education","lineage":["https://openalex.org/I14243506"]}],"countries":["HK"],"is_corresponding":true,"raw_author_name":"Jian Xu","raw_affiliation_strings":["The Hong Kong Polytechnic University, Hong Kong, China"],"affiliations":[{"raw_affiliation_string":"The Hong Kong Polytechnic University, Hong Kong, China","institution_ids":["https://openalex.org/I14243506"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5016391596","display_name":"Qin Lu","orcid":"https://orcid.org/0000-0002-9092-2476"},"institutions":[{"id":"https://openalex.org/I14243506","display_name":"Hong Kong Polytechnic University","ror":"https://ror.org/0030zas98","country_code":"HK","type":"education","lineage":["https://openalex.org/I14243506"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Qin Lu","raw_affiliation_strings":["The Hong Kong Polytechnic University, Hong Kong, China"],"affiliations":[{"raw_affiliation_string":"The Hong Kong Polytechnic University, Hong Kong, China","institution_ids":["https://openalex.org/I14243506"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5029648803","display_name":"Zhengzhong Liu","orcid":null},"institutions":[{"id":"https://openalex.org/I14243506","display_name":"Hong Kong Polytechnic University","ror":"https://ror.org/0030zas98","country_code":"HK","type":"education","lineage":["https://openalex.org/I14243506"]}],"countries":["HK"],"is_corresponding":false,"raw_author_name":"Zhengzhong Liu","raw_affiliation_strings":["The Hong Kong Polytechnic University, Hong Kong, China"],"affiliations":[{"raw_affiliation_string":"The Hong Kong Polytechnic University, Hong Kong, China","institution_ids":["https://openalex.org/I14243506"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5101549939"],"corresponding_institution_ids":["https://openalex.org/I14243506"],"apc_list":null,"apc_paid":null,"fwci":2.0699,"has_fulltext":false,"cited_by_count":9,"citation_normalized_percentile":{"value":0.87703532,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"637","last_page":"638"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9944999814033508,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9944999814033508,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11819","display_name":"Data-Driven Disease Surveillance","score":0.9731000065803528,"subfield":{"id":"https://openalex.org/subfields/2713","display_name":"Epidemiology"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.932699978351593,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7620872259140015},{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.7244129776954651},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5805438756942749},{"id":"https://openalex.org/keywords/outlier","display_name":"Outlier","score":0.5599119067192078},{"id":"https://openalex.org/keywords/hierarchical-clustering","display_name":"Hierarchical clustering","score":0.558299720287323},{"id":"https://openalex.org/keywords/support-vector-machine","display_name":"Support vector machine","score":0.546501100063324},{"id":"https://openalex.org/keywords/feature-vector","display_name":"Feature vector","score":0.4951075613498688},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.4797288477420807},{"id":"https://openalex.org/keywords/classifier","display_name":"Classifier (UML)","score":0.4790416955947876},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.46002569794654846},{"id":"https://openalex.org/keywords/brown-clustering","display_name":"Brown clustering","score":0.43847474455833435},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.4285348951816559},{"id":"https://openalex.org/keywords/fuzzy-clustering","display_name":"Fuzzy clustering","score":0.4059208035469055},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.35285115242004395},{"id":"https://openalex.org/keywords/cure-data-clustering-algorithm","display_name":"CURE data clustering algorithm","score":0.2513163089752197}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7620872259140015},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.7244129776954651},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5805438756942749},{"id":"https://openalex.org/C79337645","wikidata":"https://www.wikidata.org/wiki/Q779824","display_name":"Outlier","level":2,"score":0.5599119067192078},{"id":"https://openalex.org/C92835128","wikidata":"https://www.wikidata.org/wiki/Q1277447","display_name":"Hierarchical clustering","level":3,"score":0.558299720287323},{"id":"https://openalex.org/C12267149","wikidata":"https://www.wikidata.org/wiki/Q282453","display_name":"Support vector machine","level":2,"score":0.546501100063324},{"id":"https://openalex.org/C83665646","wikidata":"https://www.wikidata.org/wiki/Q42139305","display_name":"Feature vector","level":2,"score":0.4951075613498688},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.4797288477420807},{"id":"https://openalex.org/C95623464","wikidata":"https://www.wikidata.org/wiki/Q1096149","display_name":"Classifier (UML)","level":2,"score":0.4790416955947876},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.46002569794654846},{"id":"https://openalex.org/C167984511","wikidata":"https://www.wikidata.org/wiki/Q17003931","display_name":"Brown clustering","level":5,"score":0.43847474455833435},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.4285348951816559},{"id":"https://openalex.org/C17212007","wikidata":"https://www.wikidata.org/wiki/Q5511111","display_name":"Fuzzy clustering","level":3,"score":0.4059208035469055},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.35285115242004395},{"id":"https://openalex.org/C33704608","wikidata":"https://www.wikidata.org/wiki/Q5014717","display_name":"CURE data clustering algorithm","level":4,"score":0.2513163089752197},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1145/2187980.2188165","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2187980.2188165","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 21st International Conference on World Wide Web","raw_type":"proceedings-article"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.309.8452","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.309.8452","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://www2012.wwwconference.org/proceedings/companion/p637.pdf","raw_type":"text"},{"id":"pmh:oai:ira.lib.polyu.edu.hk:10397/39817","is_oa":false,"landing_page_url":"http://hdl.handle.net/10397/39817","pdf_url":null,"source":{"id":"https://openalex.org/S4306400205","display_name":"PolyU Institutional Research Archive (Hong Kong Polytechnic University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I14243506","host_organization_name":"Hong Kong Polytechnic University","host_organization_lineage":["https://openalex.org/I14243506"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Conference Paper"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":12,"referenced_works":["https://openalex.org/W1480643256","https://openalex.org/W1510293779","https://openalex.org/W1601463586","https://openalex.org/W1981202432","https://openalex.org/W2060772621","https://openalex.org/W2136075087","https://openalex.org/W2139231629","https://openalex.org/W2170750237","https://openalex.org/W2299779362","https://openalex.org/W4285719527","https://openalex.org/W6684935517","https://openalex.org/W6764688901"],"related_works":["https://openalex.org/W4292477917","https://openalex.org/W2034251629","https://openalex.org/W2488882714","https://openalex.org/W2592952084","https://openalex.org/W2534504385","https://openalex.org/W2060765209","https://openalex.org/W4385569388","https://openalex.org/W2076085869","https://openalex.org/W4226017214","https://openalex.org/W4231226332"],"abstract_inverted_index":{"Web":[0],"Person":[1],"Disambiguation":[2],"is":[3,46,141],"often":[4],"conducted":[5,47,128],"through":[6,69],"clustering":[7,24,102],"web":[8],"documents":[9],"to":[10,32,35,114],"identify":[11,33],"different":[12],"namesakes":[13],"for":[14],"a":[15,21,28,70,88],"given":[16],"name.":[17],"This":[18],"paper":[19],"presents":[20],"new":[22],"key-phrased":[23],"method":[25],"combined":[26],"with":[27],"second":[29],"step":[30,107],"re-classification":[31,99],"outliers":[34,74],"improve":[36],"cluster":[37,64],"performance.":[38,147],"For":[39],"document":[40],"clustering,":[41],"the":[42,50,59,79,83,101,105,116,130,135,145],"hierarchical":[43],"agglomerative":[44],"approach":[45],"based":[48,137],"on":[49,129,138],"vector":[51],"space":[52],"model":[53,93],"which":[54],"uses":[55,100],"key":[56,89,139],"phrases":[57,140],"as":[58,94,108,113],"main":[60],"feature.":[61],"Outliers":[62],"of":[63,118],"results":[65],"are":[66,75],"then":[67,76],"identified":[68],"centroids-based":[71],"method.":[72],"The":[73,98],"reclassified":[77],"by":[78,123],"SVM":[80],"classifier":[81],"into":[82],"more":[84],"appropriate":[85],"clusters":[86],"using":[87],"phrase-based":[90],"string":[91],"kernel":[92],"its":[95,109],"feature":[96],"space.":[97],"result":[103],"in":[104,143],"first":[106],"training":[110,120],"data":[111,121],"so":[112],"avoid":[115],"use":[117],"separate":[119],"required":[122],"most":[124],"classification":[125],"algorithms.":[126],"Experiments":[127],"WePS-2":[131],"dataset":[132],"show":[133],"that":[134],"algorithm":[136],"effective":[142],"improving":[144],"WPD":[146]},"counts_by_year":[{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":1},{"year":2016,"cited_by_count":2},{"year":2015,"cited_by_count":2},{"year":2013,"cited_by_count":1},{"year":2012,"cited_by_count":2}],"updated_date":"2026-04-05T17:49:38.594831","created_date":"2025-10-10T00:00:00"}
