{"id":"https://openalex.org/W2007172042","doi":"https://doi.org/10.1145/1255175.1255243","title":"Efficient topic-based unsupervised name disambiguation","display_name":"Efficient topic-based unsupervised name disambiguation","publication_year":2007,"publication_date":"2007-06-18","ids":{"openalex":"https://openalex.org/W2007172042","doi":"https://doi.org/10.1145/1255175.1255243","mag":"2007172042"},"language":"en","primary_location":{"id":"doi:10.1145/1255175.1255243","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1255175.1255243","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 7th ACM/IEEE-CS joint conference on Digital libraries","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100688427","display_name":"Yang Song","orcid":"https://orcid.org/0000-0003-3420-4987"},"institutions":[{"id":"https://openalex.org/I130769515","display_name":"Pennsylvania State University","ror":"https://ror.org/04p491231","country_code":"US","type":"education","lineage":["https://openalex.org/I130769515"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yang Song","raw_affiliation_strings":["The Pennsylvania State University, University Park, PA","THE PENNSYLVANIA STATE UNIVERSITY, UNIVERSITY PARK, PA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"The Pennsylvania State University, University Park, PA","institution_ids":["https://openalex.org/I130769515"]},{"raw_affiliation_string":"THE PENNSYLVANIA STATE UNIVERSITY, UNIVERSITY PARK, PA","institution_ids":["https://openalex.org/I130769515"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100698792","display_name":"Jian Huang","orcid":"https://orcid.org/0000-0002-3783-2682"},"institutions":[{"id":"https://openalex.org/I130769515","display_name":"Pennsylvania State University","ror":"https://ror.org/04p491231","country_code":"US","type":"education","lineage":["https://openalex.org/I130769515"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jian Huang","raw_affiliation_strings":["The Pennsylvania State University, University Park, PA","THE PENNSYLVANIA STATE UNIVERSITY, UNIVERSITY PARK, PA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"The Pennsylvania State University, University Park, PA","institution_ids":["https://openalex.org/I130769515"]},{"raw_affiliation_string":"THE PENNSYLVANIA STATE UNIVERSITY, UNIVERSITY PARK, PA","institution_ids":["https://openalex.org/I130769515"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Isaac G. Councill","orcid":null},"institutions":[{"id":"https://openalex.org/I130769515","display_name":"Pennsylvania State University","ror":"https://ror.org/04p491231","country_code":"US","type":"education","lineage":["https://openalex.org/I130769515"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Isaac G. Councill","raw_affiliation_strings":["The Pennsylvania State University, University Park, PA","THE PENNSYLVANIA STATE UNIVERSITY, UNIVERSITY PARK, PA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"The Pennsylvania State University, University Park, PA","institution_ids":["https://openalex.org/I130769515"]},{"raw_affiliation_string":"THE PENNSYLVANIA STATE UNIVERSITY, UNIVERSITY PARK, PA","institution_ids":["https://openalex.org/I130769515"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5054585200","display_name":"Jia Li","orcid":"https://orcid.org/0000-0002-6077-7896"},"institutions":[{"id":"https://openalex.org/I130769515","display_name":"Pennsylvania State University","ror":"https://ror.org/04p491231","country_code":"US","type":"education","lineage":["https://openalex.org/I130769515"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jia Li","raw_affiliation_strings":["The Pennsylvania State University, University Park, PA","THE PENNSYLVANIA STATE UNIVERSITY, UNIVERSITY PARK, PA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"The Pennsylvania State University, University Park, PA","institution_ids":["https://openalex.org/I130769515"]},{"raw_affiliation_string":"THE PENNSYLVANIA STATE UNIVERSITY, UNIVERSITY PARK, PA","institution_ids":["https://openalex.org/I130769515"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5001294898","display_name":"C. Lee Giles","orcid":"https://orcid.org/0000-0002-1931-585X"},"institutions":[{"id":"https://openalex.org/I130769515","display_name":"Pennsylvania State University","ror":"https://ror.org/04p491231","country_code":"US","type":"education","lineage":["https://openalex.org/I130769515"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"C. Lee Giles","raw_affiliation_strings":["The Pennsylvania State University, University Park, PA","THE PENNSYLVANIA STATE UNIVERSITY, UNIVERSITY PARK, PA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"The Pennsylvania State University, University Park, PA","institution_ids":["https://openalex.org/I130769515"]},{"raw_affiliation_string":"THE PENNSYLVANIA STATE UNIVERSITY, UNIVERSITY PARK, PA","institution_ids":["https://openalex.org/I130769515"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":12.6294,"has_fulltext":false,"cited_by_count":159,"citation_normalized_percentile":{"value":0.98960367,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"342","last_page":"351"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9958999752998352,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.9879000186920166,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8322852849960327},{"id":"https://openalex.org/keywords/latent-dirichlet-allocation","display_name":"Latent Dirichlet allocation","score":0.7625627517700195},{"id":"https://openalex.org/keywords/topic-model","display_name":"Topic model","score":0.740670919418335},{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.6104369163513184},{"id":"https://openalex.org/keywords/probabilistic-latent-semantic-analysis","display_name":"Probabilistic latent semantic analysis","score":0.5798572301864624},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5351067185401917},{"id":"https://openalex.org/keywords/probabilistic-logic","display_name":"Probabilistic logic","score":0.4920041263103485},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.44037503004074097},{"id":"https://openalex.org/keywords/ambiguity","display_name":"Ambiguity","score":0.4390881657600403},{"id":"https://openalex.org/keywords/latent-variable","display_name":"Latent variable","score":0.4388652741909027},{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.4281255304813385},{"id":"https://openalex.org/keywords/unsupervised-learning","display_name":"Unsupervised learning","score":0.41300472617149353},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3576902151107788},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.32583513855934143}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8322852849960327},{"id":"https://openalex.org/C500882744","wikidata":"https://www.wikidata.org/wiki/Q269236","display_name":"Latent Dirichlet allocation","level":3,"score":0.7625627517700195},{"id":"https://openalex.org/C171686336","wikidata":"https://www.wikidata.org/wiki/Q3532085","display_name":"Topic model","level":2,"score":0.740670919418335},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.6104369163513184},{"id":"https://openalex.org/C112933361","wikidata":"https://www.wikidata.org/wiki/Q2845258","display_name":"Probabilistic latent semantic analysis","level":2,"score":0.5798572301864624},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5351067185401917},{"id":"https://openalex.org/C49937458","wikidata":"https://www.wikidata.org/wiki/Q2599292","display_name":"Probabilistic logic","level":2,"score":0.4920041263103485},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.44037503004074097},{"id":"https://openalex.org/C2780522230","wikidata":"https://www.wikidata.org/wiki/Q1140419","display_name":"Ambiguity","level":2,"score":0.4390881657600403},{"id":"https://openalex.org/C51167844","wikidata":"https://www.wikidata.org/wiki/Q4422623","display_name":"Latent variable","level":2,"score":0.4388652741909027},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.4281255304813385},{"id":"https://openalex.org/C8038995","wikidata":"https://www.wikidata.org/wiki/Q1152135","display_name":"Unsupervised learning","level":2,"score":0.41300472617149353},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3576902151107788},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.32583513855934143},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C120665830","wikidata":"https://www.wikidata.org/wiki/Q14620","display_name":"Optics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/1255175.1255243","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1255175.1255243","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 7th ACM/IEEE-CS joint conference on Digital libraries","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":27,"referenced_works":["https://openalex.org/W1490467549","https://openalex.org/W1505673315","https://openalex.org/W1536860849","https://openalex.org/W1632562993","https://openalex.org/W1774330103","https://openalex.org/W1880262756","https://openalex.org/W1982678692","https://openalex.org/W2016381774","https://openalex.org/W2018605660","https://openalex.org/W2042980227","https://openalex.org/W2056826404","https://openalex.org/W2059035258","https://openalex.org/W2075756051","https://openalex.org/W2103658758","https://openalex.org/W2107034620","https://openalex.org/W2107743791","https://openalex.org/W2120721759","https://openalex.org/W2124957865","https://openalex.org/W2130818673","https://openalex.org/W2140124448","https://openalex.org/W2147152072","https://openalex.org/W2153911474","https://openalex.org/W2162337786","https://openalex.org/W2162820221","https://openalex.org/W2168190036","https://openalex.org/W2171343266","https://openalex.org/W4233135949"],"related_works":["https://openalex.org/W1551384396","https://openalex.org/W2888805565","https://openalex.org/W2096865229","https://openalex.org/W2921491680","https://openalex.org/W2251863249","https://openalex.org/W4291700620","https://openalex.org/W2132052677","https://openalex.org/W3159709618","https://openalex.org/W4396666968","https://openalex.org/W2110027950"],"abstract_inverted_index":{"Name":[0],"ambiguity":[1],"is":[2],"a":[3,92,128],"special":[4],"case":[5],"of":[6,39,101,172],"identity":[7],"uncertainty":[8],"where":[9],"one":[10],"person":[11,41],"can":[12],"be":[13,161],"referenced":[14],"by":[15,70,126,174],"multiple":[16],"name":[17,27],"variations":[18],"in":[19,177],"different":[20],"situations":[21],"or":[22],"even":[23],"share":[24],"the":[25,37,61,99,114,170,182],"same":[26],"with":[28,103],"other":[29,148,164],"people.":[30],"In":[31,60],"this":[32],"paper,":[33],"we":[34],"focus":[35],"on":[36,134],"problem":[38],"disambiguating":[40,175],"names":[42,123],"within":[43],"web":[44,135],"pages":[45],"and":[46,53,83,97,107,122,137,156,159],"scientific":[47,138],"documents.":[48],"We":[49,167],"present":[50],"an":[51,111],"efficient":[52],"effective":[54],"two-stage":[55],"approach":[56,145],"to":[57,105,163],"disambiguate":[58],"names.":[59],"first":[62],"stage,":[63],"two":[64,72],"novel":[65],"topic-based":[66],"models":[67,89],"are":[68,117,124],"proposed":[69],"extending":[71],"hierarchical":[73,129],"Bayesian":[74],"text":[75],"models,":[76],"namely":[77],"Probabilistic":[78],"Latent":[79,84],"Semantic":[80],"Analysis":[81],"(PLSA)":[82],"Dirichlet":[85],"Allocation":[86],"(LDA).":[87],"Our":[88],"explicitly":[90],"introduce":[91],"new":[93],"variable":[94],"for":[95],"persons":[96,106],"learn":[98],"distribution":[100],"topics":[102],"regard":[104],"words.":[108],"After":[109],"learning":[110,150],"initial":[112],"model,":[113],"topic":[115],"distributions":[116],"treated":[118],"as":[119,153],"feature":[120],"sets":[121],"disambiguated":[125],"leveraging":[127],"agglomerative":[130],"clustering":[131,155,158],"method.":[132],"Experiments":[133],"data":[136],"documents":[139],"from":[140,181],"CiteSeer":[141,184],"indicate":[142],"that":[143],"our":[144],"consistently":[146],"outperforms":[147],"unsupervised":[149],"methods":[151],"such":[152],"spectral":[154],"DBSCAN":[157],"could":[160],"extended":[162],"research":[165],"fields.":[166],"empirically":[168],"addressed":[169],"issue":[171],"scalability":[173],"authors":[176],"over":[178],"750,000":[179],"papers":[180],"entire":[183],"dataset.":[185]},"counts_by_year":[{"year":2025,"cited_by_count":3},{"year":2022,"cited_by_count":4},{"year":2021,"cited_by_count":8},{"year":2020,"cited_by_count":6},{"year":2019,"cited_by_count":6},{"year":2018,"cited_by_count":7},{"year":2017,"cited_by_count":11},{"year":2016,"cited_by_count":15},{"year":2015,"cited_by_count":12},{"year":2014,"cited_by_count":8},{"year":2013,"cited_by_count":11},{"year":2012,"cited_by_count":17}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
