{"id":"https://openalex.org/W1964879903","doi":"https://doi.org/10.1145/1552303.1552304","title":"Author name disambiguation in MEDLINE","display_name":"Author name disambiguation in MEDLINE","publication_year":2009,"publication_date":"2009-07-01","ids":{"openalex":"https://openalex.org/W1964879903","doi":"https://doi.org/10.1145/1552303.1552304","mag":"1964879903","pmid":"https://pubmed.ncbi.nlm.nih.gov/20072710"},"language":"en","primary_location":{"id":"doi:10.1145/1552303.1552304","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1552303.1552304","pdf_url":null,"source":{"id":"https://openalex.org/S41523882","display_name":"ACM Transactions on Knowledge Discovery from Data","issn_l":"1556-4681","issn":["1556-4681","1556-472X"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Knowledge Discovery from Data","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://figshare.com/articles/presentation/Author_Name_Disambiguation_in_Medline/10959596","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5079566229","display_name":"Vetle I. Torvik","orcid":"https://orcid.org/0000-0002-0035-1850"},"institutions":[{"id":"https://openalex.org/I39422238","display_name":"University of Illinois Chicago","ror":"https://ror.org/02mpq6x41","country_code":"US","type":"education","lineage":["https://openalex.org/I39422238"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Vetle I. Torvik","raw_affiliation_strings":["University of Illinois at Chicago, Chicago, IL, USA"],"affiliations":[{"raw_affiliation_string":"University of Illinois at Chicago, Chicago, IL, USA","institution_ids":["https://openalex.org/I39422238"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5033859037","display_name":"Neil R. Smalheiser","orcid":"https://orcid.org/0000-0003-1079-3406"},"institutions":[{"id":"https://openalex.org/I39422238","display_name":"University of Illinois Chicago","ror":"https://ror.org/02mpq6x41","country_code":"US","type":"education","lineage":["https://openalex.org/I39422238"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Neil R. Smalheiser","raw_affiliation_strings":["University of Illinois at Chicago, Chicago, IL, USA"],"affiliations":[{"raw_affiliation_string":"University of Illinois at Chicago, Chicago, IL, USA","institution_ids":["https://openalex.org/I39422238"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5079566229"],"corresponding_institution_ids":["https://openalex.org/I39422238"],"apc_list":null,"apc_paid":null,"fwci":19.1631,"has_fulltext":false,"cited_by_count":284,"citation_normalized_percentile":{"value":0.99484536,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":100},"biblio":{"volume":"3","issue":"3","first_page":"1","last_page":"29"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.9936000108718872,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9898999929428101,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/medline","display_name":"MEDLINE","score":0.7037131190299988},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.637448787689209},{"id":"https://openalex.org/keywords/transitive-relation","display_name":"Transitive relation","score":0.6004016399383545},{"id":"https://openalex.org/keywords/pairwise-comparison","display_name":"Pairwise comparison","score":0.5785312056541443},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.5740478038787842},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.44378662109375},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3440703749656677},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.15473997592926025},{"id":"https://openalex.org/keywords/combinatorics","display_name":"Combinatorics","score":0.08397844433784485}],"concepts":[{"id":"https://openalex.org/C2779473830","wikidata":"https://www.wikidata.org/wiki/Q1540899","display_name":"MEDLINE","level":2,"score":0.7037131190299988},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.637448787689209},{"id":"https://openalex.org/C191399111","wikidata":"https://www.wikidata.org/wiki/Q64861","display_name":"Transitive relation","level":2,"score":0.6004016399383545},{"id":"https://openalex.org/C184898388","wikidata":"https://www.wikidata.org/wiki/Q1435712","display_name":"Pairwise comparison","level":2,"score":0.5785312056541443},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.5740478038787842},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.44378662109375},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3440703749656677},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.15473997592926025},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.08397844433784485},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0}],"mesh":[],"locations_count":6,"locations":[{"id":"doi:10.1145/1552303.1552304","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1552303.1552304","pdf_url":null,"source":{"id":"https://openalex.org/S41523882","display_name":"ACM Transactions on Knowledge Discovery from Data","issn_l":"1556-4681","issn":["1556-4681","1556-472X"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Knowledge Discovery from Data","raw_type":"journal-article"},{"id":"pmid:20072710","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/20072710","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM transactions on knowledge discovery from data","raw_type":null},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.330.5682","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.330.5682","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://arrowsmith.psych.uic.edu/arrowsmith_uic/tutorial/torvik_tkdd_2009.pdf","raw_type":"text"},{"id":"pmh:oai:europepmc.org:1948321","is_oa":false,"landing_page_url":"https://www.ncbi.nlm.nih.gov/pmc/articles/2805000","pdf_url":null,"source":{"id":"https://openalex.org/S4306400806","display_name":"Europe PMC (PubMed Central)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1303153112","host_organization_name":"European Bioinformatics Institute","host_organization_lineage":["https://openalex.org/I1303153112"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Text"},{"id":"pmh:oai:figshare.com:article/10959596","is_oa":true,"landing_page_url":"https://figshare.com/articles/presentation/Author_Name_Disambiguation_in_Medline/10959596","pdf_url":null,"source":{"id":"https://openalex.org/S4377196282","display_name":"Figshare","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4210132348","host_organization_name":"Figshare (United Kingdom)","host_organization_lineage":["https://openalex.org/I4210132348"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Text"},{"id":"pmh:oai:pubmedcentral.nih.gov:2805000","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S2764455111","display_name":"PubMed Central","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"ACM Trans Knowl Discov Data","raw_type":"Text"}],"best_oa_location":{"id":"pmh:oai:figshare.com:article/10959596","is_oa":true,"landing_page_url":"https://figshare.com/articles/presentation/Author_Name_Disambiguation_in_Medline/10959596","pdf_url":null,"source":{"id":"https://openalex.org/S4377196282","display_name":"Figshare","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4210132348","host_organization_name":"Figshare (United Kingdom)","host_organization_lineage":["https://openalex.org/I4210132348"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Text"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1000461662","display_name":null,"funder_award_id":"LM008364","funder_id":"https://openalex.org/F4320332161","funder_display_name":"National Institutes of Health"}],"funders":[{"id":"https://openalex.org/F4320332161","display_name":"National Institutes of Health","ror":"https://ror.org/01cwqze88"},{"id":"https://openalex.org/F4320332222","display_name":"University of Illinois at Urbana-Champaign","ror":"https://ror.org/047426m28"},{"id":"https://openalex.org/F4320337372","display_name":"U.S. National Library of Medicine","ror":"https://ror.org/0060t0j89"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":40,"referenced_works":["https://openalex.org/W1490467549","https://openalex.org/W1496319844","https://openalex.org/W1536860849","https://openalex.org/W1547705211","https://openalex.org/W1687451443","https://openalex.org/W1766412966","https://openalex.org/W1846215264","https://openalex.org/W1969587718","https://openalex.org/W1969944699","https://openalex.org/W1982149803","https://openalex.org/W2007172042","https://openalex.org/W2007682403","https://openalex.org/W2026499502","https://openalex.org/W2027482947","https://openalex.org/W2033626137","https://openalex.org/W2056826404","https://openalex.org/W2069334446","https://openalex.org/W2073308541","https://openalex.org/W2087183379","https://openalex.org/W2090987348","https://openalex.org/W2098146716","https://openalex.org/W2100822723","https://openalex.org/W2106300185","https://openalex.org/W2111116800","https://openalex.org/W2116690618","https://openalex.org/W2121123524","https://openalex.org/W2121821044","https://openalex.org/W2128600649","https://openalex.org/W2142515600","https://openalex.org/W2148019918","https://openalex.org/W2153911474","https://openalex.org/W2158608196","https://openalex.org/W2162337786","https://openalex.org/W2164173383","https://openalex.org/W2166159350","https://openalex.org/W2168547935","https://openalex.org/W2913519381","https://openalex.org/W2915936436","https://openalex.org/W4387589449","https://openalex.org/W6639924009"],"related_works":["https://openalex.org/W1947281443","https://openalex.org/W2804130942","https://openalex.org/W2072083441","https://openalex.org/W3167473147","https://openalex.org/W1982515566","https://openalex.org/W1983764570","https://openalex.org/W2020905459","https://openalex.org/W2951251594","https://openalex.org/W2087571931","https://openalex.org/W2163346261"],"abstract_inverted_index":{"BACKGROUND:":[0],"We":[1],"recently":[2],"described":[3],"\"Author-ity,\"":[4],"a":[5,113,124,168],"model":[6,59,223],"for":[7,66,107,118,130,144,250,276],"estimating":[8,108],"the":[9,17,24,54,57,67,109,197,210,248,261],"probability":[10],"that":[11,56,135,157,170,258],"two":[12,193],"articles":[13,71,134,152,207],"in":[14,49,72,153],"MEDLINE,":[15],"sharing":[16],"same":[18,25,198,211],"author":[19,41,64,146],"name,":[20,33],"were":[21,142],"written":[22,208],"by":[23,209],"individual.":[26],"Features":[27],"include":[28],"shared":[29],"title":[30],"words,":[31],"journal":[32],"coauthors,":[34],"medical":[35],"subject":[36],"headings,":[37],"language,":[38],"affiliations,":[39],"and":[40,47,81,86,92,122,161,238,254,266,280],"name":[42,160,173,233],"features":[43],"(middle":[44],"initial,":[45,163],"suffix,":[46],"prevalence":[48],"MEDLINE).":[50],"Here":[51],"we":[52],"test":[53],"hypothesis":[55],"Author-ity":[58,166,222,271],"will":[60,259],"suffice":[61],"to":[62,164,178,213,228,241],"disambiguate":[63],"names":[65,80,91,147],"vast":[68],"majority":[69],"of":[70,98,133,180,202,218,263],"MEDLINE.":[73],"METHODS:":[74],"Enhancements":[75],"include:":[76],"(a)":[77],"incorporating":[78],"first":[79,162],"their":[82],"variants,":[83],"email":[84],"addresses,":[85],"correlations":[87],"between":[88],"specific":[89],"last":[90,159],"affiliation":[93],"words;":[94],"(b)":[95],"new":[96,105,251],"methods":[97,106],"generating":[99],"large":[100],"unbiased":[101],"training":[102],"sets;":[103],"(c)":[104],"prior":[110],"probability;":[111],"(d)":[112],"weighted":[114],"least":[115],"squares":[116],"algorithm":[117,129],"correcting":[119],"transitivity":[120],"violations;":[121],"(e)":[123],"maximum":[125],"likelihood":[126],"based":[127],"agglomerative":[128],"computing":[131],"clusters":[132],"represent":[136],"inferred":[137,183],"author-individuals.":[138],"RESULTS:":[139],"Pairwise":[140],"comparisons":[141],"computed":[143],"all":[145,149],"on":[148,174],"15.3":[150],"million":[151,182],"MEDLINE":[154],"(2006":[155],"baseline),":[156],"share":[158],"create":[165],"2006,":[167],"database":[169,273],"has":[171],"each":[172,175],"article":[176],"assigned":[177],"one":[179],"6.7":[181],"author-individual":[184],"clusters.":[185],"Recall":[186],"is":[187,274],"estimated":[188],"at":[189],"~98.8%.":[190],"Lumping":[191],"(putting":[192],"different":[194],"individuals":[195],"into":[196],"cluster)":[199,215],"affects":[200,216],"~0.5%":[201],"clusters,":[203],"whereas":[204],"splitting":[205],"(assigning":[206],"individual":[212],">1":[214],"~2%":[217],"articles.":[219],"IMPACT:":[220],"The":[221,270],"can":[224,281],"be":[225,282],"applied":[226],"generally":[227],"other":[229],"bibliographic":[230],"databases.":[231],"Author":[232],"disambiguation":[234],"allows":[235],"information":[236],"retrieval":[237],"data":[239,252],"integration":[240],"become":[242],"person-centered,":[243],"not":[244],"just":[245],"document-centered,":[246],"setting":[247],"stage":[249],"mining":[253],"social":[255],"network":[256],"tools":[257],"facilitate":[260],"analysis":[262],"scholarly":[264],"publishing":[265],"collaboration":[267],"behavior.":[268],"AVAILABILITY:":[269],"2006":[272],"available":[275],"nonprofit":[277],"academic":[278],"research,":[279],"freely":[283],"queried":[284],"via":[285],"http://arrowsmith.psych.uic.edu.":[286]},"counts_by_year":[{"year":2026,"cited_by_count":4},{"year":2025,"cited_by_count":6},{"year":2024,"cited_by_count":14},{"year":2023,"cited_by_count":14},{"year":2022,"cited_by_count":13},{"year":2021,"cited_by_count":29},{"year":2020,"cited_by_count":25},{"year":2019,"cited_by_count":18},{"year":2018,"cited_by_count":16},{"year":2017,"cited_by_count":16},{"year":2016,"cited_by_count":21},{"year":2015,"cited_by_count":23},{"year":2014,"cited_by_count":25},{"year":2013,"cited_by_count":14},{"year":2012,"cited_by_count":15}],"updated_date":"2026-04-05T17:49:38.594831","created_date":"2025-10-10T00:00:00"}
