{"id":"https://openalex.org/W2883357302","doi":"https://doi.org/10.1007/s11192-018-2865-9","title":"The impact of imbalanced training data on machine learning for author name disambiguation","display_name":"The impact of imbalanced training data on machine learning for author name disambiguation","publication_year":2018,"publication_date":"2018-07-27","ids":{"openalex":"https://openalex.org/W2883357302","doi":"https://doi.org/10.1007/s11192-018-2865-9","mag":"2883357302"},"language":"en","primary_location":{"id":"doi:10.1007/s11192-018-2865-9","is_oa":false,"landing_page_url":"https://doi.org/10.1007/s11192-018-2865-9","pdf_url":null,"source":{"id":"https://openalex.org/S148561398","display_name":"Scientometrics","issn_l":"0138-9130","issn":["0138-9130","1588-2861"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320108","host_organization_name":"Springer Nature (Netherlands)","host_organization_lineage":["https://openalex.org/P4310320108","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Nature (Netherlands)","Springer Nature"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Scientometrics","raw_type":"journal-article"},"type":"article","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/1808.00525","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100757432","display_name":"Jinseok Kim","orcid":"https://orcid.org/0000-0001-6481-2065"},"institutions":[{"id":"https://openalex.org/I27837315","display_name":"University of Michigan","ror":"https://ror.org/00jmfr291","country_code":"US","type":"education","lineage":["https://openalex.org/I27837315"]},{"id":"https://openalex.org/I70983195","display_name":"Syracuse University","ror":"https://ror.org/025r5qe02","country_code":"US","type":"education","lineage":["https://openalex.org/I70983195"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Jinseok Kim","raw_affiliation_strings":["Institute for Research on Innovation and Science, Survey Research Center, Institute for Social Research, University of Michigan, 330 Packard Street, Ann Arbor, MI, 48104, USA","School of Information Studies, Syracuse University, 343 Hinds Hall, Syracuse, NY, 13210, USA"],"raw_orcid":"https://orcid.org/0000-0001-6481-2065","affiliations":[{"raw_affiliation_string":"Institute for Research on Innovation and Science, Survey Research Center, Institute for Social Research, University of Michigan, 330 Packard Street, Ann Arbor, MI, 48104, USA","institution_ids":["https://openalex.org/I27837315"]},{"raw_affiliation_string":"School of Information Studies, Syracuse University, 343 Hinds Hall, Syracuse, NY, 13210, USA","institution_ids":["https://openalex.org/I70983195"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5076986012","display_name":"Jenna Kim","orcid":"https://orcid.org/0000-0001-7438-448X"},"institutions":[{"id":"https://openalex.org/I27837315","display_name":"University of Michigan","ror":"https://ror.org/00jmfr291","country_code":"US","type":"education","lineage":["https://openalex.org/I27837315"]},{"id":"https://openalex.org/I70983195","display_name":"Syracuse University","ror":"https://ror.org/025r5qe02","country_code":"US","type":"education","lineage":["https://openalex.org/I70983195"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jenna Kim","raw_affiliation_strings":["Institute for Research on Innovation and Science, Survey Research Center, Institute for Social Research, University of Michigan, 330 Packard Street, Ann Arbor, MI, 48104, USA","School of Information Studies, Syracuse University, 343 Hinds Hall, Syracuse, NY, 13210, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Institute for Research on Innovation and Science, Survey Research Center, Institute for Social Research, University of Michigan, 330 Packard Street, Ann Arbor, MI, 48104, USA","institution_ids":["https://openalex.org/I27837315"]},{"raw_affiliation_string":"School of Information Studies, Syracuse University, 343 Hinds Hall, Syracuse, NY, 13210, USA","institution_ids":["https://openalex.org/I70983195"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5100757432"],"corresponding_institution_ids":["https://openalex.org/I27837315","https://openalex.org/I70983195"],"apc_list":{"value":2290,"currency":"EUR","value_usd":2890},"apc_paid":null,"fwci":5.0749,"has_fulltext":false,"cited_by_count":48,"citation_normalized_percentile":{"value":0.95378457,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":"117","issue":"1","first_page":"511","last_page":"526"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9955000281333923,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.9873999953269958,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7445571422576904},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.7314972877502441},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.7206780314445496},{"id":"https://openalex.org/keywords/naive-bayes-classifier","display_name":"Naive Bayes classifier","score":0.6884037852287292},{"id":"https://openalex.org/keywords/random-forest","display_name":"Random forest","score":0.6466134190559387},{"id":"https://openalex.org/keywords/logistic-regression","display_name":"Logistic regression","score":0.5820410251617432},{"id":"https://openalex.org/keywords/training","display_name":"Training (meteorology)","score":0.514323353767395},{"id":"https://openalex.org/keywords/training-set","display_name":"Training set","score":0.4679514169692993},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.4325869679450989},{"id":"https://openalex.org/keywords/support-vector-machine","display_name":"Support vector machine","score":0.3042280077934265}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7445571422576904},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.7314972877502441},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.7206780314445496},{"id":"https://openalex.org/C52001869","wikidata":"https://www.wikidata.org/wiki/Q812530","display_name":"Naive Bayes classifier","level":3,"score":0.6884037852287292},{"id":"https://openalex.org/C169258074","wikidata":"https://www.wikidata.org/wiki/Q245748","display_name":"Random forest","level":2,"score":0.6466134190559387},{"id":"https://openalex.org/C151956035","wikidata":"https://www.wikidata.org/wiki/Q1132755","display_name":"Logistic regression","level":2,"score":0.5820410251617432},{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.514323353767395},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.4679514169692993},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4325869679450989},{"id":"https://openalex.org/C12267149","wikidata":"https://www.wikidata.org/wiki/Q282453","display_name":"Support vector machine","level":2,"score":0.3042280077934265},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C153294291","wikidata":"https://www.wikidata.org/wiki/Q25261","display_name":"Meteorology","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1007/s11192-018-2865-9","is_oa":false,"landing_page_url":"https://doi.org/10.1007/s11192-018-2865-9","pdf_url":null,"source":{"id":"https://openalex.org/S148561398","display_name":"Scientometrics","issn_l":"0138-9130","issn":["0138-9130","1588-2861"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320108","host_organization_name":"Springer Nature (Netherlands)","host_organization_lineage":["https://openalex.org/P4310320108","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Nature (Netherlands)","Springer Nature"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Scientometrics","raw_type":"journal-article"},{"id":"pmh:oai:arXiv.org:1808.00525","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1808.00525","pdf_url":"https://arxiv.org/pdf/1808.00525","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"pmh:oai:RePEc:spr:scient:v:117:y:2018:i:1:d:10.1007_s11192-018-2865-9","is_oa":false,"landing_page_url":"http://link.springer.com/10.1007/s11192-018-2865-9","pdf_url":null,"source":{"id":"https://openalex.org/S4306401271","display_name":"RePEc: Research Papers in Economics","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I77793887","host_organization_name":"Federal Reserve Bank of St. Louis","host_organization_lineage":["https://openalex.org/I77793887"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:1808.00525","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1808.00525","pdf_url":"https://arxiv.org/pdf/1808.00525","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1791332904","display_name":null,"funder_award_id":"1535370","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G7095995687","display_name":null,"funder_award_id":"1561687","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320306140","display_name":"Ewing Marion Kauffman Foundation","ror":"https://ror.org/0288es483"},{"id":"https://openalex.org/F4320306151","display_name":"Alfred P. Sloan Foundation","ror":"https://ror.org/052csg198"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":42,"referenced_works":["https://openalex.org/W809740722","https://openalex.org/W1208937987","https://openalex.org/W1912764128","https://openalex.org/W1964879903","https://openalex.org/W1982912387","https://openalex.org/W1990976049","https://openalex.org/W1991157445","https://openalex.org/W2000910146","https://openalex.org/W2018353135","https://openalex.org/W2026499502","https://openalex.org/W2033626137","https://openalex.org/W2034368206","https://openalex.org/W2041309207","https://openalex.org/W2042913039","https://openalex.org/W2046842230","https://openalex.org/W2056826404","https://openalex.org/W2065330407","https://openalex.org/W2065858398","https://openalex.org/W2080099271","https://openalex.org/W2098162425","https://openalex.org/W2098365647","https://openalex.org/W2113846323","https://openalex.org/W2118978333","https://openalex.org/W2125315567","https://openalex.org/W2129558264","https://openalex.org/W2131193521","https://openalex.org/W2134510195","https://openalex.org/W2145772003","https://openalex.org/W2145845082","https://openalex.org/W2145893390","https://openalex.org/W2147347208","https://openalex.org/W2148143831","https://openalex.org/W2162337786","https://openalex.org/W2170612786","https://openalex.org/W2281739675","https://openalex.org/W2599579729","https://openalex.org/W2759340467","https://openalex.org/W2809958714","https://openalex.org/W3100447784","https://openalex.org/W3105255771","https://openalex.org/W4231254446","https://openalex.org/W6684642658"],"related_works":["https://openalex.org/W4396689146","https://openalex.org/W4200112873","https://openalex.org/W4367336074","https://openalex.org/W2955796858","https://openalex.org/W3154045278","https://openalex.org/W4379620016","https://openalex.org/W4393666307","https://openalex.org/W3210764983","https://openalex.org/W4393443811","https://openalex.org/W4367335949"],"abstract_inverted_index":null,"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":6},{"year":2024,"cited_by_count":7},{"year":2023,"cited_by_count":9},{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":14},{"year":2020,"cited_by_count":4},{"year":2019,"cited_by_count":4},{"year":2018,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2018-08-03T00:00:00"}
