{"id":"https://openalex.org/W2016056454","doi":"https://doi.org/10.1109/bigdata.2014.7004345","title":"Why name ambiguity resolution matters for scholarly big data research","display_name":"Why name ambiguity resolution matters for scholarly big data research","publication_year":2014,"publication_date":"2014-10-01","ids":{"openalex":"https://openalex.org/W2016056454","doi":"https://doi.org/10.1109/bigdata.2014.7004345","mag":"2016056454"},"language":"en","primary_location":{"id":"doi:10.1109/bigdata.2014.7004345","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata.2014.7004345","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2014 IEEE International Conference on Big Data (Big Data)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100757432","display_name":"Jinseok Kim","orcid":"https://orcid.org/0000-0001-6481-2065"},"institutions":[{"id":"https://openalex.org/I157725225","display_name":"University of Illinois Urbana-Champaign","ror":"https://ror.org/047426m28","country_code":"US","type":"education","lineage":["https://openalex.org/I157725225"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Jinseok Kim","raw_affiliation_strings":["Graduate School of Library and Information Science, University of Illinois at Urbana-Champaign, Urbana, USA","Graduate School of Library and Information Science, University of Illinois at Urbana-Champaign, Urbana, (USA)"],"affiliations":[{"raw_affiliation_string":"Graduate School of Library and Information Science, University of Illinois at Urbana-Champaign, Urbana, USA","institution_ids":["https://openalex.org/I157725225"]},{"raw_affiliation_string":"Graduate School of Library and Information Science, University of Illinois at Urbana-Champaign, Urbana, (USA)","institution_ids":["https://openalex.org/I157725225"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5025085845","display_name":"Jana Diesner","orcid":"https://orcid.org/0000-0001-8183-7109"},"institutions":[{"id":"https://openalex.org/I157725225","display_name":"University of Illinois Urbana-Champaign","ror":"https://ror.org/047426m28","country_code":"US","type":"education","lineage":["https://openalex.org/I157725225"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jana Diesner","raw_affiliation_strings":["Graduate School of Library and Information Science, University of Illinois at Urbana-Champaign, Urbana, USA","Graduate School of Library and Information Science, University of Illinois at Urbana-Champaign, Urbana, (USA)"],"affiliations":[{"raw_affiliation_string":"Graduate School of Library and Information Science, University of Illinois at Urbana-Champaign, Urbana, USA","institution_ids":["https://openalex.org/I157725225"]},{"raw_affiliation_string":"Graduate School of Library and Information Science, University of Illinois at Urbana-Champaign, Urbana, (USA)","institution_ids":["https://openalex.org/I157725225"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5027426063","display_name":"Hee\u2010Jun Kim","orcid":"https://orcid.org/0000-0002-9044-2122"},"institutions":[{"id":"https://openalex.org/I157725225","display_name":"University of Illinois Urbana-Champaign","ror":"https://ror.org/047426m28","country_code":"US","type":"education","lineage":["https://openalex.org/I157725225"]},{"id":"https://openalex.org/I114027177","display_name":"University of North Carolina at Chapel Hill","ror":"https://ror.org/0130frc33","country_code":"US","type":"education","lineage":["https://openalex.org/I114027177"]},{"id":"https://openalex.org/I878022262","display_name":"Korea Institute of Science & Technology Information","ror":"https://ror.org/01k4yrm29","country_code":"KR","type":"facility","lineage":["https://openalex.org/I2801339556","https://openalex.org/I4210144908","https://openalex.org/I4387152098","https://openalex.org/I878022262"]}],"countries":["KR","US"],"is_corresponding":false,"raw_author_name":"Heejun Kim","raw_affiliation_strings":["Department of Computer Science, University of Illinois at Urbana-Champaign Urbana, USA","Department of Overseas Information, Korea Institute of Science and Technology Information, Daejeon, Korea","School of Information and Library Science, University of North Carolina at Chapel Hill, Chapel Hill, USA#TAB#"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, University of Illinois at Urbana-Champaign Urbana, USA","institution_ids":["https://openalex.org/I157725225"]},{"raw_affiliation_string":"Department of Overseas Information, Korea Institute of Science and Technology Information, Daejeon, Korea","institution_ids":["https://openalex.org/I878022262"]},{"raw_affiliation_string":"School of Information and Library Science, University of North Carolina at Chapel Hill, Chapel Hill, USA#TAB#","institution_ids":["https://openalex.org/I114027177"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5016770173","display_name":"Amirhossein Aleyasen","orcid":null},"institutions":[{"id":"https://openalex.org/I114027177","display_name":"University of North Carolina at Chapel Hill","ror":"https://ror.org/0130frc33","country_code":"US","type":"education","lineage":["https://openalex.org/I114027177"]},{"id":"https://openalex.org/I157725225","display_name":"University of Illinois Urbana-Champaign","ror":"https://ror.org/047426m28","country_code":"US","type":"education","lineage":["https://openalex.org/I157725225"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Amirhossein Aleyasen","raw_affiliation_strings":["School of Information and Library Science, University of North Carolina at Chapel Hill, Chapel Hill, USA","Department of Computer Science, University of Illinois at Urbana Champaign, Urbana, USA#TAB#"],"affiliations":[{"raw_affiliation_string":"School of Information and Library Science, University of North Carolina at Chapel Hill, Chapel Hill, USA","institution_ids":["https://openalex.org/I114027177"]},{"raw_affiliation_string":"Department of Computer Science, University of Illinois at Urbana Champaign, Urbana, USA#TAB#","institution_ids":["https://openalex.org/I157725225"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5055046516","display_name":"Hwan-Min Kim","orcid":null},"institutions":[{"id":"https://openalex.org/I157725225","display_name":"University of Illinois Urbana-Champaign","ror":"https://ror.org/047426m28","country_code":"US","type":"education","lineage":["https://openalex.org/I157725225"]},{"id":"https://openalex.org/I878022262","display_name":"Korea Institute of Science & Technology Information","ror":"https://ror.org/01k4yrm29","country_code":"KR","type":"facility","lineage":["https://openalex.org/I2801339556","https://openalex.org/I4210144908","https://openalex.org/I4387152098","https://openalex.org/I878022262"]}],"countries":["KR","US"],"is_corresponding":false,"raw_author_name":"Hwan-Min Kim","raw_affiliation_strings":["Department of Computer Science, University of Illinois at Urbana-Champaign Urbana, USA","Department of Overseas Information, Korea Institute of Science and Technology Information, Daejeon, Korea"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, University of Illinois at Urbana-Champaign Urbana, USA","institution_ids":["https://openalex.org/I157725225"]},{"raw_affiliation_string":"Department of Overseas Information, Korea Institute of Science and Technology Information, Daejeon, Korea","institution_ids":["https://openalex.org/I878022262"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5100757432"],"corresponding_institution_ids":["https://openalex.org/I157725225"],"apc_list":null,"apc_paid":null,"fwci":1.6815,"has_fulltext":false,"cited_by_count":14,"citation_normalized_percentile":{"value":0.85612779,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10102","display_name":"scientometrics and bibliometrics research","score":0.9883000254631042,"subfield":{"id":"https://openalex.org/subfields/1804","display_name":"Statistics, Probability and Uncertainty"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11819","display_name":"Data-Driven Disease Surveillance","score":0.9861000180244446,"subfield":{"id":"https://openalex.org/subfields/2713","display_name":"Epidemiology"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/ambiguity","display_name":"Ambiguity","score":0.7979044318199158},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7228637933731079},{"id":"https://openalex.org/keywords/big-data","display_name":"Big data","score":0.6655398607254028},{"id":"https://openalex.org/keywords/centrality","display_name":"Centrality","score":0.5712259411811829},{"id":"https://openalex.org/keywords/range","display_name":"Range (aeronautics)","score":0.4916713833808899},{"id":"https://openalex.org/keywords/resolution","display_name":"Resolution (logic)","score":0.4894789457321167},{"id":"https://openalex.org/keywords/productivity","display_name":"Productivity","score":0.4872266352176666},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.47685301303863525},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.40604132413864136},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.28427204489707947},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.24281442165374756},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.1602502167224884},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.12994498014450073}],"concepts":[{"id":"https://openalex.org/C2780522230","wikidata":"https://www.wikidata.org/wiki/Q1140419","display_name":"Ambiguity","level":2,"score":0.7979044318199158},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7228637933731079},{"id":"https://openalex.org/C75684735","wikidata":"https://www.wikidata.org/wiki/Q858810","display_name":"Big data","level":2,"score":0.6655398607254028},{"id":"https://openalex.org/C53811970","wikidata":"https://www.wikidata.org/wiki/Q5062194","display_name":"Centrality","level":2,"score":0.5712259411811829},{"id":"https://openalex.org/C204323151","wikidata":"https://www.wikidata.org/wiki/Q905424","display_name":"Range (aeronautics)","level":2,"score":0.4916713833808899},{"id":"https://openalex.org/C138268822","wikidata":"https://www.wikidata.org/wiki/Q1051925","display_name":"Resolution (logic)","level":2,"score":0.4894789457321167},{"id":"https://openalex.org/C204983608","wikidata":"https://www.wikidata.org/wiki/Q2111958","display_name":"Productivity","level":2,"score":0.4872266352176666},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.47685301303863525},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.40604132413864136},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.28427204489707947},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.24281442165374756},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.1602502167224884},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.12994498014450073},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C159985019","wikidata":"https://www.wikidata.org/wiki/Q181790","display_name":"Composite material","level":1,"score":0.0},{"id":"https://openalex.org/C139719470","wikidata":"https://www.wikidata.org/wiki/Q39680","display_name":"Macroeconomics","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C192562407","wikidata":"https://www.wikidata.org/wiki/Q228736","display_name":"Materials science","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/bigdata.2014.7004345","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata.2014.7004345","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2014 IEEE International Conference on Big Data (Big Data)","raw_type":"proceedings-article"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.713.6831","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.713.6831","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://web.engr.illinois.edu/%7Ealeyase2/data/pubs/PID3417053.pdf","raw_type":"text"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.6700000166893005,"display_name":"Decent work and economic growth","id":"https://metadata.un.org/sdg/8"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":33,"referenced_works":["https://openalex.org/W155417673","https://openalex.org/W1503428008","https://openalex.org/W1728984519","https://openalex.org/W1964879903","https://openalex.org/W1964920419","https://openalex.org/W1965851398","https://openalex.org/W1977221702","https://openalex.org/W1984558542","https://openalex.org/W2017557934","https://openalex.org/W2033626137","https://openalex.org/W2037933327","https://openalex.org/W2038261631","https://openalex.org/W2039599657","https://openalex.org/W2040118133","https://openalex.org/W2040552593","https://openalex.org/W2041309207","https://openalex.org/W2049213211","https://openalex.org/W2050673765","https://openalex.org/W2058259957","https://openalex.org/W2061605487","https://openalex.org/W2089644168","https://openalex.org/W2116386566","https://openalex.org/W2125315567","https://openalex.org/W2128132605","https://openalex.org/W2145845082","https://openalex.org/W2147107544","https://openalex.org/W2156897283","https://openalex.org/W2953256263","https://openalex.org/W3098845338","https://openalex.org/W3100447784","https://openalex.org/W3121279336","https://openalex.org/W4214502461","https://openalex.org/W4232932184"],"related_works":["https://openalex.org/W4229078645","https://openalex.org/W1977345676","https://openalex.org/W4282032776","https://openalex.org/W2047552823","https://openalex.org/W4321606905","https://openalex.org/W3130445735","https://openalex.org/W2105110616","https://openalex.org/W2739658809","https://openalex.org/W2747930654","https://openalex.org/W986928874"],"abstract_inverted_index":{"This":[0,129],"paper":[1],"illustrates":[2],"how":[3],"data":[4,135,141],"pre-processing":[5,136],"choices":[6,137],"about":[7,15,20],"author":[8,65],"name":[9,89],"disambiguation":[10,40],"can":[11],"affect":[12],"research":[13],"findings":[14],"scholarly":[16,29,139],"networks":[17],"and":[18,35,44,52,73,78,113],"hypotheses":[19],"underlying":[21],"social":[22],"mechanisms.":[23],"We":[24],"have":[25],"analyzed":[26],"three":[27],"big":[28,140],"datasets":[30],"that":[31,56],"were":[32],"disambiguated":[33],"algorithmically":[34],"via":[36],"two":[37],"common":[38],"initial-based":[39],"methods;":[41],"namely":[42],"first-initial":[43],"all-initials":[45],"disambiguation.":[46,128],"The":[47,84],"comparison":[48],"of":[49,62,70,80,88,103],"resulting":[50],"bibliometric":[51],"network":[53],"properties":[54],"revealed":[55],"initial-disambiguation":[57],"bears":[58],"the":[59,68,75,101],"prevalent":[60],"risks":[61],"incorrectly":[63],"merging":[64],"identities,":[66],"underestimating":[67],"number":[69,79,102],"unique":[71,104],"authors":[72],"inflating":[74],"average":[76,111],"productivity":[77],"collaborators":[81],"per":[82,98],"author.":[83],"gaps":[85],"between":[86],"outcomes":[87],"ambiguity":[90],"resolution":[91],"methods":[92,124],"range":[93],"from":[94,106,114],"\u22124.23%":[95],"to":[96,108,116,126,134],"\u221287.36%":[97],"dataset":[99],"for":[100,110,118,121,131],"authors,":[105],"3.75%":[107],"691.20%":[109],"productivity,":[112],"5.06%":[115],"285.28%":[117],"degree":[119],"centrality":[120],"initial":[122],"based":[123],"compared":[125],"algorithmic":[127],"calls":[130],"special":[132],"attention":[133],"in":[138],"research.":[142]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":3},{"year":2019,"cited_by_count":2},{"year":2018,"cited_by_count":1},{"year":2017,"cited_by_count":3},{"year":2016,"cited_by_count":1}],"updated_date":"2026-04-05T17:49:38.594831","created_date":"2025-10-10T00:00:00"}
