{"id":"https://openalex.org/W2081116417","doi":"https://doi.org/10.1002/meet.14504901244","title":"Analysis of networks in cyberinfrastructure\u2010enabled research communities: A pilot study","display_name":"Analysis of networks in cyberinfrastructure\u2010enabled research communities: A pilot study","publication_year":2012,"publication_date":"2012-01-01","ids":{"openalex":"https://openalex.org/W2081116417","doi":"https://doi.org/10.1002/meet.14504901244","mag":"2081116417"},"language":"en","primary_location":{"id":"doi:10.1002/meet.14504901244","is_oa":true,"landing_page_url":"https://doi.org/10.1002/meet.14504901244","pdf_url":"https://onlinelibrary.wiley.com/doi/pdfdirect/10.1002/meet.14504901244","source":{"id":"https://openalex.org/S4306523999","display_name":"Proceedings of the American Society for Information Science and Technology","issn_l":"1550-8390","issn":["1550-8390","1936-1734"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the American Society for Information Science and Technology","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"bronze","oa_url":"https://onlinelibrary.wiley.com/doi/pdfdirect/10.1002/meet.14504901244","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5038173240","display_name":"Mark R. Costa","orcid":"https://orcid.org/0000-0001-7967-980X"},"institutions":[{"id":"https://openalex.org/I70983195","display_name":"Syracuse University","ror":"https://ror.org/025r5qe02","country_code":"US","type":"education","lineage":["https://openalex.org/I70983195"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Mark R. Costa","raw_affiliation_strings":["School of Information Studies, Syracuse University, 337, Hinds Hall, Syracuse, New York, 13244 (USA)"],"affiliations":[{"raw_affiliation_string":"School of Information Studies, Syracuse University, 337, Hinds Hall, Syracuse, New York, 13244 (USA)","institution_ids":["https://openalex.org/I70983195"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5041928722","display_name":"Jian Qin","orcid":"https://orcid.org/0000-0002-7094-2867"},"institutions":[{"id":"https://openalex.org/I70983195","display_name":"Syracuse University","ror":"https://ror.org/025r5qe02","country_code":"US","type":"education","lineage":["https://openalex.org/I70983195"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Jian Qin","raw_affiliation_strings":["School of Information Studies, Syracuse University, 311, Hinds Hall, Syracuse, New York, 13244 (USA)"],"affiliations":[{"raw_affiliation_string":"School of Information Studies, Syracuse University, 311, Hinds Hall, Syracuse, New York, 13244 (USA)","institution_ids":["https://openalex.org/I70983195"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5038173240","https://openalex.org/A5041928722"],"corresponding_institution_ids":["https://openalex.org/I70983195"],"apc_list":null,"apc_paid":null,"fwci":0.3999,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.67878261,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":"49","issue":"1","first_page":"1","last_page":"4"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9941999912261963,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9941999912261963,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11986","display_name":"Scientific Computing and Data Management","score":0.9879000186920166,"subfield":{"id":"https://openalex.org/subfields/1802","display_name":"Information Systems and Management"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10064","display_name":"Complex Network Analysis Techniques","score":0.9865000247955322,"subfield":{"id":"https://openalex.org/subfields/3109","display_name":"Statistical and Nonlinear Physics"},"field":{"id":"https://openalex.org/fields/31","display_name":"Physics and Astronomy"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/cyberinfrastructure","display_name":"Cyberinfrastructure","score":0.9641281366348267},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.6289492845535278},{"id":"https://openalex.org/keywords/scale","display_name":"Scale (ratio)","score":0.5269647240638733},{"id":"https://openalex.org/keywords/scientometrics","display_name":"Scientometrics","score":0.5243639945983887},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.4967249035835266},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.4638611674308777},{"id":"https://openalex.org/keywords/citation","display_name":"Citation","score":0.45685699582099915},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.42995381355285645},{"id":"https://openalex.org/keywords/knowledge-management","display_name":"Knowledge management","score":0.35883212089538574},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.25809401273727417},{"id":"https://openalex.org/keywords/geography","display_name":"Geography","score":0.12363728880882263}],"concepts":[{"id":"https://openalex.org/C2776397876","wikidata":"https://www.wikidata.org/wiki/Q1450531","display_name":"Cyberinfrastructure","level":2,"score":0.9641281366348267},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.6289492845535278},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.5269647240638733},{"id":"https://openalex.org/C525823164","wikidata":"https://www.wikidata.org/wiki/Q472342","display_name":"Scientometrics","level":2,"score":0.5243639945983887},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.4967249035835266},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.4638611674308777},{"id":"https://openalex.org/C2778805511","wikidata":"https://www.wikidata.org/wiki/Q1713","display_name":"Citation","level":2,"score":0.45685699582099915},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.42995381355285645},{"id":"https://openalex.org/C56739046","wikidata":"https://www.wikidata.org/wiki/Q192060","display_name":"Knowledge management","level":1,"score":0.35883212089538574},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.25809401273727417},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.12363728880882263},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.0},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.0},{"id":"https://openalex.org/C58640448","wikidata":"https://www.wikidata.org/wiki/Q42515","display_name":"Cartography","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1002/meet.14504901244","is_oa":true,"landing_page_url":"https://doi.org/10.1002/meet.14504901244","pdf_url":"https://onlinelibrary.wiley.com/doi/pdfdirect/10.1002/meet.14504901244","source":{"id":"https://openalex.org/S4306523999","display_name":"Proceedings of the American Society for Information Science and Technology","issn_l":"1550-8390","issn":["1550-8390","1936-1734"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the American Society for Information Science and Technology","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1002/meet.14504901244","is_oa":true,"landing_page_url":"https://doi.org/10.1002/meet.14504901244","pdf_url":"https://onlinelibrary.wiley.com/doi/pdfdirect/10.1002/meet.14504901244","source":{"id":"https://openalex.org/S4306523999","display_name":"Proceedings of the American Society for Information Science and Technology","issn_l":"1550-8390","issn":["1550-8390","1936-1734"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the American Society for Information Science and Technology","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2081116417.pdf","grobid_xml":"https://content.openalex.org/works/W2081116417.grobid-xml"},"referenced_works_count":14,"referenced_works":["https://openalex.org/W189804332","https://openalex.org/W1671906456","https://openalex.org/W1809800090","https://openalex.org/W1967874852","https://openalex.org/W1987334994","https://openalex.org/W2008757894","https://openalex.org/W2025572017","https://openalex.org/W2072897447","https://openalex.org/W2108991785","https://openalex.org/W2125315567","https://openalex.org/W2161806764","https://openalex.org/W2165429918","https://openalex.org/W3146259567","https://openalex.org/W4249730870"],"related_works":["https://openalex.org/W273500195","https://openalex.org/W2043019925","https://openalex.org/W4236056781","https://openalex.org/W4293768956","https://openalex.org/W2773781927","https://openalex.org/W2184477657","https://openalex.org/W90628286","https://openalex.org/W1696735061","https://openalex.org/W2152811545","https://openalex.org/W1999096178"],"abstract_inverted_index":{"The":[0,153,647,777,897,1131,1158,1402],"advancement":[1],"of":[2,30,49,58,66,77,155,179,188,207,215,249,269,277,285,306,310,358,424,441,448,460,477,490,500,530,536,542,564,598,600,609,621,644,666,675,695,712,719,779,883,900,926,945,954,1000,1024,1156,1270,1306,1319,1335,1346,1376,1386,1406,1413,1420,1487,1513,1550],"cyberinfrastructure":[3],"(CI)":[4],"in":[5,12,14,27,124,372,399,435,572,602,662,729,737,774,789,822,930,979,992,1054,1088,1110,1187,1206,1343,1404,1425,1431,1441,1480,1521],"the":[6,15,28,56,88,141,163,177,191,204,212,255,266,304,313,337,345,348,361,417,437,445,488,498,501,505,514,522,528,543,555,567,607,619,625,642,667,682,692,720,741,749,753,757,770,775,780,799,802,812,818,832,837,850,859,879,884,917,938,955,958,1004,1138,1163,1188,1261,1303,1324,1350,1355,1362,1383,1411,1418,1451,1488,1514,1518,1531,1536,1548,1551],"last":[7],"two":[8,1064,1124,1165,1185,1215],"decades":[9],"has":[10],"resulted":[11],"shift":[13],"way":[16],"research":[17,39,202,261,279,502,626],"is":[18,128,224,378,381,613,664,878,1062,1084,1237,1409,1525],"designed":[19],"and":[20,74,107,112,115,148,235,240,290,427,439,444,516,520,582,585,634,638,687,756,786,865,932,947,1012,1098,1259,1436,1445,1462,1494,1508],"conducted,":[21],"as":[22,24,104,209,211,263,265,509,765,767,986,988,1092,1422,1424,1517,1539],"well":[23,210,264,766,987,1423],"an":[25,225,247,250,656],"increase":[26],"scale":[29],"collaboration":[31,59,68,79,89,131,146,186,205,234,419,425,537,565,584,603],"(Szalay":[32],"&":[33,82],"Blakeley,":[34],"2009).":[35],"These":[36],"large-scale":[37],"collaborative":[38],"projects":[40],"have":[41,51,86,1051,1103,1541],"become":[42],"common":[43],"practice":[44],"today.":[45],"While":[46,328,801,1026],"a":[47,274,355,368,458,474,540,615,829,869,894,962,993,998,1022,1052,1085,1119,1197,1226,1231,1316,1332,1373,1397],"number":[48,925,999,1305,1385],"studies":[50,97,479],"been":[52,1104],"done":[53],"to":[54,174,231,300,322,325,334,383,396,468,485,550,576,606,640,684,708,784,791,835,848,857,874,957,976,981,989,1030,1041,1056,1071,1077,1106,1127,1171,1182,1239,1248,1255,1263,1277,1300,1338,1359,1391,1400,1417,1464,1471,1474,1477,1528,1546,1555],"investigate":[55],"interdisciplinarity":[57],"(e.g.,":[60,271,450,465,1217],"Qin":[61],"et":[62,454,1115],"al.,":[63],"1997),":[64],"structure":[65,438],"scientific":[67,78,180,506,544,568,1448],"networks":[69,80,132,147,257,420,443],"(Newman,":[70],"2000;":[71],"Newman,":[72],"2001),":[73],"computational":[75,196,534],"reasoning":[76],"(Evans":[81],"Rzhetsky,":[83],"2011),":[84],"few":[85],"investigated":[87],"patterns":[90,187,206,237,587,604],"around":[91,135,239,589,681,1496],"CI-enabled":[92,156,201,242,278,590,622],"research.":[93,646,1429],"That":[94,167],"is,":[95,168],"these":[96,144,260,442,482,492,551],"were":[98,298,393,406,815,887,1018,1028,1038,1245,1253,1282,1291],"based":[99,1459],"on":[100,227,416,481,487,497,539,624,654,691,748,769,1141,1460],"bibliographic":[101,116],"relationships,":[102],"such":[103,1091],"co-authorship":[105],"(Gl\u00e4nzel":[106],"Schubert,":[108],"2005),":[109],"co-citation":[110],"(White":[111],"McCain,":[113],"1998),":[114],"coupling":[117],"(Egghe,":[118],"2002).":[119],"Although":[120,1243],"scientists":[121,189,288,297,321,333,359,392,1427],"increasingly":[122],"engage":[123],"data-intensive":[125],"research,":[126,633],"little":[127],"known":[129],"about":[130,140],"that":[133,293,340,363,371,546,881,1007,1150,1453],"evolve":[134],"data":[136,311,314,342,364,377,556,648,690,725,750,755,772,797,807,814,860,901,1017,1034,1208,1407,1478,1497,1504,1506,1519,1532],"repositories,":[137],"much":[138,1198],"less":[139],"similarity/dissimilarity":[142],"between":[143],"new":[145,578],"those":[149,470,1465],"surrounding":[150],"publication":[151,940,964,1356],"outputs.":[152,1357],"use":[154,457,1405],"repositories":[157,1408,1479],"gives":[158],"us":[159,575,1009],"additional":[160],"insight":[161],"into":[162,861,868,876,893,1175,1535],"knowledge":[164,192,270],"production":[165,193],"process.":[166],"we":[169,182,199,218,229,853,1027,1037,1048,1244,1252,1540],"are":[170,433,703,1069,1123,1191,1268,1439,1458,1533],"no":[171],"longer":[172],"limited":[173,513],"analyzing":[175,254,1354],"only":[176,1212],"end-products":[178,499],"research;":[181],"can":[183,219,726,1151,1553],"now":[184],"explore":[185,469,1472,1547],"along":[190],"chain":[194],"using":[195,1168,1179],"methods.":[197],"Additionally,":[198],"expect":[200],"affects":[203],"scientists,":[208],"subsequent":[213],"diffusion":[214,236,256,268,447,586],"knowledge.":[216],"Before":[217],"quantitatively":[220],"determine":[221],"if":[222],"there":[223,702,1122,1281,1290],"effect":[226],"collaboration,":[228],"need":[230,1470],"analyze":[232,785],"existing":[233,689],"emerging":[238,588],"from":[241,259,680,705,740,798,808,817,907,1010,1035],"science.":[243,591],"This":[244,524,559,592,611,651,673,724,760,1194,1430,1483],"work":[245,593,612,1484],"provides":[246],"overview":[248],"ongoing":[251],"pilot":[252,781,813],"study":[253,525,531],"resulting":[258,929],"collaborations,":[262],"associated":[267,758,794,1490],"patents)":[272],"for":[273,410,518,580,617,631,920,937,1021,1118,1136,1154,1543],"specific":[275],"instance":[276],"\u2013":[280],"GenBank":[281,819],"(http://www.ncbi.nlm.nih.gov/genbank/).":[282],"Traditional":[283],"models":[284,1135],"science":[286,623],"include":[287],"designing":[289],"executing":[291],"experiments":[292],"generate":[294],"data.":[295,307,523,838,851,1046],"Consequently,":[296],"able":[299,395,1029,1040,1247,1254],"exercise":[301],"control":[302],"over":[303],"dissemination":[305],"Controlled":[308],"release":[309],"(if":[312],"was":[315,339,343,547,783,971,1310,1364],"ever":[316],"released":[317],"at":[318,828,1294],"all)":[319],"allowed":[320],"maximize":[323],"opportunities":[324],"make":[326,1416],"discoveries.":[327],"normative":[329],"expectations":[330],"often":[331],"encouraged":[332],"share":[335],"data,":[336,491,913],"reality":[338,373],"sharing":[341],"more":[344,1066,1199],"exception":[346],"than":[347],"rule.":[349],"As":[350],"Ceci":[351],"(1988)":[352],"discovered,":[353],"although":[354],"significant":[356,924],"percentage":[357],"express":[360],"belief":[362],"should":[365,560],"be":[366,397,727,977,1072,1152,1511],"shared,":[367],"majority":[369,1023],"acknowledge":[370],"obtaining":[374],"another":[375,1297],"researcher's":[376],"difficult.":[379],"It":[380],"important":[382,872],"understand":[384,1529],"this":[385,921,1207,1241,1295],"did":[386,1049],"not":[387,548,1019,1039,1050,1246],"precluded":[388],"cooperative":[389],"efforts;":[390],"however,":[391],"largely":[394],"selective":[398],"who":[400,432,1438],"they":[401],"worked":[402],"with.":[403],"Geographical":[404],"constraints":[405],"also":[407,512,614],"influential,":[408],"particularly":[409],"industry-academic":[411],"partnerships":[412],"(Jaffe,":[413],"1993).":[414],"Depending":[415],"discipline,":[418],"with":[421,795,841,1315,1331,1372,1396,1491,1500],"varying":[422],"levels":[423],"rates":[426],"densities":[428],"would":[429],"emerge.":[430],"Researchers":[431,472],"interested":[434,1440],"studying":[436],"evolution":[440],"resultant":[446],"innovation":[449],"Meyer,":[451],"2007;":[452],"Ramlogan":[453],"al,":[455],"2007)":[456],"variety":[459],"techniques,":[461],"including":[462,752],"network":[463,995],"graphs":[464],"White,":[466],"2009)":[467],"characteristics.":[471],"built":[473],"large":[475,1503],"body":[476,1419],"empirical":[478,596],"focused":[480,494,653],"relationships.":[483],"Due":[484],"limitations":[486,919],"availability":[489],"researchers":[493,679,1415,1435],"almost":[495,1454],"exclusively":[496],"process":[503,545,650],"\u2014":[504,554,844],"publication,":[507],"which,":[508],"mentioned":[510],"earlier,":[511],"metrics":[515,579,637,1457],"context":[517],"measuring":[519],"interpreting":[521],"will":[526,594,805,1341,1433,1469,1510],"advance":[527],"field":[529],"by":[532,1143,1211,1234],"focusing":[533],"methods":[535,552],"analysis":[538],"part":[541,665],"accessible":[549],"before":[553],"generation":[557],"phase.":[558],"improve":[561],"our":[562,1271],"understanding":[563,581],"throughout":[566],"process,":[569,627],"which":[570,661,1264],"will,":[571],"turn,":[573],"allow":[574],"develop":[577],"assessing":[583,1442],"offer":[595],"(in)validation":[597],"claims":[599],"shifts":[601],"due":[605,1076],"emergence":[608],"cyberinfrastructures.":[610],"pre-requisite":[616],"investigating":[618],"effects":[620],"constructing":[628],"support":[629],"services":[630],"CI-based":[632],"developing":[635,1144],"assessment":[636,1456,1467],"tools":[639],"measure":[641],"impact":[643,1446,1495],"funded":[645],"transformation":[649],"project":[652,804,1195],"GenBank,":[655,1544],"NIH":[657],"genetic":[658,693,713,722,754,771],"sequence":[659],"database,":[660],"turn":[663,1432],"International":[668],"Nucleotide":[669],"Sequence":[670],"Database":[671],"partnership.":[672],"set":[674,1209],"integrated":[676],"databases":[677],"allows":[678],"world":[683],"upload,":[685],"download,":[686],"revise":[688],"code":[694,956],"several":[696,706,709,855],"thousand":[697],"species.":[698,909],"For":[699,1274],"each":[700,715,969],"species":[701],"anywhere":[704],"hundred":[707],"million":[710],"submissions":[711,944],"updates,":[714],"covering":[716],"different":[717],"pieces":[718],"species'":[721],"code.":[723],"accessed":[728],"bulk":[730],"text":[731,810],"format,":[732],"or":[733,1065,1214,1230],"via":[734],"direct":[735,891],"download":[736],"XML":[738,823,842,885],"format":[739],"web":[742,820],"interface.":[743],"Each":[744],"record":[745,1082],"contains":[746],"information":[747,768],"submission,":[751],"metadata.":[759],"metadata":[761,788,905],"includes":[762],"intellectual":[763,983],"provenance":[764],"contained":[773],"record.":[776],"goal":[778],"test":[782],"compare":[787],"order":[790,980],"identify":[792,1260],"challenges":[793,1489],"collecting":[796],"repository.":[800],"full":[803],"extract":[806,1031,1042],"semi-structured":[809],"files,":[811],"drawn":[816],"interface":[821],"format.":[824],"Figure":[825],"1":[826],"details,":[827],"high":[830],"level,":[831],"steps":[833],"required":[834],"prepare":[836,849],"We":[839,1348],"started":[840],"Stylesheet":[843],"Transformation":[845],"(XSLT)":[846],"programs":[847],"Next,":[852],"executed":[854],"scripts":[856],"transform":[858],"MySQL":[862],"ready":[863],"commands":[864],"then":[866,1178],"loaded":[867],"database.":[870,959],"An":[871],"point":[873],"take":[875],"consideration":[877],"fact":[880,1452],"many":[882,1203,1501],"records":[886,906,1112,1174,1186],"poorly":[888],"formed,":[889],"making":[890],"feeds":[892],"database":[895,1111],"challenging.":[896],"first":[898,1132,1167],"segment":[899],"included":[902],"approximately":[903],"30,000":[904],"four":[908],"Limited":[910],"author":[911,1033,1045,1309,1368,1389],"affiliation":[912,1016],"preventing":[914],"disambiguation":[915],"given":[916,935],"time":[918],"project.":[922],"A":[923],"fuzzy":[927,1058,1129,1250],"duplicates,":[928,1251,1289,1361,1382],"publications":[931,948,1366,1387,1461],"authors":[933,950],"being":[934],"credit":[936,984],"same":[939,1189,1351],"multiple":[941,952,967],"times.":[942],"Duplicate":[943,1081],"patents":[946,1307],"because":[949,1202],"submit":[951],"sub-sections":[953],"So,":[960],"while":[961],"single":[963],"may":[965],"cover":[966],"sub-sequences,":[968],"subsequence":[970],"submitted":[972],"individually.":[973],"De-duplication":[974],"needs":[975],"performed":[978],"assign":[982,990],"properly,":[985],"weights":[991],"weighted":[994],"graph.":[996],"However,":[997],"problems":[1001],"arose":[1002],"during":[1003],"de-duplication":[1005],"phase":[1006],"prevented":[1008],"identifying":[1011],"resolving":[1013],"duplicates.":[1014,1059,1130,1193],"First,":[1015],"provided":[1020],"researchers.":[1025],"primary":[1032],"PubMed,":[1036],"any":[1043],"secondary":[1044],"Also,":[1047],"system":[1053,1232,1537,1552],"place":[1055],"remove":[1057,1249,1256,1339],"Fuzzy":[1060],"duplication":[1061,1265],"when":[1063,1353],"entry":[1067],"strings":[1068],"intended":[1070],"identical,":[1073],"but":[1074,1146],"vary":[1075,1210,1520],"some":[1078,1486],"typographical":[1079],"error.":[1080],"detection":[1083,1148],"well-researched":[1086],"problem":[1087,1296],"various":[1089],"fields":[1090],"Database,":[1093],"Artificial":[1094],"Intelligence,":[1095],"Statistics,":[1096],"Library":[1097],"Information":[1099],"Science.":[1100],"Many":[1101],"approaches":[1102],"proposed":[1105],"detect":[1107,1128],"duplicate":[1108],"entries":[1109,1205],"(see":[1113],"Elmagarmid":[1114],"al.":[1116],"2007":[1117],"review).":[1120],"Generally,":[1121],"main":[1125],"strategies":[1126],"uses":[1133],"probabilistic":[1134],"accuracy;":[1137],"other":[1139,1502],"focuses":[1140],"efficiency":[1142],"simple":[1145,1169],"fast":[1147],"techniques":[1149],"used":[1153],"millions":[1155],"records.":[1157],"current":[1159],"state-of-the-art":[1160],"approach":[1161,1201],"combines":[1162],"above":[1164],"strategies:":[1166],"algorithms":[1170,1181],"quickly":[1172],"group":[1173],"coarse":[1176],"clusters,":[1177],"sophisticated":[1180,1200],"decide":[1183],"whether":[1184],"cluster":[1190],"likely":[1192],"requires":[1196],"unique":[1204],"one":[1213,1512],"characters":[1216],"\u2026":[1218,1221],"isolate":[1219,1223],"KIL29":[1220],"VS\u2026.":[1222],"KIL30).":[1224],"Therefore,":[1225],"supervised":[1227],"learning":[1228],"algorithm":[1229],"guided":[1233],"human":[1235],"heuristics":[1236],"needed":[1238],"solve":[1240],"problem.":[1242],"exact":[1257,1279,1288,1381],"duplicates":[1258,1280,1302,1323,1340],"extent":[1262],"occurs.":[1266],"Included":[1267],"results":[1269],"initial":[1272],"analysis.":[1273],"patents,":[1275],"prior":[1276,1299],"removing":[1278,1287,1301,1322,1360,1380],"1839":[1283],"patent":[1284],"submissions.":[1285],"After":[1286,1321,1379],"172.":[1292],"Looking":[1293],"way,":[1298],"mean":[1304,1325,1363,1384],"per":[1308,1367,1388],"13.8":[1311],"(\u03c3":[1312,1328,1369,1393],"=":[1313,1329,1370,1394],"38.6),":[1314],"maximum":[1317,1333,1374,1398],"value":[1318,1334,1375],"311.":[1320],"equaled":[1326],"1.4":[1327],"1.02),":[1330],"10.":[1336],"Failure":[1337],"result":[1342],"gross":[1344,1556],"over-reporting":[1345],"productivity.":[1347],"observed":[1349],"pattern":[1352],"Prior":[1358],"48.1":[1365],"380.8),":[1371],"7124":[1377],"publications.":[1378],"dropped":[1390],"2.17":[1392],"3.19),":[1395],"equal":[1399],"45.":[1401],"growth":[1403],"affecting":[1410],"types":[1412],"contributions":[1414,1476],"science,":[1421],"how":[1426,1530],"conduct":[1428],"affect":[1434],"administrators":[1437],"contribution":[1443,1493],"to,":[1444],"on,":[1447],"progress.":[1449],"Given":[1450],"all":[1455],"citations":[1463],"publications,":[1466],"experts":[1468],"ways":[1473],"incorporate":[1475],"their":[1481],"models.":[1482],"reveals":[1485],"determining":[1492],"repositories.":[1498],"Along":[1499],"sets,":[1505],"preprocessing":[1507],"cleaning":[1509],"biggest":[1515],"challenges,":[1516],"quality.":[1522],"Finally,":[1523],"it":[1524],"absolutely":[1526],"necessary":[1527],"entered":[1534],"because,":[1538],"seen":[1542],"failure":[1545],"functioning":[1549],"lead":[1554],"reporting":[1557],"errors.":[1558]},"counts_by_year":[{"year":2016,"cited_by_count":1},{"year":2014,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
