{"id":"https://openalex.org/W4233693124","doi":"https://doi.org/10.1109/jcdl.2014.6970172","title":"The feasibility of investing in manual correction of metadata for a large-scale digital library","display_name":"The feasibility of investing in manual correction of metadata for a large-scale digital library","publication_year":2014,"publication_date":"2014-09-01","ids":{"openalex":"https://openalex.org/W4233693124","doi":"https://doi.org/10.1109/jcdl.2014.6970172"},"language":"en","primary_location":{"id":"doi:10.1109/jcdl.2014.6970172","is_oa":false,"landing_page_url":"https://doi.org/10.1109/jcdl.2014.6970172","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Joint Conference on Digital Libraries","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5078925594","display_name":"Hung\u2010Hsuan Chen","orcid":"https://orcid.org/0000-0001-5137-4449"},"institutions":[{"id":"https://openalex.org/I4210148468","display_name":"Industrial Technology Research Institute","ror":"https://ror.org/05szzwt63","country_code":"TW","type":"nonprofit","lineage":["https://openalex.org/I4210148468"]}],"countries":["TW"],"is_corresponding":true,"raw_author_name":"Hung-Hsuan Chen","raw_affiliation_strings":["Computational Intelligence Technology Center, Industrial Technology Research Institute, Hsinchu, Taiwan"],"affiliations":[{"raw_affiliation_string":"Computational Intelligence Technology Center, Industrial Technology Research Institute, Hsinchu, Taiwan","institution_ids":["https://openalex.org/I4210148468"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5054253075","display_name":"Madian Khabsa","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Madian Khabsa","raw_affiliation_strings":["Computer Science and Engineering"],"affiliations":[{"raw_affiliation_string":"Computer Science and Engineering","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5001294898","display_name":"C. Lee Giles","orcid":"https://orcid.org/0000-0002-1931-585X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"C. Lee Giles","raw_affiliation_strings":["Information Sciences and Technology, University Park, PA, US"],"affiliations":[{"raw_affiliation_string":"Information Sciences and Technology, University Park, PA, US","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5078925594"],"corresponding_institution_ids":["https://openalex.org/I4210148468"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.46223322,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":"51","issue":null,"first_page":"225","last_page":"228"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9966999888420105,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11986","display_name":"Scientific Computing and Data Management","score":0.994700014591217,"subfield":{"id":"https://openalex.org/subfields/1802","display_name":"Information Systems and Management"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/metadata","display_name":"Metadata","score":0.9339734315872192},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8075253963470459},{"id":"https://openalex.org/keywords/correctness","display_name":"Correctness","score":0.7489429116249084},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.7146784067153931},{"id":"https://openalex.org/keywords/digital-library","display_name":"Digital library","score":0.7074005603790283},{"id":"https://openalex.org/keywords/scale","display_name":"Scale (ratio)","score":0.6157861948013306},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.5999572277069092},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.5789569020271301},{"id":"https://openalex.org/keywords/metadata-repository","display_name":"Metadata repository","score":0.49773696064949036},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.0789928138256073},{"id":"https://openalex.org/keywords/geography","display_name":"Geography","score":0.05394113063812256}],"concepts":[{"id":"https://openalex.org/C93518851","wikidata":"https://www.wikidata.org/wiki/Q180160","display_name":"Metadata","level":2,"score":0.9339734315872192},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8075253963470459},{"id":"https://openalex.org/C55439883","wikidata":"https://www.wikidata.org/wiki/Q360812","display_name":"Correctness","level":2,"score":0.7489429116249084},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.7146784067153931},{"id":"https://openalex.org/C513874922","wikidata":"https://www.wikidata.org/wiki/Q212805","display_name":"Digital library","level":3,"score":0.7074005603790283},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.6157861948013306},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.5999572277069092},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.5789569020271301},{"id":"https://openalex.org/C153048206","wikidata":"https://www.wikidata.org/wiki/Q3454922","display_name":"Metadata repository","level":3,"score":0.49773696064949036},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0789928138256073},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.05394113063812256},{"id":"https://openalex.org/C142362112","wikidata":"https://www.wikidata.org/wiki/Q735","display_name":"Art","level":0,"score":0.0},{"id":"https://openalex.org/C124952713","wikidata":"https://www.wikidata.org/wiki/Q8242","display_name":"Literature","level":1,"score":0.0},{"id":"https://openalex.org/C58640448","wikidata":"https://www.wikidata.org/wiki/Q42515","display_name":"Cartography","level":1,"score":0.0},{"id":"https://openalex.org/C164913051","wikidata":"https://www.wikidata.org/wiki/Q482","display_name":"Poetry","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/jcdl.2014.6970172","is_oa":false,"landing_page_url":"https://doi.org/10.1109/jcdl.2014.6970172","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Joint Conference on Digital Libraries","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":14,"referenced_works":["https://openalex.org/W1559499673","https://openalex.org/W1981791873","https://openalex.org/W1984208669","https://openalex.org/W2000042664","https://openalex.org/W2005359672","https://openalex.org/W2022322548","https://openalex.org/W2057954853","https://openalex.org/W2060186854","https://openalex.org/W2098564132","https://openalex.org/W2113800578","https://openalex.org/W2121259645","https://openalex.org/W2139882553","https://openalex.org/W4253723135","https://openalex.org/W6633305578"],"related_works":["https://openalex.org/W1552553528","https://openalex.org/W2183628870","https://openalex.org/W3023161639","https://openalex.org/W2008531296","https://openalex.org/W2782431616","https://openalex.org/W2394393789","https://openalex.org/W2374379029","https://openalex.org/W1503116306","https://openalex.org/W4299935056","https://openalex.org/W4385414095"],"abstract_inverted_index":{"Given":[0],"a":[1,28,38,86,92,96,107,115,123],"large-scale":[2,97],"digital":[3,98],"library":[4],"that":[5,37,65],"automatically":[6],"crawls":[7],"and":[8,17,80,118,129],"parses":[9],"PDF":[10],"files":[11,56],"to":[12,26,60,105,110,122],"generate":[13],"metadata":[14],"for":[15,71,95],"documents":[16],"authors,":[18],"we":[19,100],"estimate":[20,101],"the":[21,32,35,66,112],"number":[22],"of":[23,31,41,68,89,114,126],"person-hours":[24,109],"required":[25],"correct":[27],"small":[29,87],"portion":[30,40,125],"metadata,":[33,117],"in":[34],"hope":[36],"large":[39],"users":[42,50,69],"can":[43],"benefit":[44],"from":[45,57],"these":[46],"corrections.":[47],"We":[48,63],"obtain":[49],"requests":[51,70],"by":[52],"analyzing":[53],"Cite-SeerX's":[54],"log":[55],"September":[58],"2009":[59],"March":[61],"2013.":[62],"found":[64],"distribution":[67],"search":[72,78,82,128,131],"is":[73,103],"highly":[74],"imbalanced:":[75],"most":[76],"document":[77,127],"queries":[79,83],"author":[81,130],"concentrate":[84],"on":[85],"set":[88],"terms.":[90],"As":[91],"result,":[93],"even":[94],"library,":[99],"it":[102],"affordable":[104],"invest":[106],"few":[108,116],"check":[111],"correctness":[113],"thus":[119],"provide":[120],"benefits":[121],"good":[124],"requests.":[132]},"counts_by_year":[{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
