{"id":"https://openalex.org/W2295403884","doi":"https://doi.org/10.1007/978-3-319-25485-2_1","title":"Document Analysis and Retrieval Tasks in Scientific Digital Libraries","display_name":"Document Analysis and Retrieval Tasks in Scientific Digital Libraries","publication_year":2015,"publication_date":"2015-01-01","ids":{"openalex":"https://openalex.org/W2295403884","doi":"https://doi.org/10.1007/978-3-319-25485-2_1","mag":"2295403884"},"language":"en","primary_location":{"id":"doi:10.1007/978-3-319-25485-2_1","is_oa":true,"landing_page_url":"https://doi.org/10.1007/978-3-319-25485-2_1","pdf_url":null,"source":{"id":"https://openalex.org/S2764900261","display_name":"Communications in computer and information science","issn_l":"1865-0929","issn":["1865-0929","1865-0937"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"book series"},"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Communications in Computer and Information Science","raw_type":"book-chapter"},"type":"book-chapter","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://doi.org/10.1007/978-3-319-25485-2_1","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5076157833","display_name":"Sujatha Das Gollapalli","orcid":"https://orcid.org/0000-0002-4567-8937"},"institutions":[{"id":"https://openalex.org/I3005327000","display_name":"Institute for Infocomm Research","ror":"https://ror.org/053rfa017","country_code":"SG","type":"facility","lineage":["https://openalex.org/I115228651","https://openalex.org/I3005327000","https://openalex.org/I91275662"]}],"countries":["SG"],"is_corresponding":true,"raw_author_name":"Sujatha Das Gollapalli","raw_affiliation_strings":["Institute for Infocomm Research, Agency for Science and Technology Research, Singapore, Singapore"],"affiliations":[{"raw_affiliation_string":"Institute for Infocomm Research, Agency for Science and Technology Research, Singapore, Singapore","institution_ids":["https://openalex.org/I3005327000"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5089085275","display_name":"Cornelia Caragea","orcid":"https://orcid.org/0000-0002-5664-2163"},"institutions":[{"id":"https://openalex.org/I123534392","display_name":"University of North Texas","ror":"https://ror.org/00v97ad02","country_code":"US","type":"education","lineage":["https://openalex.org/I123534392"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Cornelia Caragea","raw_affiliation_strings":["Computer Science and Engineering, University of North Texas, Denton, USA"],"affiliations":[{"raw_affiliation_string":"Computer Science and Engineering, University of North Texas, Denton, USA","institution_ids":["https://openalex.org/I123534392"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100418684","display_name":"Xiaoli Li","orcid":"https://orcid.org/0000-0002-0762-6562"},"institutions":[{"id":"https://openalex.org/I3005327000","display_name":"Institute for Infocomm Research","ror":"https://ror.org/053rfa017","country_code":"SG","type":"facility","lineage":["https://openalex.org/I115228651","https://openalex.org/I3005327000","https://openalex.org/I91275662"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Xiaoli Li","raw_affiliation_strings":["Institute for Infocomm Research, Agency for Science and Technology Research, Singapore, Singapore"],"affiliations":[{"raw_affiliation_string":"Institute for Infocomm Research, Agency for Science and Technology Research, Singapore, Singapore","institution_ids":["https://openalex.org/I3005327000"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5001294898","display_name":"C. Lee Giles","orcid":"https://orcid.org/0000-0002-1931-585X"},"institutions":[{"id":"https://openalex.org/I130769515","display_name":"Pennsylvania State University","ror":"https://ror.org/04p491231","country_code":"US","type":"education","lineage":["https://openalex.org/I130769515"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"C. Lee Giles","raw_affiliation_strings":["Information Sciences and Technology, Computer Science and Engineering, The Pennsylvania State University, State College, USA"],"affiliations":[{"raw_affiliation_string":"Information Sciences and Technology, Computer Science and Engineering, The Pennsylvania State University, State College, USA","institution_ids":["https://openalex.org/I130769515"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5076157833"],"corresponding_institution_ids":["https://openalex.org/I3005327000"],"apc_list":null,"apc_paid":null,"fwci":0.3269,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.57444296,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"3","last_page":"20"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T13083","display_name":"Advanced Text Analysis Techniques","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T13083","display_name":"Advanced Text Analysis Techniques","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9926000237464905,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.989799976348877,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8113441467285156},{"id":"https://openalex.org/keywords/metadata","display_name":"Metadata","score":0.7663344144821167},{"id":"https://openalex.org/keywords/digital-library","display_name":"Digital library","score":0.7424944639205933},{"id":"https://openalex.org/keywords/ranking","display_name":"Ranking (information retrieval)","score":0.669134259223938},{"id":"https://openalex.org/keywords/crawling","display_name":"Crawling","score":0.6632916331291199},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.6528720855712891},{"id":"https://openalex.org/keywords/categorization","display_name":"Categorization","score":0.5756968259811401},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.5265218019485474},{"id":"https://openalex.org/keywords/information-extraction","display_name":"Information extraction","score":0.4889932870864868},{"id":"https://openalex.org/keywords/relevance","display_name":"Relevance (law)","score":0.44744637608528137},{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.41701897978782654},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.23420733213424683}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8113441467285156},{"id":"https://openalex.org/C93518851","wikidata":"https://www.wikidata.org/wiki/Q180160","display_name":"Metadata","level":2,"score":0.7663344144821167},{"id":"https://openalex.org/C513874922","wikidata":"https://www.wikidata.org/wiki/Q212805","display_name":"Digital library","level":3,"score":0.7424944639205933},{"id":"https://openalex.org/C189430467","wikidata":"https://www.wikidata.org/wiki/Q7293293","display_name":"Ranking (information retrieval)","level":2,"score":0.669134259223938},{"id":"https://openalex.org/C100368936","wikidata":"https://www.wikidata.org/wiki/Q1411725","display_name":"Crawling","level":2,"score":0.6632916331291199},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.6528720855712891},{"id":"https://openalex.org/C94124525","wikidata":"https://www.wikidata.org/wiki/Q912550","display_name":"Categorization","level":2,"score":0.5756968259811401},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.5265218019485474},{"id":"https://openalex.org/C195807954","wikidata":"https://www.wikidata.org/wiki/Q1662562","display_name":"Information extraction","level":2,"score":0.4889932870864868},{"id":"https://openalex.org/C158154518","wikidata":"https://www.wikidata.org/wiki/Q7310970","display_name":"Relevance (law)","level":2,"score":0.44744637608528137},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.41701897978782654},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.23420733213424683},{"id":"https://openalex.org/C71924100","wikidata":"https://www.wikidata.org/wiki/Q11190","display_name":"Medicine","level":0,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C105702510","wikidata":"https://www.wikidata.org/wiki/Q514","display_name":"Anatomy","level":1,"score":0.0},{"id":"https://openalex.org/C124952713","wikidata":"https://www.wikidata.org/wiki/Q8242","display_name":"Literature","level":1,"score":0.0},{"id":"https://openalex.org/C120665830","wikidata":"https://www.wikidata.org/wiki/Q14620","display_name":"Optics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C164913051","wikidata":"https://www.wikidata.org/wiki/Q482","display_name":"Poetry","level":2,"score":0.0},{"id":"https://openalex.org/C142362112","wikidata":"https://www.wikidata.org/wiki/Q735","display_name":"Art","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1007/978-3-319-25485-2_1","is_oa":true,"landing_page_url":"https://doi.org/10.1007/978-3-319-25485-2_1","pdf_url":null,"source":{"id":"https://openalex.org/S2764900261","display_name":"Communications in computer and information science","issn_l":"1865-0929","issn":["1865-0929","1865-0937"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"book series"},"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Communications in Computer and Information Science","raw_type":"book-chapter"}],"best_oa_location":{"id":"doi:10.1007/978-3-319-25485-2_1","is_oa":true,"landing_page_url":"https://doi.org/10.1007/978-3-319-25485-2_1","pdf_url":null,"source":{"id":"https://openalex.org/S2764900261","display_name":"Communications in computer and information science","issn_l":"1865-0929","issn":["1865-0929","1865-0937"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"book series"},"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Communications in Computer and Information Science","raw_type":"book-chapter"},"sustainable_development_goals":[{"score":0.5199999809265137,"display_name":"Decent work and economic growth","id":"https://metadata.un.org/sdg/8"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":69,"referenced_works":["https://openalex.org/W71104953","https://openalex.org/W193741420","https://openalex.org/W259338706","https://openalex.org/W1489992655","https://openalex.org/W1490343430","https://openalex.org/W1492250428","https://openalex.org/W1506845741","https://openalex.org/W1528729143","https://openalex.org/W1532325895","https://openalex.org/W1559499673","https://openalex.org/W1575736368","https://openalex.org/W1590650811","https://openalex.org/W1794016765","https://openalex.org/W1854214752","https://openalex.org/W1880262756","https://openalex.org/W1888811121","https://openalex.org/W1907578970","https://openalex.org/W1956559956","https://openalex.org/W1970043789","https://openalex.org/W1971947061","https://openalex.org/W1975432235","https://openalex.org/W1983531058","https://openalex.org/W2001082470","https://openalex.org/W2005677600","https://openalex.org/W2022322548","https://openalex.org/W2034721576","https://openalex.org/W2064418625","https://openalex.org/W2067292826","https://openalex.org/W2083214110","https://openalex.org/W2097580385","https://openalex.org/W2104973501","https://openalex.org/W2107473756","https://openalex.org/W2107743791","https://openalex.org/W2110591510","https://openalex.org/W2121577889","https://openalex.org/W2121702856","https://openalex.org/W2122052811","https://openalex.org/W2125327503","https://openalex.org/W2129028998","https://openalex.org/W2139212933","https://openalex.org/W2140190241","https://openalex.org/W2140479099","https://openalex.org/W2140855027","https://openalex.org/W2143930661","https://openalex.org/W2146769536","https://openalex.org/W2147152072","https://openalex.org/W2147880316","https://openalex.org/W2150192952","https://openalex.org/W2152862885","https://openalex.org/W2154368244","https://openalex.org/W2161563551","https://openalex.org/W2171113409","https://openalex.org/W2172106655","https://openalex.org/W2186419784","https://openalex.org/W2222878176","https://openalex.org/W2251786111","https://openalex.org/W2295508865","https://openalex.org/W2400193661","https://openalex.org/W2403701745","https://openalex.org/W2486715084","https://openalex.org/W2519011775","https://openalex.org/W2736801446","https://openalex.org/W2913389685","https://openalex.org/W4213009331","https://openalex.org/W4233135949","https://openalex.org/W4243616875","https://openalex.org/W4247889041","https://openalex.org/W4285719527","https://openalex.org/W4297751839"],"related_works":["https://openalex.org/W4321258516","https://openalex.org/W2051833850","https://openalex.org/W4287845917","https://openalex.org/W3156164993","https://openalex.org/W2385015894","https://openalex.org/W2171573941","https://openalex.org/W2165912799","https://openalex.org/W3040878054","https://openalex.org/W3119324922","https://openalex.org/W1987401411"],"abstract_inverted_index":{"Machine":[0],"Learning":[1],"(ML)":[2],"algorithms":[3],"have":[4],"opened":[5],"up":[6],"new":[7],"possibilities":[8],"for":[9,47],"the":[10,41,59,126],"acquisition":[11],"and":[12,37,61,119,132],"processing":[13],"of":[14,43,70,96],"documents":[15,33,97],"in":[16,57,73,81,129],"Information":[17],"Retrieval":[18],"(IR)":[19],"systems.":[20],"Indeed,":[21],"it":[22],"is":[23],"now":[24],"possible":[25],"to":[26,32,78,99],"automate":[27],"several":[28,114],"labor-intensive":[29],"tasks":[30,51,80,131],"related":[31,98],"such":[34,109],"as":[35,110],"categorization":[36],"entity":[38],"extraction.":[39],"Consequently,":[40],"application":[42],"machine":[44,75,135],"learning":[45,76,136],"techniques":[46,77],"various":[48],"large-scale":[49],"IR":[50,62,90],"has":[52],"gathered":[53],"significant":[54],"research":[55,72],"interest":[56],"both":[58],"ML":[60],"communities.":[63],"This":[64],"tutorial":[65],"provides":[66],"a":[67],"reference":[68],"summary":[69],"our":[71],"applying":[74],"diverse":[79],"Digital":[82,85],"Libraries":[83],"(DL).":[84],"library":[86],"portals":[87],"are":[88],"specialized":[89],"systems":[91],"that":[92],"work":[93],"on":[94,104,125],"collections":[95],"particular":[100],"domains.":[101],"We":[102,123],"focus":[103],"open-access,":[105],"scientific":[106],"digital":[107],"libraries":[108],"CiteSeer$$^x$$,":[111],"which":[112],"involve":[113],"crawling,":[115],"ranking,":[116],"content":[117],"analysis,":[118],"metadata":[120],"extraction":[121],"tasks.":[122],"elaborate":[124],"challenges":[127],"involved":[128],"these":[130,141],"highlight":[133],"how":[134],"methods":[137],"can":[138],"successfully":[139],"address":[140],"challenges.":[142]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2017,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
