{"id":"https://openalex.org/W2784338667","doi":"https://doi.org/10.1145/3197026.3197039","title":"Using Deep Learning for Title-Based Semantic Subject Indexing to Reach Competitive Performance to Full-Text","display_name":"Using Deep Learning for Title-Based Semantic Subject Indexing to Reach Competitive Performance to Full-Text","publication_year":2018,"publication_date":"2018-05-23","ids":{"openalex":"https://openalex.org/W2784338667","doi":"https://doi.org/10.1145/3197026.3197039","mag":"2784338667"},"language":"en","primary_location":{"id":"doi:10.1145/3197026.3197039","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3197026.3197039","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 18th ACM/IEEE on Joint Conference on Digital Libraries","raw_type":"proceedings-article"},"type":"article","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/1801.06717","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Florian Mai","orcid":null},"institutions":[{"id":"https://openalex.org/I32021983","display_name":"Kiel University","ror":"https://ror.org/04v76ef78","country_code":"DE","type":"education","lineage":["https://openalex.org/I32021983"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Florian Mai","raw_affiliation_strings":["Kiel University, Kiel, Germany"],"affiliations":[{"raw_affiliation_string":"Kiel University, Kiel, Germany","institution_ids":["https://openalex.org/I32021983"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Lukas Galke","orcid":null},"institutions":[{"id":"https://openalex.org/I32021983","display_name":"Kiel University","ror":"https://ror.org/04v76ef78","country_code":"DE","type":"education","lineage":["https://openalex.org/I32021983"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Lukas Galke","raw_affiliation_strings":["Kiel University, Kiel, Germany"],"affiliations":[{"raw_affiliation_string":"Kiel University, Kiel, Germany","institution_ids":["https://openalex.org/I32021983"]}]},{"author_position":"last","author":{"id":null,"display_name":"Ansgar Scherp","orcid":null},"institutions":[{"id":"https://openalex.org/I32021983","display_name":"Kiel University","ror":"https://ror.org/04v76ef78","country_code":"DE","type":"education","lineage":["https://openalex.org/I32021983"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Ansgar Scherp","raw_affiliation_strings":["Kiel University, Kiel, Germany"],"affiliations":[{"raw_affiliation_string":"Kiel University, Kiel, Germany","institution_ids":["https://openalex.org/I32021983"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I32021983"],"apc_list":null,"apc_paid":null,"fwci":2.0309,"has_fulltext":false,"cited_by_count":22,"citation_normalized_percentile":{"value":0.89597535,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"169","last_page":"178"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13083","display_name":"Advanced Text Analysis Techniques","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9973000288009644,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/search-engine-indexing","display_name":"Search engine indexing","score":0.6520000100135803},{"id":"https://openalex.org/keywords/metadata","display_name":"Metadata","score":0.6330000162124634},{"id":"https://openalex.org/keywords/classifier","display_name":"Classifier (UML)","score":0.6179999709129333},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.605400025844574},{"id":"https://openalex.org/keywords/exploit","display_name":"Exploit","score":0.6033999919891357},{"id":"https://openalex.org/keywords/training-set","display_name":"Training set","score":0.5253000259399414},{"id":"https://openalex.org/keywords/subject","display_name":"Subject (documents)","score":0.4269999861717224},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.3741999864578247}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8353000283241272},{"id":"https://openalex.org/C75165309","wikidata":"https://www.wikidata.org/wiki/Q2258979","display_name":"Search engine indexing","level":2,"score":0.6520000100135803},{"id":"https://openalex.org/C93518851","wikidata":"https://www.wikidata.org/wiki/Q180160","display_name":"Metadata","level":2,"score":0.6330000162124634},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6328999996185303},{"id":"https://openalex.org/C95623464","wikidata":"https://www.wikidata.org/wiki/Q1096149","display_name":"Classifier (UML)","level":2,"score":0.6179999709129333},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.605400025844574},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.6033999919891357},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.5253000259399414},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4767000079154968},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.4318000078201294},{"id":"https://openalex.org/C2777855551","wikidata":"https://www.wikidata.org/wiki/Q12310021","display_name":"Subject (documents)","level":2,"score":0.4269999861717224},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.3741999864578247},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3725000023841858},{"id":"https://openalex.org/C2776145971","wikidata":"https://www.wikidata.org/wiki/Q30673951","display_name":"Labeled data","level":2,"score":0.3506999909877777},{"id":"https://openalex.org/C2778330532","wikidata":"https://www.wikidata.org/wiki/Q4826577","display_name":"Automatic indexing","level":3,"score":0.35019999742507935},{"id":"https://openalex.org/C513874922","wikidata":"https://www.wikidata.org/wiki/Q212805","display_name":"Digital library","level":3,"score":0.34299999475479126},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3156999945640564},{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.3068999946117401},{"id":"https://openalex.org/C2776959682","wikidata":"https://www.wikidata.org/wiki/Q17005296","display_name":"Co-training","level":3,"score":0.29679998755455017},{"id":"https://openalex.org/C187191949","wikidata":"https://www.wikidata.org/wiki/Q1138496","display_name":"Profiling (computer programming)","level":2,"score":0.28630000352859497},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.2621000111103058},{"id":"https://openalex.org/C207685749","wikidata":"https://www.wikidata.org/wiki/Q2088941","display_name":"Domain knowledge","level":2,"score":0.26080000400543213},{"id":"https://openalex.org/C148524875","wikidata":"https://www.wikidata.org/wiki/Q6975395","display_name":"F1 score","level":2,"score":0.25769999623298645},{"id":"https://openalex.org/C2781039887","wikidata":"https://www.wikidata.org/wiki/Q1391724","display_name":"Factor (programming language)","level":2,"score":0.2531999945640564}],"mesh":[],"locations_count":5,"locations":[{"id":"doi:10.1145/3197026.3197039","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3197026.3197039","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 18th ACM/IEEE on Joint Conference on Digital Libraries","raw_type":"proceedings-article"},{"id":"pmh:oai:pub.zbw.eu:11108/369","is_oa":false,"landing_page_url":"http://hdl.handle.net/11108/369","pdf_url":null,"source":{"id":"https://openalex.org/S4306402552","display_name":"ZBW Publication Archive (ZBW \u2013 Leibniz Information Centre for Economics)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I2800024560","host_organization_name":"ZBW \u2013 Leibniz-Informationszentrum Wirtschaft","host_organization_lineage":["https://openalex.org/I2800024560"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"acceptedVersion","is_accepted":true,"is_published":false,"raw_source_name":"","raw_type":"doc-type:conferenceObject"},{"id":"pmh:oai:arXiv.org:1801.06717","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1801.06717","pdf_url":"https://arxiv.org/pdf/1801.06717","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"pmh:oai:dspace.stir.ac.uk:1893/28020","is_oa":false,"landing_page_url":"http://hdl.handle.net/1893/28020","pdf_url":null,"source":{"id":"https://openalex.org/S4306400268","display_name":"Stirling Online Research Repository (University of Stirling)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I12093191","host_organization_name":"University of Stirling","host_organization_lineage":["https://openalex.org/I12093191"],"host_organization_lineage_names":[],"type":"repository"},"license":"public-domain","license_id":"https://openalex.org/licenses/public-domain","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Conference Paper"},{"id":"pmh:oai:zenodo.org:1286796","is_oa":true,"landing_page_url":"https://zenodo.org/record/1286796","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"info:eu-repo/semantics/conferencePaper"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:1801.06717","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1801.06717","pdf_url":"https://arxiv.org/pdf/1801.06717","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G3143778357","display_name":null,"funder_award_id":"693092","funder_id":"https://openalex.org/F4320332999","funder_display_name":"Horizon 2020 Framework Programme"},{"id":"https://openalex.org/G6340432390","display_name":null,"funder_award_id":"693092","funder_id":"https://openalex.org/F4320335254","funder_display_name":"Horizon 2020"}],"funders":[{"id":"https://openalex.org/F4320332999","display_name":"Horizon 2020 Framework Programme","ror":"https://ror.org/00k4n6c32"},{"id":"https://openalex.org/F4320335254","display_name":"Horizon 2020","ror":null}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":18,"referenced_works":["https://openalex.org/W108526313","https://openalex.org/W1576514601","https://openalex.org/W1808807089","https://openalex.org/W1832693441","https://openalex.org/W1978394996","https://openalex.org/W1981208470","https://openalex.org/W2046352419","https://openalex.org/W2064675550","https://openalex.org/W2074909580","https://openalex.org/W2084665596","https://openalex.org/W2119466907","https://openalex.org/W2142955026","https://openalex.org/W2152143870","https://openalex.org/W2194775991","https://openalex.org/W2308414794","https://openalex.org/W2606900069","https://openalex.org/W2739996966","https://openalex.org/W2757203079"],"related_works":[],"abstract_inverted_index":{"For":[0],"(semi-)automated":[1],"subject":[2],"indexing":[3],"systems":[4],"in":[5,89],"digital":[6],"libraries,":[7],"it":[8,31,81,95],"is":[9,32,60,76,82],"often":[10],"more":[11],"practical":[12],"to":[13,34,85,93,121,182],"use":[14,94],"metadata":[15],"such":[16],"as":[17],"the":[18,25,28,48,55,64,67,70,106,140,151,164,198,206,225,232,235,241,244],"title":[19,49,87,117],"of":[20,24,50,73,116,129,155,157,170,180,243,251],"a":[21,51,126,136,168,217,249],"publication":[22],"instead":[23],"full-text":[26,99,214,227,246],"or":[27],"abstract.":[29],"Therefore,":[30],"desirable":[33],"have":[35],"good":[36],"text":[37,40],"mining":[38],"and":[39,92,144,153,161,172,193],"classification":[41,56],"algorithms":[42],"that":[43],"operate":[44],"well":[45],"already":[46],"on":[47,58,66,113,125,135,197],"publication.":[52],"So":[53],"far,":[54],"performance":[57,65,196,242],"titles":[59,152],"not":[61],"competitive":[62],"with":[63,248],"full-texts":[68,166],"if":[69],"same":[71],"number":[72,128],"training":[74,97,112,118,124],"samples":[75],"used":[77],"for":[78,96],"training.":[79],"However,":[80],"much":[83],"easier":[84],"obtain":[86],"data":[88,119,181],"large":[90,178,218],"quantities":[91],"than":[98],"data.":[100],"In":[101,148],"this":[102,133],"paper,":[103],"we":[104,186],"investigate":[105],"question":[107,134],"how":[108],"models":[109,122],"obtained":[110],"from":[111,123,139,145],"increasing":[114],"amounts":[115,179],"compare":[120],"constant":[127],"full-texts.":[130],"We":[131],"evaluate":[132,194],"large-scale":[137],"dataset":[138],"medical":[141],"domain":[142],"(PubMed)":[143],"economics":[146],"(EconBiz).":[147],"these":[149,177],"datasets,":[150],"annotations":[154],"millions":[156],"publications":[158],"are":[159,203],"available,":[160],"they":[162],"outnumber":[163],"available":[165],"by":[167,216,229],"factor":[169],"20":[171],"15,":[173],"respectively.":[174],"To":[175],"exploit":[176],"their":[183,195,213],"full":[184],"potential,":[185],"develop":[187],"three":[188,210],"strong":[189],"deep":[190],"learning":[191],"classifiers":[192,211],"two":[199],"datasets.":[200],"The":[201,220],"results":[202],"promising.":[204],"On":[205,231],"EconBiz":[207],"dataset,":[208,234],"all":[209],"outperform":[212],"counterparts":[215],"margin.":[219],"best":[221,226,236,245],"title-based":[222,237],"classifier":[223],"outperforms":[224],"method":[228,238],"9.4%.":[230],"PubMed":[233],"almost":[239],"reaches":[240],"classifier,":[247],"difference":[250],"only":[252],"2.9%.":[253]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":4},{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":7},{"year":2020,"cited_by_count":3},{"year":2019,"cited_by_count":2}],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2018-02-02T00:00:00"}
