{"id":"https://openalex.org/W2509601217","doi":"https://doi.org/10.18653/v1/w16-2911","title":"Unsupervised Document Classification with Informed Topic Models","display_name":"Unsupervised Document Classification with Informed Topic Models","publication_year":2016,"publication_date":"2016-01-01","ids":{"openalex":"https://openalex.org/W2509601217","doi":"https://doi.org/10.18653/v1/w16-2911","mag":"2509601217"},"language":"en","primary_location":{"id":"doi:10.18653/v1/w16-2911","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/w16-2911","pdf_url":"https://www.aclweb.org/anthology/W16-2911.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 15th Workshop on Biomedical Natural Language Processing","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.aclweb.org/anthology/W16-2911.pdf","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5039901099","display_name":"Timothy A. Miller","orcid":"https://orcid.org/0000-0003-4513-403X"},"institutions":[{"id":"https://openalex.org/I1288882113","display_name":"Boston Children's Hospital","ror":"https://ror.org/00dvg7y05","country_code":"US","type":"healthcare","lineage":["https://openalex.org/I1288882113"]},{"id":"https://openalex.org/I136199984","display_name":"Harvard University","ror":"https://ror.org/03vek6s52","country_code":"US","type":"education","lineage":["https://openalex.org/I136199984"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Timothy Miller","raw_affiliation_strings":["Boston Children's Hospital Informatics Program, Harvard Medical School, Boston, MA 02115"],"affiliations":[{"raw_affiliation_string":"Boston Children's Hospital Informatics Program, Harvard Medical School, Boston, MA 02115","institution_ids":["https://openalex.org/I1288882113","https://openalex.org/I136199984"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5054031099","display_name":"Dmitriy Dligach","orcid":"https://orcid.org/0000-0002-2585-2707"},"institutions":[{"id":"https://openalex.org/I1925986","display_name":"Loyola University Chicago","ror":"https://ror.org/04b6x2g63","country_code":"US","type":"education","lineage":["https://openalex.org/I1925986"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Dmitriy Dligach","raw_affiliation_strings":["Department of Computer Science, Loyola University Chicago, Chicago, IL 60611"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, Loyola University Chicago, Chicago, IL 60611","institution_ids":["https://openalex.org/I1925986"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5087865794","display_name":"Guergana Savova","orcid":"https://orcid.org/0000-0002-5887-200X"},"institutions":[{"id":"https://openalex.org/I136199984","display_name":"Harvard University","ror":"https://ror.org/03vek6s52","country_code":"US","type":"education","lineage":["https://openalex.org/I136199984"]},{"id":"https://openalex.org/I1288882113","display_name":"Boston Children's Hospital","ror":"https://ror.org/00dvg7y05","country_code":"US","type":"healthcare","lineage":["https://openalex.org/I1288882113"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Guergana Savova","raw_affiliation_strings":["Boston Children's Hospital Informatics Program, Harvard Medical School, Boston, MA 02115"],"affiliations":[{"raw_affiliation_string":"Boston Children's Hospital Informatics Program, Harvard Medical School, Boston, MA 02115","institution_ids":["https://openalex.org/I1288882113","https://openalex.org/I136199984"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5054031099"],"corresponding_institution_ids":["https://openalex.org/I1925986"],"apc_list":null,"apc_paid":null,"fwci":0.8834,"has_fulltext":true,"cited_by_count":8,"citation_normalized_percentile":{"value":0.8429556,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"83","last_page":"91"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12072","display_name":"Machine Learning and Algorithms","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12072","display_name":"Machine Learning and Algorithms","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9984999895095825,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.996399998664856,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6813353300094604},{"id":"https://openalex.org/keywords/document-classification","display_name":"Document classification","score":0.45376646518707275},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.44792813062667847},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.44429540634155273},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.44071465730667114},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.34960106015205383}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6813353300094604},{"id":"https://openalex.org/C2780479914","wikidata":"https://www.wikidata.org/wiki/Q302088","display_name":"Document classification","level":2,"score":0.45376646518707275},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.44792813062667847},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.44429540634155273},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.44071465730667114},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.34960106015205383}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.18653/v1/w16-2911","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/w16-2911","pdf_url":"https://www.aclweb.org/anthology/W16-2911.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 15th Workshop on Biomedical Natural Language Processing","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.18653/v1/w16-2911","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/w16-2911","pdf_url":"https://www.aclweb.org/anthology/W16-2911.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 15th Workshop on Biomedical Natural Language Processing","raw_type":"proceedings-article"},"sustainable_development_goals":[{"display_name":"Reduced inequalities","score":0.6399999856948853,"id":"https://metadata.un.org/sdg/10"}],"awards":[{"id":"https://openalex.org/G1287797271","display_name":null,"funder_award_id":"LM008748","funder_id":"https://openalex.org/F4320332161","funder_display_name":"National Institutes of Health"},{"id":"https://openalex.org/G2053124063","display_name":null,"funder_award_id":"U54LM008748","funder_id":"https://openalex.org/F4320332161","funder_display_name":"National Institutes of Health"},{"id":"https://openalex.org/G3611247453","display_name":null,"funder_award_id":"R01GM","funder_id":"https://openalex.org/F4320332161","funder_display_name":"National Institutes of Health"},{"id":"https://openalex.org/G3736491172","display_name":null,"funder_award_id":"U54LM00874","funder_id":"https://openalex.org/F4320337372","funder_display_name":"U.S. National Library of Medicine"},{"id":"https://openalex.org/G4359693134","display_name":null,"funder_award_id":"NIGMS","funder_id":"https://openalex.org/F4320332161","funder_display_name":"National Institutes of Health"},{"id":"https://openalex.org/G4863354295","display_name":null,"funder_award_id":"R01GM114355","funder_id":"https://openalex.org/F4320337354","funder_display_name":"National Institute of General Medical Sciences"},{"id":"https://openalex.org/G6531036774","display_name":null,"funder_award_id":"R01GM114355","funder_id":"https://openalex.org/F4320332161","funder_display_name":"National Institutes of Health"},{"id":"https://openalex.org/G8744663245","display_name":null,"funder_award_id":"U54LM008748","funder_id":"https://openalex.org/F4320337372","funder_display_name":"U.S. National Library of Medicine"}],"funders":[{"id":"https://openalex.org/F4320332161","display_name":"National Institutes of Health","ror":"https://ror.org/01cwqze88"},{"id":"https://openalex.org/F4320337354","display_name":"National Institute of General Medical Sciences","ror":"https://ror.org/04q48ey07"},{"id":"https://openalex.org/F4320337372","display_name":"U.S. National Library of Medicine","ror":"https://ror.org/0060t0j89"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2509601217.pdf","grobid_xml":"https://content.openalex.org/works/W2509601217.grobid-xml"},"referenced_works_count":17,"referenced_works":["https://openalex.org/W1506246224","https://openalex.org/W1576520375","https://openalex.org/W1880262756","https://openalex.org/W1969486090","https://openalex.org/W1999954155","https://openalex.org/W2001082470","https://openalex.org/W2056616115","https://openalex.org/W2103587173","https://openalex.org/W2106490775","https://openalex.org/W2112076978","https://openalex.org/W2118020653","https://openalex.org/W2133990480","https://openalex.org/W2140124448","https://openalex.org/W2146241755","https://openalex.org/W2903158431","https://openalex.org/W2912651257","https://openalex.org/W4231510805"],"related_works":["https://openalex.org/W2384888906","https://openalex.org/W2144190808","https://openalex.org/W2376314740","https://openalex.org/W2366644548","https://openalex.org/W2357241418","https://openalex.org/W2611614995","https://openalex.org/W2115485936","https://openalex.org/W2368651715","https://openalex.org/W2789919619","https://openalex.org/W3107474891"],"abstract_inverted_index":{"Document":[0],"classification":[1,13,88],"is":[2],"an":[3,94],"important":[4],"and":[5,31,65],"common":[6],"application":[7],"in":[8,21,93,98],"natural":[9],"language":[10],"processing.":[11],"Scaling":[12],"approaches":[14],"to":[15,40,60,74,103],"many":[16],"targets":[17],"faces":[18],"a":[19,33,45],"bottleneck":[20],"acquiring":[22],"gold":[23],"standard":[24],"labels.":[25],"In":[26],"this":[27,62],"work,":[28],"we":[29],"develop":[30],"evaluate":[32,83],"method":[34,69],"for":[35,52],"using":[36],"informed":[37],"topic":[38,72],"models":[39,73],"noisily":[41],"label":[42,76],"documents,":[43],"creating":[44],"noisy":[46,63],"but":[47],"usable":[48],"set":[49],"of":[50],"labels":[51],"training":[53,77],"discriminative":[54],"classifiers.":[55],"We":[56,82],"investigate":[57],"multiple":[58],"ways":[59],"train":[61],"classifier,":[64],"the":[66,87],"best":[67],"performing":[68],"uses":[70],"Wikipedia-seeded":[71],"approximately":[75],"instances":[78],"without":[79],"any":[80],"supervision.":[81],"these":[84],"methods":[85],"on":[86],"task":[89],"as":[90,92],"well":[91],"active":[95,109],"learning":[96,105],"setting,":[97],"which":[99],"they":[100],"are":[101],"shown":[102],"improve":[104],"rates":[106],"over":[107],"traditional":[108],"learning.":[110]},"counts_by_year":[{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":4},{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":2}],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2025-10-10T00:00:00"}
