{"id":"https://openalex.org/W3035055211","doi":"https://doi.org/10.1145/3397271.3401168","title":"Minimally Supervised Categorization of Text with Metadata","display_name":"Minimally Supervised Categorization of Text with Metadata","publication_year":2020,"publication_date":"2020-07-25","ids":{"openalex":"https://openalex.org/W3035055211","doi":"https://doi.org/10.1145/3397271.3401168","mag":"3035055211"},"language":"en","primary_location":{"id":"doi:10.1145/3397271.3401168","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3397271.3401168","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 43rd International ACM SIGIR Conference on Research and Development in Information Retrieval","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5003837744","display_name":"Yu Zhang","orcid":"https://orcid.org/0000-0003-0540-6758"},"institutions":[{"id":"https://openalex.org/I157725225","display_name":"University of Illinois Urbana-Champaign","ror":"https://ror.org/047426m28","country_code":"US","type":"education","lineage":["https://openalex.org/I157725225"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Yu Zhang","raw_affiliation_strings":["University of Illinois at Urbana-Champaign, Champaign, IL, USA"],"affiliations":[{"raw_affiliation_string":"University of Illinois at Urbana-Champaign, Champaign, IL, USA","institution_ids":["https://openalex.org/I157725225"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100770786","display_name":"Meng Yu","orcid":"https://orcid.org/0000-0003-2554-2888"},"institutions":[{"id":"https://openalex.org/I157725225","display_name":"University of Illinois Urbana-Champaign","ror":"https://ror.org/047426m28","country_code":"US","type":"education","lineage":["https://openalex.org/I157725225"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yu Meng","raw_affiliation_strings":["University of Illinois at Urbana-Champaign, Champaign, IL, USA"],"affiliations":[{"raw_affiliation_string":"University of Illinois at Urbana-Champaign, Champaign, IL, USA","institution_ids":["https://openalex.org/I157725225"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5046688345","display_name":"Jiaxin Huang","orcid":"https://orcid.org/0000-0001-8095-3343"},"institutions":[{"id":"https://openalex.org/I157725225","display_name":"University of Illinois Urbana-Champaign","ror":"https://ror.org/047426m28","country_code":"US","type":"education","lineage":["https://openalex.org/I157725225"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jiaxin Huang","raw_affiliation_strings":["University of Illinois at Urbana-Champaign, Champaign, IL, USA"],"affiliations":[{"raw_affiliation_string":"University of Illinois at Urbana-Champaign, Champaign, IL, USA","institution_ids":["https://openalex.org/I157725225"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5038743835","display_name":"Frank F. Xu","orcid":"https://orcid.org/0000-0002-9662-7582"},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Frank F. Xu","raw_affiliation_strings":["Carnegie Mellon University, Pittsburgh, PA, USA"],"affiliations":[{"raw_affiliation_string":"Carnegie Mellon University, Pittsburgh, PA, USA","institution_ids":["https://openalex.org/I74973139"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100328989","display_name":"Xuan Wang","orcid":"https://orcid.org/0000-0002-1381-8958"},"institutions":[{"id":"https://openalex.org/I157725225","display_name":"University of Illinois Urbana-Champaign","ror":"https://ror.org/047426m28","country_code":"US","type":"education","lineage":["https://openalex.org/I157725225"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Xuan Wang","raw_affiliation_strings":["University of Illinois at Urbana-Champaign, Champaign, IL, USA"],"affiliations":[{"raw_affiliation_string":"University of Illinois at Urbana-Champaign, Champaign, IL, USA","institution_ids":["https://openalex.org/I157725225"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5019539533","display_name":"Jiawei Han","orcid":"https://orcid.org/0000-0002-3629-2696"},"institutions":[{"id":"https://openalex.org/I157725225","display_name":"University of Illinois Urbana-Champaign","ror":"https://ror.org/047426m28","country_code":"US","type":"education","lineage":["https://openalex.org/I157725225"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jiawei Han","raw_affiliation_strings":["University of Illinois at Urbana-Champaign, Champaign, IL, USA"],"affiliations":[{"raw_affiliation_string":"University of Illinois at Urbana-Champaign, Champaign, IL, USA","institution_ids":["https://openalex.org/I157725225"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5003837744"],"corresponding_institution_ids":["https://openalex.org/I157725225"],"apc_list":null,"apc_paid":null,"fwci":3.1814,"has_fulltext":false,"cited_by_count":30,"citation_normalized_percentile":{"value":0.93214694,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":95,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"1231","last_page":"1240"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9976000189781189,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/metadata","display_name":"Metadata","score":0.903915524482727},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8347141742706299},{"id":"https://openalex.org/keywords/categorization","display_name":"Categorization","score":0.7907567024230957},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.6363387703895569},{"id":"https://openalex.org/keywords/generative-grammar","display_name":"Generative grammar","score":0.535675585269928},{"id":"https://openalex.org/keywords/bottleneck","display_name":"Bottleneck","score":0.5331472754478455},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.4836624562740326},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.44484856724739075},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3978748917579651},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.39001110196113586},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.2385132908821106}],"concepts":[{"id":"https://openalex.org/C93518851","wikidata":"https://www.wikidata.org/wiki/Q180160","display_name":"Metadata","level":2,"score":0.903915524482727},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8347141742706299},{"id":"https://openalex.org/C94124525","wikidata":"https://www.wikidata.org/wiki/Q912550","display_name":"Categorization","level":2,"score":0.7907567024230957},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.6363387703895569},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.535675585269928},{"id":"https://openalex.org/C2780513914","wikidata":"https://www.wikidata.org/wiki/Q18210350","display_name":"Bottleneck","level":2,"score":0.5331472754478455},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.4836624562740326},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.44484856724739075},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3978748917579651},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.39001110196113586},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.2385132908821106},{"id":"https://openalex.org/C149635348","wikidata":"https://www.wikidata.org/wiki/Q193040","display_name":"Embedded system","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3397271.3401168","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3397271.3401168","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 43rd International ACM SIGIR Conference on Research and Development in Information Retrieval","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G2104517209","display_name":null,"funder_award_id":"IIS-17-41317","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G5490100290","display_name":null,"funder_award_id":"HDTRA11810026","funder_id":"https://openalex.org/F4320332186","funder_display_name":"Defense Threat Reduction Agency"},{"id":"https://openalex.org/G8851674072","display_name":null,"funder_award_id":"W911NF-17-C-0099","funder_id":"https://openalex.org/F4320332180","funder_display_name":"Defense Advanced Research Projects Agency"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320332180","display_name":"Defense Advanced Research Projects Agency","ror":"https://ror.org/02caytj08"},{"id":"https://openalex.org/F4320332186","display_name":"Defense Threat Reduction Agency","ror":"https://ror.org/04tz64554"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":35,"referenced_works":["https://openalex.org/W103340358","https://openalex.org/W746911252","https://openalex.org/W1615991656","https://openalex.org/W1888005072","https://openalex.org/W2018277822","https://openalex.org/W2061873838","https://openalex.org/W2145658888","https://openalex.org/W2157589241","https://openalex.org/W2251292973","https://openalex.org/W2294774419","https://openalex.org/W2470673105","https://openalex.org/W2533513334","https://openalex.org/W2739996966","https://openalex.org/W2740161049","https://openalex.org/W2740721704","https://openalex.org/W2742940593","https://openalex.org/W2743104969","https://openalex.org/W2744097819","https://openalex.org/W2767878862","https://openalex.org/W2798600398","https://openalex.org/W2802181385","https://openalex.org/W2890931111","https://openalex.org/W2897454190","https://openalex.org/W2914602134","https://openalex.org/W2962946486","https://openalex.org/W2963413667","https://openalex.org/W2963622218","https://openalex.org/W2980708516","https://openalex.org/W3004119480","https://openalex.org/W3034588688","https://openalex.org/W3099045991","https://openalex.org/W3101606352","https://openalex.org/W3104717349","https://openalex.org/W3105705953","https://openalex.org/W4254955164"],"related_works":["https://openalex.org/W2595172197","https://openalex.org/W2084856301","https://openalex.org/W2127970246","https://openalex.org/W2885125400","https://openalex.org/W1989889224","https://openalex.org/W4382618745","https://openalex.org/W1973775000","https://openalex.org/W2748922771","https://openalex.org/W1987128138","https://openalex.org/W2743976221"],"abstract_inverted_index":{"Document":[0],"categorization,":[1],"which":[2],"aims":[3],"to":[4,9,83,91,115,150,165],"assign":[5],"a":[6,13,17,96,111,123,174,178],"topic":[7,66],"label":[8,170],"each":[10],"document,":[11],"plays":[12],"fundamental":[14],"role":[15],"in":[16,28,46,85],"wide":[18,179],"variety":[19],"of":[20,25,43,99,104,169,181,188],"applications.":[21],"Despite":[22],"the":[23,73,127,137,146,157,167,186],"success":[24],"existing":[26],"studies":[27],"conventional":[29],"supervised":[30,113],"document":[31],"classification,":[32],"they":[33],"are":[34,81],"less":[35],"concerned":[36],"with":[37,118],"two":[38,106],"real":[39],"problems:":[40],"(1)the":[41],"presence":[42],"metadata":[44,62,144],":":[45],"many":[47,191],"domains,":[48],"text":[49,117,142],"is":[50],"accompanied":[51],"by":[52,136],"various":[53],"additional":[54],"information":[55],"such":[56],"as":[57,64],"authors":[58],"and":[59,68,133,143],"tags.":[60],"Such":[61],"serve":[63],"compelling":[65],"indicators":[67],"should":[69],"be":[70,92],"leveraged":[71],"into":[72,145],"categorization":[74,89],"framework;":[75],"(2)label":[76],"scarcity:":[77],"labeled":[78],"training":[79,163],"samples":[80,164],"expensive":[82],"obtain":[84],"some":[86],"cases,":[87],"where":[88],"needs":[90],"performed":[93],"using":[94],"only":[95],"small":[97],"set":[98],"annotated":[100],"data.":[101],"In":[102],"recognition":[103],"these":[105],"challenges,":[107],"we":[108,121,140,161],"propose":[109],"MetaCat,":[110],"minimally":[112],"framework":[114],"categorize":[116],"metadata.":[119,134],"Specifically,":[120],"develop":[122],"generative":[124,138,159],"process":[125],"describing":[126],"relationships":[128],"between":[129],"words,":[130],"documents,":[131],"labels,":[132],"Guided":[135],"model,":[139],"embed":[141],"same":[147,158],"semantic":[148],"space":[149],"encode":[151],"heterogeneous":[152],"signals.":[153],"Then,":[154],"based":[155],"on":[156,177],"process,":[160],"synthesize":[162],"address":[166],"bottleneck":[168],"scarcity.":[171],"We":[172],"conduct":[173],"thorough":[175],"evaluation":[176],"range":[180],"datasets.":[182],"Experimental":[183],"results":[184],"prove":[185],"effectiveness":[187],"MetaCat":[189],"over":[190],"competitive":[192],"baselines.":[193]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":4},{"year":2023,"cited_by_count":5},{"year":2022,"cited_by_count":4},{"year":2021,"cited_by_count":5},{"year":2020,"cited_by_count":10}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
