{"id":"https://openalex.org/W3114232366","doi":"https://doi.org/10.1145/3437963.3441730","title":"Hierarchical Metadata-Aware Document Categorization under Weak Supervision","display_name":"Hierarchical Metadata-Aware Document Categorization under Weak Supervision","publication_year":2021,"publication_date":"2021-03-06","ids":{"openalex":"https://openalex.org/W3114232366","doi":"https://doi.org/10.1145/3437963.3441730","mag":"3114232366"},"language":"en","primary_location":{"id":"doi:10.1145/3437963.3441730","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3437963.3441730","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 14th ACM International Conference on Web Search and Data Mining","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5003837744","display_name":"Yu Zhang","orcid":"https://orcid.org/0000-0003-0540-6758"},"institutions":[{"id":"https://openalex.org/I157725225","display_name":"University of Illinois Urbana-Champaign","ror":"https://ror.org/047426m28","country_code":"US","type":"education","lineage":["https://openalex.org/I157725225"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Yu Zhang","raw_affiliation_strings":["University of Illinois at Urbana-Champaign, Urbana, IL, USA"],"affiliations":[{"raw_affiliation_string":"University of Illinois at Urbana-Champaign, Urbana, IL, USA","institution_ids":["https://openalex.org/I157725225"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5049591203","display_name":"Xiusi Chen","orcid":"https://orcid.org/0000-0002-9713-8000"},"institutions":[{"id":"https://openalex.org/I161318765","display_name":"University of California, Los Angeles","ror":"https://ror.org/046rm7j60","country_code":"US","type":"education","lineage":["https://openalex.org/I161318765"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Xiusi Chen","raw_affiliation_strings":["University of California, Los Angeles, Los Angeles, CA, USA"],"affiliations":[{"raw_affiliation_string":"University of California, Los Angeles, Los Angeles, CA, USA","institution_ids":["https://openalex.org/I161318765"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100770786","display_name":"Meng Yu","orcid":"https://orcid.org/0000-0003-2554-2888"},"institutions":[{"id":"https://openalex.org/I157725225","display_name":"University of Illinois Urbana-Champaign","ror":"https://ror.org/047426m28","country_code":"US","type":"education","lineage":["https://openalex.org/I157725225"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yu Meng","raw_affiliation_strings":["University of Illinois at Urbana-Champaign, Urbana, IL, USA"],"affiliations":[{"raw_affiliation_string":"University of Illinois at Urbana-Champaign, Urbana, IL, USA","institution_ids":["https://openalex.org/I157725225"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5019539533","display_name":"Jiawei Han","orcid":"https://orcid.org/0000-0002-3629-2696"},"institutions":[{"id":"https://openalex.org/I157725225","display_name":"University of Illinois Urbana-Champaign","ror":"https://ror.org/047426m28","country_code":"US","type":"education","lineage":["https://openalex.org/I157725225"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jiawei Han","raw_affiliation_strings":["University of Illinois at Urbana-Champaign, Urbana, IL, USA"],"affiliations":[{"raw_affiliation_string":"University of Illinois at Urbana-Champaign, Urbana, IL, USA","institution_ids":["https://openalex.org/I157725225"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5003837744"],"corresponding_institution_ids":["https://openalex.org/I157725225"],"apc_list":null,"apc_paid":null,"fwci":2.7195,"has_fulltext":false,"cited_by_count":23,"citation_normalized_percentile":{"value":0.91558789,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"770","last_page":"778"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9976999759674072,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/metadata","display_name":"Metadata","score":0.8643345236778259},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8628242611885071},{"id":"https://openalex.org/keywords/hierarchy","display_name":"Hierarchy","score":0.6679298877716064},{"id":"https://openalex.org/keywords/categorization","display_name":"Categorization","score":0.6204982995986938},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.617906928062439},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.5210199356079102},{"id":"https://openalex.org/keywords/complement","display_name":"Complement (music)","score":0.5073358416557312},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.4657168388366699},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.45321908593177795},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3737892210483551},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3612380027770996},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.16225695610046387}],"concepts":[{"id":"https://openalex.org/C93518851","wikidata":"https://www.wikidata.org/wiki/Q180160","display_name":"Metadata","level":2,"score":0.8643345236778259},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8628242611885071},{"id":"https://openalex.org/C31170391","wikidata":"https://www.wikidata.org/wiki/Q188619","display_name":"Hierarchy","level":2,"score":0.6679298877716064},{"id":"https://openalex.org/C94124525","wikidata":"https://www.wikidata.org/wiki/Q912550","display_name":"Categorization","level":2,"score":0.6204982995986938},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.617906928062439},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.5210199356079102},{"id":"https://openalex.org/C112313634","wikidata":"https://www.wikidata.org/wiki/Q7886648","display_name":"Complement (music)","level":5,"score":0.5073358416557312},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.4657168388366699},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.45321908593177795},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3737892210483551},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3612380027770996},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.16225695610046387},{"id":"https://openalex.org/C34447519","wikidata":"https://www.wikidata.org/wiki/Q179522","display_name":"Market economy","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C127716648","wikidata":"https://www.wikidata.org/wiki/Q104053","display_name":"Phenotype","level":3,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C188082640","wikidata":"https://www.wikidata.org/wiki/Q1780899","display_name":"Complementation","level":4,"score":0.0},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3437963.3441730","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3437963.3441730","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 14th ACM International Conference on Web Search and Data Mining","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","score":0.6100000143051147,"id":"https://metadata.un.org/sdg/4"}],"awards":[{"id":"https://openalex.org/G6445402324","display_name":null,"funder_award_id":"IIS-19-56151","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":35,"referenced_works":["https://openalex.org/W1615991656","https://openalex.org/W1888005072","https://openalex.org/W1997530783","https://openalex.org/W1999418377","https://openalex.org/W2025047573","https://openalex.org/W2061873838","https://openalex.org/W2063862666","https://openalex.org/W2105573333","https://openalex.org/W2109154616","https://openalex.org/W2137165876","https://openalex.org/W2138674039","https://openalex.org/W2168380014","https://openalex.org/W2187089797","https://openalex.org/W2251292973","https://openalex.org/W2285986798","https://openalex.org/W2294774419","https://openalex.org/W2470673105","https://openalex.org/W2742940593","https://openalex.org/W2743104969","https://openalex.org/W2759136286","https://openalex.org/W2767878862","https://openalex.org/W2788667846","https://openalex.org/W2904510605","https://openalex.org/W2914602134","https://openalex.org/W2963413667","https://openalex.org/W2971324494","https://openalex.org/W2971330564","https://openalex.org/W2980708516","https://openalex.org/W2983965928","https://openalex.org/W2998640320","https://openalex.org/W3042602466","https://openalex.org/W3102317997","https://openalex.org/W3105538385","https://openalex.org/W3105705953","https://openalex.org/W4254955164"],"related_works":["https://openalex.org/W2165912799","https://openalex.org/W2735662278","https://openalex.org/W2058118494","https://openalex.org/W2392768766","https://openalex.org/W2382615723","https://openalex.org/W4311804456","https://openalex.org/W1987484445","https://openalex.org/W2382021449","https://openalex.org/W2623658258","https://openalex.org/W2806637116"],"abstract_inverted_index":{"Categorizing":[0],"documents":[1,65,133],"into":[2],"a":[3,104,125,144],"given":[4],"label":[5,79],"hierarchy":[6],"is":[7],"intuitively":[8],"appealing":[9],"due":[10],"to":[11,76,134],"the":[12,78,136,154],"ubiquity":[13],"of":[14,114,147,156],"hierarchical":[15,32],"topic":[16],"structures":[17],"in":[18,29,48],"massive":[19,38],"text":[20,45,83],"corpora.":[21],"Although":[22],"related":[23],"studies":[24,74],"have":[25],"achieved":[26],"satisfying":[27],"performance":[28],"fully":[30],"supervised":[31],"document":[33,86],"classification,":[34],"they":[35],"usually":[36],"require":[37],"human-annotated":[39],"training":[40,59,132,139],"data":[41,126,161],"and":[42,82,119,122,152,160],"only":[43],"utilize":[44],"information.":[46,70],"However,":[47],"many":[49],"domains,":[50],"(1)":[51],"annotations":[52],"are":[53,66],"quite":[54],"expensive":[55],"where":[56],"very":[57],"few":[58],"samples":[60],"can":[61],"be":[62],"acquired;":[63],"(2)":[64],"accompanied":[67],"by":[68],"metadata":[69,117],"Hence,":[71],"this":[72],"paper":[73],"how":[75],"integrate":[77],"hierarchy,":[80],"metadata,":[81],"signals":[84],"for":[85,98],"categorization":[87],"under":[88],"weak":[89],"supervision.":[90],"We":[91],"develop":[92],"HiMeCat,":[93],"an":[94],"embedding-based":[95],"generative":[96],"framework":[97],"our":[99,157],"task.":[100],"Specifically,":[101],"we":[102,123],"propose":[103],"novel":[105],"joint":[106],"representation":[107,158],"learning":[108,159],"module":[109,128],"that":[110,129],"allows":[111],"simultaneous":[112],"modeling":[113],"category":[115],"dependencies,":[116],"information":[118],"textual":[120],"semantics,":[121],"introduce":[124],"augmentation":[127,162],"hierarchically":[130],"synthesizes":[131],"complement":[135],"original,":[137],"small-scale":[138],"set.":[140],"Our":[141],"experiments":[142],"demonstrate":[143],"consistent":[145],"improvement":[146],"HiMeCat":[148],"over":[149],"competitive":[150],"baselines":[151],"validate":[153],"contribution":[155],"modules.":[163]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":6},{"year":2023,"cited_by_count":4},{"year":2022,"cited_by_count":3},{"year":2021,"cited_by_count":7},{"year":2020,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
