{"id":"https://openalex.org/W4396773650","doi":"https://doi.org/10.1145/3664283","title":"Automated Category Tree Construction: Hardness Bounds and Algorithms","display_name":"Automated Category Tree Construction: Hardness Bounds and Algorithms","publication_year":2024,"publication_date":"2024-05-09","ids":{"openalex":"https://openalex.org/W4396773650","doi":"https://doi.org/10.1145/3664283"},"language":"en","primary_location":{"id":"doi:10.1145/3664283","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3664283","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3664283","source":{"id":"https://openalex.org/S90119964","display_name":"ACM Transactions on Database Systems","issn_l":"0362-5915","issn":["0362-5915","1557-4644"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Database Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"bronze","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3664283","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5080880741","display_name":"Shay Gershtein","orcid":"https://orcid.org/0009-0008-6339-8358"},"institutions":[{"id":"https://openalex.org/I16391192","display_name":"Tel Aviv University","ror":"https://ror.org/04mhzgx49","country_code":"IL","type":"education","lineage":["https://openalex.org/I16391192"]}],"countries":["IL"],"is_corresponding":true,"raw_author_name":"Shay Gershtein","raw_affiliation_strings":["Tel Aviv University, Tel Aviv, Israel"],"affiliations":[{"raw_affiliation_string":"Tel Aviv University, Tel Aviv, Israel","institution_ids":["https://openalex.org/I16391192"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5058183926","display_name":"Uri Avron","orcid":"https://orcid.org/0009-0001-6164-768X"},"institutions":[{"id":"https://openalex.org/I16391192","display_name":"Tel Aviv University","ror":"https://ror.org/04mhzgx49","country_code":"IL","type":"education","lineage":["https://openalex.org/I16391192"]}],"countries":["IL"],"is_corresponding":false,"raw_author_name":"Uri Avron","raw_affiliation_strings":["Tel Aviv University, Tel Aviv, Israel"],"affiliations":[{"raw_affiliation_string":"Tel Aviv University, Tel Aviv, Israel","institution_ids":["https://openalex.org/I16391192"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5035284207","display_name":"Ido Guy","orcid":"https://orcid.org/0000-0002-5525-1064"},"institutions":[{"id":"https://openalex.org/I124227911","display_name":"Ben-Gurion University of the Negev","ror":"https://ror.org/05tkyf982","country_code":"IL","type":"education","lineage":["https://openalex.org/I124227911"]}],"countries":["IL"],"is_corresponding":false,"raw_author_name":"Ido Guy","raw_affiliation_strings":["Ben-Gurion University of the Negev, Beer-Sheva, Israel"],"affiliations":[{"raw_affiliation_string":"Ben-Gurion University of the Negev, Beer-Sheva, Israel","institution_ids":["https://openalex.org/I124227911"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5110095410","display_name":"Tova Milo","orcid":null},"institutions":[{"id":"https://openalex.org/I16391192","display_name":"Tel Aviv University","ror":"https://ror.org/04mhzgx49","country_code":"IL","type":"education","lineage":["https://openalex.org/I16391192"]}],"countries":["IL"],"is_corresponding":false,"raw_author_name":"Tova Milo","raw_affiliation_strings":["Tel Aviv University, Tel Aviv, Israel"],"affiliations":[{"raw_affiliation_string":"Tel Aviv University, Tel Aviv, Israel","institution_ids":["https://openalex.org/I16391192"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5042052657","display_name":"Slava Novgorodov","orcid":"https://orcid.org/0000-0003-4082-7128"},"institutions":[{"id":"https://openalex.org/I16391192","display_name":"Tel Aviv University","ror":"https://ror.org/04mhzgx49","country_code":"IL","type":"education","lineage":["https://openalex.org/I16391192"]}],"countries":["IL"],"is_corresponding":false,"raw_author_name":"Slava Novgorodov","raw_affiliation_strings":["Tel Aviv University, Tel Aviv, Israel"],"affiliations":[{"raw_affiliation_string":"Tel Aviv University, Tel Aviv, Israel","institution_ids":["https://openalex.org/I16391192"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5080880741"],"corresponding_institution_ids":["https://openalex.org/I16391192"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.04993662,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"49","issue":"3","first_page":"1","last_page":"32"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12072","display_name":"Machine Learning and Algorithms","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12072","display_name":"Machine Learning and Algorithms","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9962000250816345,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/categorization","display_name":"Categorization","score":0.678798496723175},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6553133726119995},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.6085849404335022},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.5863315463066101},{"id":"https://openalex.org/keywords/tree","display_name":"Tree (set theory)","score":0.5688410997390747},{"id":"https://openalex.org/keywords/heuristic","display_name":"Heuristic","score":0.5119448900222778},{"id":"https://openalex.org/keywords/function","display_name":"Function (biology)","score":0.5090338587760925},{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.4816383421421051},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.4503062665462494},{"id":"https://openalex.org/keywords/upper-and-lower-bounds","display_name":"Upper and lower bounds","score":0.42550569772720337},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.4199833869934082},{"id":"https://openalex.org/keywords/variety","display_name":"Variety (cybernetics)","score":0.4153730571269989},{"id":"https://openalex.org/keywords/combinatorics","display_name":"Combinatorics","score":0.3777388334274292},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.35611531138420105},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.28511473536491394},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.28248587250709534}],"concepts":[{"id":"https://openalex.org/C94124525","wikidata":"https://www.wikidata.org/wiki/Q912550","display_name":"Categorization","level":2,"score":0.678798496723175},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6553133726119995},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.6085849404335022},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.5863315463066101},{"id":"https://openalex.org/C113174947","wikidata":"https://www.wikidata.org/wiki/Q2859736","display_name":"Tree (set theory)","level":2,"score":0.5688410997390747},{"id":"https://openalex.org/C173801870","wikidata":"https://www.wikidata.org/wiki/Q201413","display_name":"Heuristic","level":2,"score":0.5119448900222778},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.5090338587760925},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.4816383421421051},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.4503062665462494},{"id":"https://openalex.org/C77553402","wikidata":"https://www.wikidata.org/wiki/Q13222579","display_name":"Upper and lower bounds","level":2,"score":0.42550569772720337},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.4199833869934082},{"id":"https://openalex.org/C136197465","wikidata":"https://www.wikidata.org/wiki/Q1729295","display_name":"Variety (cybernetics)","level":2,"score":0.4153730571269989},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.3777388334274292},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.35611531138420105},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.28511473536491394},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.28248587250709534},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C78458016","wikidata":"https://www.wikidata.org/wiki/Q840400","display_name":"Evolutionary biology","level":1,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3664283","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3664283","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3664283","source":{"id":"https://openalex.org/S90119964","display_name":"ACM Transactions on Database Systems","issn_l":"0362-5915","issn":["0362-5915","1557-4644"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Database Systems","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1145/3664283","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3664283","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3664283","source":{"id":"https://openalex.org/S90119964","display_name":"ACM Transactions on Database Systems","issn_l":"0362-5915","issn":["0362-5915","1557-4644"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Database Systems","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4396773650.pdf"},"referenced_works_count":23,"referenced_works":["https://openalex.org/W1538281624","https://openalex.org/W1562917120","https://openalex.org/W1852422273","https://openalex.org/W1964720370","https://openalex.org/W1970972959","https://openalex.org/W2010581776","https://openalex.org/W2034163095","https://openalex.org/W2035695027","https://openalex.org/W2067883080","https://openalex.org/W2078174680","https://openalex.org/W2100071287","https://openalex.org/W2126186592","https://openalex.org/W2151334846","https://openalex.org/W2161401012","https://openalex.org/W2516679817","https://openalex.org/W2768549342","https://openalex.org/W2898323380","https://openalex.org/W2984493948","https://openalex.org/W3133746906","https://openalex.org/W3174964002","https://openalex.org/W4246293990","https://openalex.org/W4250042253","https://openalex.org/W4281762853"],"related_works":["https://openalex.org/W2165912799","https://openalex.org/W2735662278","https://openalex.org/W2382615723","https://openalex.org/W4311804456","https://openalex.org/W1987484445","https://openalex.org/W2623658258","https://openalex.org/W2143413548","https://openalex.org/W151699219","https://openalex.org/W2085633464","https://openalex.org/W1966122760"],"abstract_inverted_index":{"Category":[0],"trees,":[1],"or":[2,238,343],"taxonomies,":[3],"are":[4,353],"rooted":[5],"trees":[6],"where":[7,284,336],"each":[8,214,349],"node,":[9],"called":[10],"a":[11,15,46,80,117,129,158,162,171,185,305],"category,":[12,145],"corresponds":[13],"to":[14,57,115,156,169,290,352,363],"set":[16,130],"of":[17,22,48,119,131,178,198,209,223,233,272,318,339,346],"related":[18],"items.":[19],"The":[20,166],"construction":[21,41],"taxonomies":[23],"has":[24,62,68,95],"been":[25,43,63,70,96],"studied":[26],"in":[27,77,219],"various":[28,105,243,267],"domains,":[29],"including":[30],"e-commerce,":[31],"document":[32],"management,":[33],"and":[34,51,65,88,108,246,270,365],"question":[35],"answering.":[36],"Multiple":[37],"algorithms":[38,326],"for":[39,104,161,181,242,280,332],"automating":[40],"have":[42],"proposed,":[44],"employing":[45],"variety":[47],"clustering":[49],"approaches":[50],"crowdsourcing.":[52],"However,":[53],"no":[54],"formal":[55],"model":[56,82],"capture":[58],"such":[59],"categorization":[60,118],"problems":[61],"devised,":[64],"their":[66],"complexity":[67],"not":[69,354],"studied.":[71],"To":[72],"address":[73],"this,":[74],"we":[75,99,229,295,358],"propose":[76],"this":[78,227],"work":[79,256],"combinatorial":[81],"that":[83,90,136,173,264],"captures":[84],"many":[85],"practical":[86,319,333],"settings":[87],"show":[89],"the":[91,112,120,126,137,150,175,179,196,207,210,251,273,277,281,285,291,316,337,340,344],"aforementioned":[92,252],"empirical":[93],"approach":[94],"warranted,":[97],"as":[98,142,212,313,315],"prove":[100,230],"strong":[101],"inapproximability":[102,231],"bounds":[103],"problem":[106,244,268],"variants":[107,245],"special":[109,247,282,334],"cases":[110,335],"when":[111],"goal":[113,167],"is":[114,128,154,168,191],"produce":[116,170],"maximum":[121],"utility.":[122],"In":[123],"our":[124,361],"model,":[125,228],"input":[127,152,224,274,293,341,347],"n":[132],"weighted":[133],"item":[134,201,215],"sets":[135,180,342,348],"tree":[138,172],"would":[139],"ideally":[140],"contain":[141],"categories.":[143],"Each":[144],"rather":[146],"than":[147],"perfectly":[148],"match":[149],"corresponding":[151,292],"set,":[153,294],"allowed":[155],"exceed":[157],"given":[159,163],"threshold":[160],"similarity":[164],"function.":[165],"maximizes":[174],"total":[176],"weight":[177],"which":[182,205],"it":[183],"contains":[184],"matching":[186],"category.":[187],"A":[188],"key":[189],"parameter":[190],"an":[192,200,220,297],"upper":[193],"bound":[194],"on":[195,260,304],"number":[197,222,345],"categories":[199],"may":[202,216,275],"belong":[203],"to,":[204],"produces":[206],"hardness":[208],"problem,":[211],"initially":[213],"be":[217,288],"contained":[218],"arbitrary":[221],"sets.":[225],"For":[226],"bounds,":[232],"order":[234],"\\(\\tilde{\\Theta":[235,239],"}(\\sqrt":[236],"{n})\\)":[237],"}(n)\\)":[240],",":[241],"cases,":[248],"loosely":[249],"justifying":[250],"heuristic":[253],"approach.":[254],"Our":[255],"includes":[257],"reductions":[258],"based":[259],"parameterized":[261],"randomized":[262],"constructions":[263],"highlight":[265],"how":[266],"parameters":[269],"properties":[271],"affect":[276],"hardness.":[278],"Moreover,":[279],"case":[283],"category":[286],"must":[287],"identical":[289],"devise":[296],"algorithm":[298],"whose":[299],"approximation":[300,330],"guarantee":[301],"depends":[302],"solely":[303],"more":[306],"granular":[307],"parameter,":[308],"allowing":[309],"improved":[310,329],"worst-case":[311],"guarantees,":[312],"well":[314],"application":[317],"exact":[320],"solvers.":[321],"We":[322],"further":[323],"provide":[324],"efficient":[325],"with":[327],"much":[328],"guarantees":[331],"cardinalities":[338],"items":[350],"belongs":[351],"too":[355],"large.":[356],"Finally,":[357],"also":[359],"generalize":[360],"results":[362],"DAG-based":[364],"non-hierarchical":[366],"categorization.":[367]},"counts_by_year":[],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
