{"id":"https://openalex.org/W7124289735","doi":"https://doi.org/10.48550/arxiv.2601.09633","title":"TaxoBell: Gaussian Box Embeddings for Self-Supervised Taxonomy Expansion","display_name":"TaxoBell: Gaussian Box Embeddings for Self-Supervised Taxonomy Expansion","publication_year":2026,"publication_date":"2026-01-14","ids":{"openalex":"https://openalex.org/W7124289735","doi":"https://doi.org/10.48550/arxiv.2601.09633"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2601.09633","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2601.09633","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2601.09633","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5048570466","display_name":"Sahil Mishra","orcid":"https://orcid.org/0000-0001-5477-9003"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Mishra, Sahil","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101476712","display_name":"Srinitish Srinivasan","orcid":"https://orcid.org/0009-0007-0122-8890"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Srinivasan, Srinitish","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5085995102","display_name":"Srikanta Bedathur","orcid":"https://orcid.org/0000-0002-3949-2175"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bedathur, Srikanta","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5123140573","display_name":"Tanmoy Chakraborty","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chakraborty, Tanmoy","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5048570466"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T13702","display_name":"Machine Learning in Healthcare","score":0.7896000146865845,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T13702","display_name":"Machine Learning in Healthcare","score":0.7896000146865845,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11273","display_name":"Advanced Graph Neural Networks","score":0.06669999659061432,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.054499998688697815,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/taxonomy","display_name":"Taxonomy (biology)","score":0.5934000015258789},{"id":"https://openalex.org/keywords/encode","display_name":"ENCODE","score":0.5552999973297119},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.48980000615119934},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.4756999909877777},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.4489000141620636},{"id":"https://openalex.org/keywords/gaussian","display_name":"Gaussian","score":0.42910000681877136},{"id":"https://openalex.org/keywords/semantic-similarity","display_name":"Semantic similarity","score":0.39640000462532043},{"id":"https://openalex.org/keywords/multiset","display_name":"Multiset","score":0.3431999981403351}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.614300012588501},{"id":"https://openalex.org/C58642233","wikidata":"https://www.wikidata.org/wiki/Q8269924","display_name":"Taxonomy (biology)","level":2,"score":0.5934000015258789},{"id":"https://openalex.org/C66746571","wikidata":"https://www.wikidata.org/wiki/Q1134833","display_name":"ENCODE","level":3,"score":0.5552999973297119},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.48980000615119934},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.4756999909877777},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.46369999647140503},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.45170000195503235},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.4489000141620636},{"id":"https://openalex.org/C163716315","wikidata":"https://www.wikidata.org/wiki/Q901177","display_name":"Gaussian","level":2,"score":0.42910000681877136},{"id":"https://openalex.org/C130318100","wikidata":"https://www.wikidata.org/wiki/Q2268914","display_name":"Semantic similarity","level":2,"score":0.39640000462532043},{"id":"https://openalex.org/C2779623528","wikidata":"https://www.wikidata.org/wiki/Q864377","display_name":"Multiset","level":2,"score":0.3431999981403351},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.3359000086784363},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.32269999384880066},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.31450000405311584},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.3095000088214874},{"id":"https://openalex.org/C64543145","wikidata":"https://www.wikidata.org/wiki/Q162942","display_name":"Intersection (aeronautics)","level":2,"score":0.3034000098705292},{"id":"https://openalex.org/C203519979","wikidata":"https://www.wikidata.org/wiki/Q865360","display_name":"Jaccard index","level":3,"score":0.2955000102519989},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.29269999265670776},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.2838999927043915},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.27140000462532043},{"id":"https://openalex.org/C13336665","wikidata":"https://www.wikidata.org/wiki/Q125977","display_name":"Vector space","level":2,"score":0.27059999108314514},{"id":"https://openalex.org/C161584116","wikidata":"https://www.wikidata.org/wiki/Q1952580","display_name":"Multivariate statistics","level":2,"score":0.2676999866962433},{"id":"https://openalex.org/C155846161","wikidata":"https://www.wikidata.org/wiki/Q1143367","display_name":"Graphical model","level":2,"score":0.26739999651908875},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.26100000739097595},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.2549999952316284},{"id":"https://openalex.org/C2780276568","wikidata":"https://www.wikidata.org/wiki/Q191928","display_name":"Polysemy","level":2,"score":0.2515000104904175}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2601.09633","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2601.09633","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2601.09633","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2601.09633","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"display_name":"Decent work and economic growth","id":"https://metadata.un.org/sdg/8","score":0.6370499134063721}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Taxonomies":[0],"form":[1],"the":[2,42,70,159],"backbone":[3],"of":[4,76,124,163],"structured":[5],"knowledge":[6],"representation":[7],"across":[8],"diverse":[9],"domains,":[10],"enabling":[11,57],"applications":[12],"such":[13],"as":[14],"e-commerce":[15],"and":[16,25,38,59,79,104,113,127,151,161,168],"semantic":[17,77,111],"search.":[18],"Yet,":[19],"manual":[20],"taxonomy":[21,144],"expansion":[22,145],"is":[23],"labor-intensive":[24],"slow.":[26],"Existing":[27],"methods":[28],"rely":[29],"on":[30,133],"point-based":[31],"vector":[32],"embeddings,":[33],"which":[34],"model":[35],"symmetric":[36],"similarity":[37],"thus":[39],"struggle":[40],"with":[41,92,165],"asymmetric":[43],"relationships":[44],"that":[45,99,138],"are":[46],"fundamental":[47],"to":[48,83],"taxonomies.":[49],"Box":[50],"embeddings":[51],"offer":[52],"a":[53,94],"promising":[54],"alternative":[55],"by":[56,147],"containment":[58],"disjointness,":[60],"but":[61],"they":[62],"face":[63],"key":[64],"issues:":[65],"(i)":[66],"unstable":[67],"gradients":[68],"at":[69],"intersection":[71],"boundaries,":[72],"(ii)":[73],"no":[74],"notion":[75],"uncertainty,":[78],"(iii)":[80],"limited":[81],"capacity":[82],"represent":[84],"polysemy":[85],"or":[86],"ambiguity.":[87],"We":[88,156],"address":[89],"these":[90],"shortcomings":[91],"TaxoBell,":[93],"Gaussian":[95,106],"box":[96,102],"embedding":[97],"framework":[98],"translates":[100],"between":[101],"geometries":[103],"multivariate":[105],"distributions,":[107],"where":[108],"means":[109],"encode":[110,115],"location":[112],"covariances":[114],"uncertainty.":[116],"Energy-based":[117],"optimization":[118],"yields":[119],"stable":[120],"optimization,":[121],"robust":[122],"modeling":[123],"ambiguous":[125],"concepts,":[126],"interpretable":[128],"hierarchical":[129],"reasoning.":[130],"Extensive":[131],"experiments":[132],"five":[134],"benchmark":[135],"datasets":[136],"demonstrate":[137,158],"TaxoBell":[139,164],"significantly":[140],"outperforms":[141],"eight":[142],"state-of-the-art":[143],"baselines":[146],"19%":[148],"in":[149,154],"MRR":[150],"around":[152],"25%":[153],"Recall@k.":[155],"further":[157],"advantages":[160],"pitfalls":[162],"error":[166],"analysis":[167],"ablation":[169],"studies.":[170]},"counts_by_year":[],"updated_date":"2026-06-02T06:17:35.589633","created_date":"2026-01-16T00:00:00"}
