{"id":"https://openalex.org/W4409150274","doi":"https://doi.org/10.1145/3690624.3709290","title":"On the Necessity of World Knowledge for Mitigating Missing Labels in Extreme Classification","display_name":"On the Necessity of World Knowledge for Mitigating Missing Labels in Extreme Classification","publication_year":2025,"publication_date":"2025-04-04","ids":{"openalex":"https://openalex.org/W4409150274","doi":"https://doi.org/10.1145/3690624.3709290"},"language":"en","primary_location":{"id":"doi:10.1145/3690624.3709290","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3690624.3709290","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 31st ACM SIGKDD Conference on Knowledge Discovery and Data Mining V.1","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1145/3690624.3709290","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100924456","display_name":"Jatin Prakash","orcid":null},"institutions":[{"id":"https://openalex.org/I57206974","display_name":"New York University","ror":"https://ror.org/0190ak572","country_code":"US","type":"education","lineage":["https://openalex.org/I57206974"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Jatin Prakash","raw_affiliation_strings":["New York University, New York City, NY, USA"],"raw_orcid":"https://orcid.org/0009-0003-5474-6529","affiliations":[{"raw_affiliation_string":"New York University, New York City, NY, USA","institution_ids":["https://openalex.org/I57206974"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5106708558","display_name":"Anirudh Buvanesh","orcid":"https://orcid.org/0000-0003-1910-2253"},"institutions":[{"id":"https://openalex.org/I4210164802","display_name":"Mila - Quebec Artificial Intelligence Institute","ror":"https://ror.org/05c22rx21","country_code":"CA","type":"facility","lineage":["https://openalex.org/I4210164802"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Anirudh Buvanesh","raw_affiliation_strings":["Mila - Quebec Artificial Intelligence Institute, Montreal, QC, Canada"],"raw_orcid":"https://orcid.org/0000-0003-1910-2253","affiliations":[{"raw_affiliation_string":"Mila - Quebec Artificial Intelligence Institute, Montreal, QC, Canada","institution_ids":["https://openalex.org/I4210164802"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5089626592","display_name":"Bishal Santra","orcid":null},"institutions":[{"id":"https://openalex.org/I4210124949","display_name":"Microsoft Research (India)","ror":"https://ror.org/02w7f3w92","country_code":"IN","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210124949"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Bishal Santra","raw_affiliation_strings":["Microsoft Research, Bengaluru, Karnataka, India"],"raw_orcid":"https://orcid.org/0000-0002-0380-689X","affiliations":[{"raw_affiliation_string":"Microsoft Research, Bengaluru, Karnataka, India","institution_ids":["https://openalex.org/I4210124949"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5059937402","display_name":"Deepak Saini","orcid":"https://orcid.org/0000-0002-6057-4351"},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Deepak Saini","raw_affiliation_strings":["Microsoft Corporation, Mountain View, CA, USA"],"raw_orcid":"https://orcid.org/0000-0002-6057-4351","affiliations":[{"raw_affiliation_string":"Microsoft Corporation, Mountain View, CA, USA","institution_ids":["https://openalex.org/I1290206253"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101941000","display_name":"Sachin Yadav","orcid":"https://orcid.org/0000-0003-0048-1118"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sachin Yadav","raw_affiliation_strings":["Google DeepMind, Bengaluru, Karnataka, India"],"raw_orcid":"https://orcid.org/0000-0003-0048-1118","affiliations":[{"raw_affiliation_string":"Google DeepMind, Bengaluru, Karnataka, India","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5074199611","display_name":"Jian Jiao","orcid":"https://orcid.org/0000-0003-4779-9588"},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]},{"id":"https://openalex.org/I4210108985","display_name":"Bellevue Hospital Center","ror":"https://ror.org/01ky34z31","country_code":"US","type":"healthcare","lineage":["https://openalex.org/I1283621791","https://openalex.org/I4210086933","https://openalex.org/I4210108985"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jian Jiao","raw_affiliation_strings":["Microsoft Corporation, Bellevue, WA, USA"],"raw_orcid":"https://orcid.org/0000-0003-4779-9588","affiliations":[{"raw_affiliation_string":"Microsoft Corporation, Bellevue, WA, USA","institution_ids":["https://openalex.org/I1290206253","https://openalex.org/I4210108985"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5085551583","display_name":"Yashoteja Prabhu","orcid":null},"institutions":[{"id":"https://openalex.org/I4210124949","display_name":"Microsoft Research (India)","ror":"https://ror.org/02w7f3w92","country_code":"IN","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210124949"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Yashoteja Prabhu","raw_affiliation_strings":["Microsoft Research, Bengaluru, Karnataka, India"],"raw_orcid":"https://orcid.org/0009-0005-8595-8166","affiliations":[{"raw_affiliation_string":"Microsoft Research, Bengaluru, Karnataka, India","institution_ids":["https://openalex.org/I4210124949"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103213915","display_name":"Amit Sharma","orcid":"https://orcid.org/0000-0002-2086-3191"},"institutions":[{"id":"https://openalex.org/I4210124949","display_name":"Microsoft Research (India)","ror":"https://ror.org/02w7f3w92","country_code":"IN","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210124949"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Amit Sharma","raw_affiliation_strings":["Microsoft Research, Bengaluru, Karnataka, India"],"raw_orcid":"https://orcid.org/0000-0002-2086-3191","affiliations":[{"raw_affiliation_string":"Microsoft Research, Bengaluru, Karnataka, India","institution_ids":["https://openalex.org/I4210124949"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5051880496","display_name":"Manik Varma","orcid":"https://orcid.org/0000-0003-4516-6613"},"institutions":[{"id":"https://openalex.org/I4210124949","display_name":"Microsoft Research (India)","ror":"https://ror.org/02w7f3w92","country_code":"IN","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210124949"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Manik Varma","raw_affiliation_strings":["Microsoft Research, Bengaluru, Karnataka, India"],"raw_orcid":"https://orcid.org/0000-0003-4516-6613","affiliations":[{"raw_affiliation_string":"Microsoft Research, Bengaluru, Karnataka, India","institution_ids":["https://openalex.org/I4210124949"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":9,"corresponding_author_ids":["https://openalex.org/A5100924456"],"corresponding_institution_ids":["https://openalex.org/I57206974"],"apc_list":null,"apc_paid":null,"fwci":2.0951,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.874852,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"1115","last_page":"1126"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.9552000164985657,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.9552000164985657,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12072","display_name":"Machine Learning and Algorithms","score":0.9121000170707703,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6071385145187378},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.3569015860557556},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3279988765716553},{"id":"https://openalex.org/keywords/knowledge-management","display_name":"Knowledge management","score":0.3221074938774109}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6071385145187378},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.3569015860557556},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3279988765716553},{"id":"https://openalex.org/C56739046","wikidata":"https://www.wikidata.org/wiki/Q192060","display_name":"Knowledge management","level":1,"score":0.3221074938774109}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3690624.3709290","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3690624.3709290","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 31st ACM SIGKDD Conference on Knowledge Discovery and Data Mining V.1","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3690624.3709290","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3690624.3709290","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 31st ACM SIGKDD Conference on Knowledge Discovery and Data Mining V.1","raw_type":"proceedings-article"},"sustainable_development_goals":[{"score":0.5799999833106995,"id":"https://metadata.un.org/sdg/10","display_name":"Reduced inequalities"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":60,"referenced_works":["https://openalex.org/W1583837637","https://openalex.org/W1834987204","https://openalex.org/W1991418309","https://openalex.org/W2068074736","https://openalex.org/W2069065514","https://openalex.org/W2362855512","https://openalex.org/W2507134384","https://openalex.org/W2520348554","https://openalex.org/W2606555609","https://openalex.org/W2750779823","https://openalex.org/W2782759081","https://openalex.org/W2788125153","https://openalex.org/W2899867782","https://openalex.org/W2906963924","https://openalex.org/W2914052719","https://openalex.org/W2921113176","https://openalex.org/W2945456403","https://openalex.org/W2963469388","https://openalex.org/W2983598759","https://openalex.org/W2990138404","https://openalex.org/W2998534896","https://openalex.org/W3012576969","https://openalex.org/W3080802002","https://openalex.org/W3092103025","https://openalex.org/W3093655911","https://openalex.org/W3094444847","https://openalex.org/W3114079967","https://openalex.org/W3114569718","https://openalex.org/W3117196003","https://openalex.org/W3153914981","https://openalex.org/W3156044630","https://openalex.org/W3167920080","https://openalex.org/W3169488402","https://openalex.org/W3177232285","https://openalex.org/W3201691278","https://openalex.org/W3211566171","https://openalex.org/W3212725701","https://openalex.org/W4213069590","https://openalex.org/W4221030716","https://openalex.org/W4288728031","https://openalex.org/W4306794840","https://openalex.org/W4321485459","https://openalex.org/W4365799947","https://openalex.org/W4381686872","https://openalex.org/W4385541581","https://openalex.org/W4385567541","https://openalex.org/W4387841511","https://openalex.org/W4387846540","https://openalex.org/W4389519059","https://openalex.org/W4389519448","https://openalex.org/W4389524280","https://openalex.org/W4396757563","https://openalex.org/W4400526199","https://openalex.org/W4400526284","https://openalex.org/W4400526908","https://openalex.org/W4400528870","https://openalex.org/W4400530533","https://openalex.org/W6600291067","https://openalex.org/W6600408051","https://openalex.org/W6818795685"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052"],"abstract_inverted_index":{"Extreme":[0],"Classification":[1],"(XC)":[2],"aims":[3],"to":[4,8,57,80,109,161,210],"map":[5],"a":[6,14,147,180],"query":[7],"the":[9,97,111,164,170,244],"most":[10],"relevant":[11],"documents":[12],"from":[13,29,32,44],"very":[15],"large":[16,123],"document":[17,124],"set.":[18],"XC":[19],"algorithms":[20],"used":[21],"in":[22,116,224,233],"real-world":[23],"applications":[24,117],"typically":[25],"learn":[26],"this":[27,48,74],"mapping":[28],"datasets":[30,41,178,214],"curated":[31],"implicit":[33],"feedback,":[34],"such":[35,85,186],"as":[36,86,187],"user":[37],"clicks.":[38],"However,":[39],"these":[40],"often":[42],"suffer":[43],"missing":[45,54,58,112,130,165],"labels.":[46],"In":[47],"work,":[49],"we":[50,134],"observe":[51],"that":[52,73,93,145],"systematic":[53],"labels":[55],"lead":[56],"knowledge,":[59,113],"which":[60],"is":[61,78,126],"critical":[62],"for":[63,140],"modelling":[64],"relevance":[65],"between":[66],"queries":[67],"and":[68,89,122,157,191,227,246],"documents.":[69],"We":[70,168,242],"formally":[71],"show":[72,169],"absence":[75],"of":[76,149,172,182],"knowledge":[77,131],"hard":[79],"recover":[81],"using":[82],"existing":[83,197],"methods":[84,198,221],"propensity":[87],"weighting":[88],"data":[90],"imputation":[91],"strategies":[92],"solely":[94],"rely":[95],"on":[96,175,199,239],"training":[98],"dataset.":[99],"While":[100],"Large":[101],"Language":[102,151],"Models":[103,152],"(LLMs)":[104],"provide":[105],"an":[106,143,234],"attractive":[107],"solution":[108],"augment":[110],"leveraging":[114],"them":[115],"with":[118],"low":[119],"latency":[120],"requirements":[121],"sets":[125],"challenging.":[127],"To":[128],"mitigate":[129],"at":[132],"scale,":[133],"propose":[135],"SKIM":[136,195,208],"(Scalable":[137],"Knowledge":[138],"Infusion":[139],"Missing":[141],"Labels),":[142],"algorithm":[144],"leverages":[146],"combination":[148,181],"Small":[150],"or":[153],"SLMs,":[154],"e.g.,":[155],"Llama2-7b,":[156],"abundant":[158],"unstructured":[159],"meta-data":[160],"effectively":[162],"address":[163],"label":[166],"problem.":[167],"efficacy":[171],"our":[173],"method":[174],"large-scale":[176],"public":[177],"through":[179],"unbiased":[183],"evaluation":[184,193],"strategies,":[185],"exhaustive":[188],"human":[189],"annotations":[190],"simulation-based":[192],"benchmarks.":[194],"outperforms":[196],"Recall@100":[200],"by":[201,222,231],"more":[202],"than":[203],"10":[204,216],"absolute":[205],"points.":[206],"Additionally,":[207],"scales":[209],"proprietary":[211],"query-ad":[212],"retrieval":[213],"containing":[215],"million":[217],"documents,":[218],"outperforming":[219],"baseline":[220],"12%":[223],"offline":[225],"evaluations":[226],"increasing":[228],"ad":[229],"click-yield":[230],"1.23%":[232],"online":[235],"A/B":[236],"test":[237],"conducted":[238],"Bing":[240],"Search.":[241],"release":[243],"code":[245],"trained":[247],"models":[248],"at:":[249],"github.com/bicycleman15/skim":[250]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2026-05-05T08:41:31.759640","created_date":"2025-10-10T00:00:00"}
