{"id":"https://openalex.org/W4412377990","doi":"https://doi.org/10.1145/3726302.3730341","title":"TAFSIL: Taxonomy Adaptable Fine-grained Entity Recognition through Distant Supervision for Indian Languages","display_name":"TAFSIL: Taxonomy Adaptable Fine-grained Entity Recognition through Distant Supervision for Indian Languages","publication_year":2025,"publication_date":"2025-07-13","ids":{"openalex":"https://openalex.org/W4412377990","doi":"https://doi.org/10.1145/3726302.3730341"},"language":"en","primary_location":{"id":"doi:10.1145/3726302.3730341","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3726302.3730341","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3726302.3730341","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 48th International ACM SIGIR Conference on Research and Development in Information Retrieval","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3726302.3730341","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Prachuryya Kaushik","orcid":"https://orcid.org/0009-0007-9299-4426"},"institutions":[{"id":"https://openalex.org/I1317621060","display_name":"Indian Institute of Technology Guwahati","ror":"https://ror.org/0022nd079","country_code":"IN","type":"education","lineage":["https://openalex.org/I1317621060"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Prachuryya Kaushik","raw_affiliation_strings":["Indian Institute of Technology, Guwahati, Guwahati, India"],"raw_orcid":"https://orcid.org/0009-0007-9299-4426","affiliations":[{"raw_affiliation_string":"Indian Institute of Technology, Guwahati, Guwahati, India","institution_ids":["https://openalex.org/I1317621060"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5039157929","display_name":"Shivansh Mishra","orcid":null},"institutions":[{"id":"https://openalex.org/I1317621060","display_name":"Indian Institute of Technology Guwahati","ror":"https://ror.org/0022nd079","country_code":"IN","type":"education","lineage":["https://openalex.org/I1317621060"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Shivansh Mishra","raw_affiliation_strings":["Indian Institute of Technology, Guwahati, Guwahati, India"],"raw_orcid":"https://orcid.org/0009-0004-7646-7691","affiliations":[{"raw_affiliation_string":"Indian Institute of Technology, Guwahati, Guwahati, India","institution_ids":["https://openalex.org/I1317621060"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5040626471","display_name":"Ashish Anand","orcid":"https://orcid.org/0000-0002-0024-3358"},"institutions":[{"id":"https://openalex.org/I1317621060","display_name":"Indian Institute of Technology Guwahati","ror":"https://ror.org/0022nd079","country_code":"IN","type":"education","lineage":["https://openalex.org/I1317621060"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Ashish Anand","raw_affiliation_strings":["Indian Institute of Technology, Guwahati, Guwahati, India"],"raw_orcid":"https://orcid.org/0000-0002-0024-3358","affiliations":[{"raw_affiliation_string":"Indian Institute of Technology, Guwahati, Guwahati, India","institution_ids":["https://openalex.org/I1317621060"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.0747822,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"3753","last_page":"3763"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.989300012588501,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7645925283432007},{"id":"https://openalex.org/keywords/taxonomy","display_name":"Taxonomy (biology)","score":0.6407715082168579},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5438812375068665},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4090225398540497},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.34499409794807434},{"id":"https://openalex.org/keywords/biology","display_name":"Biology","score":0.06052294373512268}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7645925283432007},{"id":"https://openalex.org/C58642233","wikidata":"https://www.wikidata.org/wiki/Q8269924","display_name":"Taxonomy (biology)","level":2,"score":0.6407715082168579},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5438812375068665},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4090225398540497},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.34499409794807434},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.06052294373512268},{"id":"https://openalex.org/C59822182","wikidata":"https://www.wikidata.org/wiki/Q441","display_name":"Botany","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3726302.3730341","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3726302.3730341","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3726302.3730341","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 48th International ACM SIGIR Conference on Research and Development in Information Retrieval","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3726302.3730341","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3726302.3730341","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3726302.3730341","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 48th International ACM SIGIR Conference on Research and Development in Information Retrieval","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4412377990.pdf","grobid_xml":"https://content.openalex.org/works/W4412377990.grobid-xml"},"referenced_works_count":34,"referenced_works":["https://openalex.org/W8550301","https://openalex.org/W2251239360","https://openalex.org/W2573492843","https://openalex.org/W2592076957","https://openalex.org/W2592329103","https://openalex.org/W2595233870","https://openalex.org/W2742113707","https://openalex.org/W2759083144","https://openalex.org/W2953356739","https://openalex.org/W2962713724","https://openalex.org/W2963863756","https://openalex.org/W2970076461","https://openalex.org/W2971109883","https://openalex.org/W2998566943","https://openalex.org/W3035390927","https://openalex.org/W3035586841","https://openalex.org/W3176589722","https://openalex.org/W3177312484","https://openalex.org/W4205417736","https://openalex.org/W4205807230","https://openalex.org/W4225373829","https://openalex.org/W4225716497","https://openalex.org/W4285107543","https://openalex.org/W4285604861","https://openalex.org/W4365799947","https://openalex.org/W4379986648","https://openalex.org/W4382202646","https://openalex.org/W4385571095","https://openalex.org/W4385571222","https://openalex.org/W4385571463","https://openalex.org/W4385572044","https://openalex.org/W4385572376","https://openalex.org/W4385572425","https://openalex.org/W4389519502"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W3204019825"],"abstract_inverted_index":{"Several":[0],"studies":[1],"have":[2],"used":[3],"distant":[4],"supervision":[5],"to":[6,14,35,75,124,153],"create":[7,76,125],"resources":[8],"for":[9,28,136],"fine-grained":[10],"entity":[11,48,66],"recognition":[12],"(FgER)":[13],"mitigate":[15],"the":[16,42,51,54,97,101,161,180,184],"challenges":[17],"of":[18,22,44,53,65,127,131,163,183,192],"manual":[19],"annotation.However,":[20],"most":[21],"these":[23],"methods":[24],"are":[25,85],"primarily":[26],"developed":[27],"English":[29],"and":[30,46,105,112,118,149,158,175],"cannot":[31],"be":[32],"efficiently":[33],"adapted":[34],"many":[36],"other":[37],"languages,":[38],"including":[39],"Indian":[40,81],"languages.Moreover,":[41],"emergence":[43],"new":[45],"unseen":[47],"types":[49],"deteriorates":[50],"performance":[52,200],"supervised":[55],"models":[56],"trained":[57],"on":[58],"FgER":[59,73,77,203],"datasets":[60,78,126,168,185],"with":[61],"different":[62],"predefined":[63],"sets":[64],"types.This":[67],"work":[68],"introduces":[69],"TAFSIL,":[70],"a":[71,90,128,189],"taxonomy-adaptable":[72],"framework":[74],"in":[79,169,194],"six":[80,137],"languages.The":[82],"chosen":[83],"languages":[84,138],"spoken":[86],"by":[87,165],"more":[88],"than":[89],"billion":[91],"speakers":[92],"across":[93,201],"various":[94,167,202],"countries.TAFSIL":[95],"utilizes":[96],"high":[98],"interlink":[99],"between":[100],"knowledge":[102],"base":[103],"WikiData":[104],"linked":[106],"corpora":[107],"Wikipedia":[108],"through":[109,115],"multi-stage":[110],"heuristics":[111],"improves":[113],"annotation":[114],"fuzzy":[116],"match":[117],"quality":[119,182],"sentence":[120],"selection.TAFSIL":[121],"enables":[122],"us":[123],"total":[129],"size":[130],"around":[132],"three":[133],"million":[134],"samples":[135],"Hindi":[139],"(Hi),":[140],"Marathi":[141],"(Mr),":[142],"Sanskrit":[143],"(Sa),":[144],"Tamil":[145],"(Ta),":[146],"Telugu":[147],"(Te),":[148],"Urdu":[150],"(Ur)":[151],"belonging":[152],"two":[154],"language":[155],"families":[156],"Indo-European":[157],"Dravidian.We":[159],"evaluate":[160],"robustness":[162],"TAFSIL":[164],"creating":[166],"four":[170],"taxonomies":[171],"FIGER,":[172],"OntoNotes,":[173],"HAnDS,":[174],"MultiCoNER2.Our":[176],"extensive":[177],"experiments":[178],"suggest":[179],"sound":[181],"as":[186],"there":[187],"is":[188,207],"relative":[190],"improvement":[191],"83%":[193],"average":[195],"F1":[196],"score":[197],"over":[198],"zero-shot":[199],"state-of-the-art":[204],"models.The":[205],"resource":[206],"publicly":[208],"available":[209],"at":[210],"https://huggingface.co/datasets/prachuryyaI-ITG/TAFSIL.":[211]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
