{"id":"https://openalex.org/W7122441723","doi":"https://doi.org/10.1145/3777867.3777878","title":"HALO-GPT:Hindi Active Learning with Oracle GPT-3.5","display_name":"HALO-GPT:Hindi Active Learning with Oracle GPT-3.5","publication_year":2025,"publication_date":"2025-12-17","ids":{"openalex":"https://openalex.org/W7122441723","doi":"https://doi.org/10.1145/3777867.3777878"},"language":null,"primary_location":{"id":"doi:10.1145/3777867.3777878","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3777867.3777878","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 17th annual meeting of the Forum for Information Retrieval Evaluation","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1145/3777867.3777878","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5122754605","display_name":"Ajanta Maurya","orcid":null},"institutions":[{"id":"https://openalex.org/I1317621060","display_name":"Indian Institute of Technology Guwahati","ror":"https://ror.org/0022nd079","country_code":"IN","type":"education","lineage":["https://openalex.org/I1317621060"]}],"countries":["IN"],"is_corresponding":true,"raw_author_name":"Ajanta Maurya","raw_affiliation_strings":["Indian Institute of Technology Guwahati, Guwahati, Assam, India"],"raw_orcid":"https://orcid.org/0009-0004-3972-2227","affiliations":[{"raw_affiliation_string":"Indian Institute of Technology Guwahati, Guwahati, Assam, India","institution_ids":["https://openalex.org/I1317621060"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5122778884","display_name":"V. Vijaya Saradhi","orcid":null},"institutions":[{"id":"https://openalex.org/I1317621060","display_name":"Indian Institute of Technology Guwahati","ror":"https://ror.org/0022nd079","country_code":"IN","type":"education","lineage":["https://openalex.org/I1317621060"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"V. Vijaya Saradhi","raw_affiliation_strings":["Indian Institute of Technology Guwahati, Guwahati, Assam, India"],"raw_orcid":"https://orcid.org/0000-0002-7856-5322","affiliations":[{"raw_affiliation_string":"Indian Institute of Technology Guwahati, Guwahati, Assam, India","institution_ids":["https://openalex.org/I1317621060"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5040626471","display_name":"Ashish Anand","orcid":"https://orcid.org/0000-0002-0024-3358"},"institutions":[{"id":"https://openalex.org/I1317621060","display_name":"Indian Institute of Technology Guwahati","ror":"https://ror.org/0022nd079","country_code":"IN","type":"education","lineage":["https://openalex.org/I1317621060"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Ashish Anand","raw_affiliation_strings":["Indian Institute of Technology Guwahati, Guwahati, Assam, India"],"raw_orcid":"https://orcid.org/0000-0002-0024-3358","affiliations":[{"raw_affiliation_string":"Indian Institute of Technology Guwahati, Guwahati, Assam, India","institution_ids":["https://openalex.org/I1317621060"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5122754605"],"corresponding_institution_ids":["https://openalex.org/I1317621060"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.81679467,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"113","last_page":"123"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.4505999982357025,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.4505999982357025,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11636","display_name":"Artificial Intelligence in Healthcare and Education","score":0.1274999976158142,"subfield":{"id":"https://openalex.org/subfields/2718","display_name":"Health Informatics"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},{"id":"https://openalex.org/T12072","display_name":"Machine Learning and Algorithms","score":0.09210000187158585,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/oracle","display_name":"Oracle","score":0.7239000201225281},{"id":"https://openalex.org/keywords/named-entity-recognition","display_name":"Named-entity recognition","score":0.70169997215271},{"id":"https://openalex.org/keywords/annotation","display_name":"Annotation","score":0.6822999715805054},{"id":"https://openalex.org/keywords/active-learning","display_name":"Active learning (machine learning)","score":0.6204000115394592},{"id":"https://openalex.org/keywords/hindi","display_name":"Hindi","score":0.5490000247955322},{"id":"https://openalex.org/keywords/bottleneck","display_name":"Bottleneck","score":0.5336999893188477},{"id":"https://openalex.org/keywords/training-set","display_name":"Training set","score":0.36899998784065247}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.776199996471405},{"id":"https://openalex.org/C55166926","wikidata":"https://www.wikidata.org/wiki/Q2892946","display_name":"Oracle","level":2,"score":0.7239000201225281},{"id":"https://openalex.org/C2779135771","wikidata":"https://www.wikidata.org/wiki/Q403574","display_name":"Named-entity recognition","level":3,"score":0.70169997215271},{"id":"https://openalex.org/C2776321320","wikidata":"https://www.wikidata.org/wiki/Q857525","display_name":"Annotation","level":2,"score":0.6822999715805054},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6721000075340271},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.6220999956130981},{"id":"https://openalex.org/C77967617","wikidata":"https://www.wikidata.org/wiki/Q4677561","display_name":"Active learning (machine learning)","level":2,"score":0.6204000115394592},{"id":"https://openalex.org/C519982507","wikidata":"https://www.wikidata.org/wiki/Q1568","display_name":"Hindi","level":2,"score":0.5490000247955322},{"id":"https://openalex.org/C2780513914","wikidata":"https://www.wikidata.org/wiki/Q18210350","display_name":"Bottleneck","level":2,"score":0.5336999893188477},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5116999745368958},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.36899998784065247},{"id":"https://openalex.org/C136389625","wikidata":"https://www.wikidata.org/wiki/Q334384","display_name":"Supervised learning","level":3,"score":0.3513000011444092},{"id":"https://openalex.org/C148524875","wikidata":"https://www.wikidata.org/wiki/Q6975395","display_name":"F1 score","level":2,"score":0.3416999876499176},{"id":"https://openalex.org/C58973888","wikidata":"https://www.wikidata.org/wiki/Q1041418","display_name":"Semi-supervised learning","level":2,"score":0.30880001187324524},{"id":"https://openalex.org/C2776145971","wikidata":"https://www.wikidata.org/wiki/Q30673951","display_name":"Labeled data","level":2,"score":0.3086000084877014},{"id":"https://openalex.org/C140779682","wikidata":"https://www.wikidata.org/wiki/Q210868","display_name":"Sampling (signal processing)","level":3,"score":0.3059000074863434},{"id":"https://openalex.org/C22367795","wikidata":"https://www.wikidata.org/wiki/Q7625208","display_name":"Structured prediction","level":2,"score":0.2849999964237213},{"id":"https://openalex.org/C2777889803","wikidata":"https://www.wikidata.org/wiki/Q25047676","display_name":"Named entity","level":2,"score":0.2599000036716461},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.2587999999523163},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.25429999828338623}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3777867.3777878","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3777867.3777878","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 17th annual meeting of the Forum for Information Retrieval Evaluation","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3777867.3777878","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3777867.3777878","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 17th annual meeting of the Forum for Information Retrieval Evaluation","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":25,"referenced_works":["https://openalex.org/W1956471287","https://openalex.org/W2052643363","https://openalex.org/W2065010255","https://openalex.org/W2164777277","https://openalex.org/W2343514605","https://openalex.org/W2785787385","https://openalex.org/W2886890203","https://openalex.org/W2903440991","https://openalex.org/W2927032858","https://openalex.org/W2953010755","https://openalex.org/W2957824372","https://openalex.org/W2997918300","https://openalex.org/W3035097673","https://openalex.org/W3035390927","https://openalex.org/W3121137019","https://openalex.org/W3167077707","https://openalex.org/W3205626500","https://openalex.org/W4206648492","https://openalex.org/W4384662964","https://openalex.org/W4389518684","https://openalex.org/W4391567285","https://openalex.org/W4392669965","https://openalex.org/W4394579953","https://openalex.org/W4398150831","https://openalex.org/W4404826821"],"related_works":[],"abstract_inverted_index":{"Obtaining":[0],"high-quality":[1],"annotations":[2],"for":[3,11,18,76,94,109,146],"low-resource":[4,147],"(LR)":[5],"languages":[6],"remains":[7],"a":[8,99],"significant":[9],"bottleneck":[10],"training":[12],"supervised":[13],"deep":[14],"learning":[15,38],"models,":[16],"including":[17],"Named":[19],"Entity":[20],"Recognition":[21],"(NER)":[22],"tasks.":[23],"This":[24],"work":[25],"investigates":[26],"the":[27,72,126,133,138],"use":[28,139],"of":[29,119,128,140],"GPT-3.5":[30,83,141],"as":[31,142],"an":[32,36,143],"oracle":[33],"annotator":[34,145],"within":[35],"active":[37,144],"(AL)":[39],"framework":[40],"to":[41,69],"minimize":[42],"human":[43],"effort":[44],"while":[45,82],"preserving":[46],"annotation":[47],"quality.":[48],"We":[49],"evaluate":[50],"its":[51],"performance":[52,93,108],"across":[53],"six":[54],"diverse":[55],"Hindi":[56],"NER":[57,148],"datasets,":[58,106],"spanning":[59],"general,":[60],"medical,":[61],"and":[62,87,112,122],"code-mixed":[63],"domains,":[64],"using":[65],"uncertainty-based":[66],"sampling":[67],"strategies":[68],"iteratively":[70],"select":[71],"most":[73],"informative":[74],"sentences":[75],"labeling.":[77],"Our":[78],"experiments":[79],"reveal":[80],"that":[81],"struggles":[84],"with":[85],"domain-specific":[86],"low-frequency":[88],"entities,":[89],"it":[90],"maintains":[91],"strong":[92],"common":[95],"entity":[96],"types.":[97],"Despite":[98],"decrease":[100],"in":[101,150],"average":[102],"scores":[103],"on":[104],"challenging":[105],"per-entity":[107],"Person,":[110],"Organization,":[111],"Location":[113],"remained":[114],"competitive,":[115],"achieving":[116],"maximum":[117],"F1-scores":[118],"0.82,":[120],"0.78,":[121],"0.81,":[123],"respectively.":[124],"To":[125],"best":[127],"our":[129],"knowledge,":[130],"this":[131],"is":[132],"first":[134],"large-scale":[135],"study":[136],"demonstrating":[137],"tasks":[149],"Indian":[151],"language.":[152]},"counts_by_year":[],"updated_date":"2026-01-13T01:18:01.560105","created_date":"2026-01-13T00:00:00"}
