{"id":"https://openalex.org/W7155565011","doi":"https://doi.org/10.1145/3799830.3799844","title":"Labeling Function Generation for NER Task: Limitation of LLMs","display_name":"Labeling Function Generation for NER Task: Limitation of LLMs","publication_year":2025,"publication_date":"2025-12-17","ids":{"openalex":"https://openalex.org/W7155565011","doi":"https://doi.org/10.1145/3799830.3799844"},"language":null,"primary_location":{"id":"doi:10.1145/3799830.3799844","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3799830.3799844","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 13th ACM IKDD International Conference on Data Science","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1145/3799830.3799844","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5122754605","display_name":"Ajanta Maurya","orcid":null},"institutions":[{"id":"https://openalex.org/I1317621060","display_name":"Indian Institute of Technology Guwahati","ror":"https://ror.org/0022nd079","country_code":"IN","type":"education","lineage":["https://openalex.org/I1317621060"]}],"countries":["IN"],"is_corresponding":true,"raw_author_name":"Ajanta Maurya","raw_affiliation_strings":["IIT Guwahati, Guwahati, India"],"raw_orcid":"https://orcid.org/0009-0004-3972-2227","affiliations":[{"raw_affiliation_string":"IIT Guwahati, Guwahati, India","institution_ids":["https://openalex.org/I1317621060"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134542060","display_name":"Jay Khinchi","orcid":"https://orcid.org/0009-0008-8557-3575"},"institutions":[{"id":"https://openalex.org/I1317621060","display_name":"Indian Institute of Technology Guwahati","ror":"https://ror.org/0022nd079","country_code":"IN","type":"education","lineage":["https://openalex.org/I1317621060"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Jay Khinchi","raw_affiliation_strings":["IIT Guwahati, Guwahati, India"],"raw_orcid":"https://orcid.org/0009-0008-8557-3575","affiliations":[{"raw_affiliation_string":"IIT Guwahati, Guwahati, India","institution_ids":["https://openalex.org/I1317621060"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134496582","display_name":"V Vijaya Saradhi","orcid":"https://orcid.org/0000-0002-7856-5322"},"institutions":[{"id":"https://openalex.org/I1317621060","display_name":"Indian Institute of Technology Guwahati","ror":"https://ror.org/0022nd079","country_code":"IN","type":"education","lineage":["https://openalex.org/I1317621060"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"V Vijaya Saradhi","raw_affiliation_strings":["IIT Guwahati, Guwahati, India"],"raw_orcid":"https://orcid.org/0000-0002-7856-5322","affiliations":[{"raw_affiliation_string":"IIT Guwahati, Guwahati, India","institution_ids":["https://openalex.org/I1317621060"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5134467683","display_name":"Ashish Anand","orcid":"https://orcid.org/0000-0002-0024-3358"},"institutions":[{"id":"https://openalex.org/I1317621060","display_name":"Indian Institute of Technology Guwahati","ror":"https://ror.org/0022nd079","country_code":"IN","type":"education","lineage":["https://openalex.org/I1317621060"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Ashish Anand","raw_affiliation_strings":["IIT Guwahati, Guwahati, India"],"raw_orcid":"https://orcid.org/0000-0002-0024-3358","affiliations":[{"raw_affiliation_string":"IIT Guwahati, Guwahati, India","institution_ids":["https://openalex.org/I1317621060"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5122754605"],"corresponding_institution_ids":["https://openalex.org/I1317621060"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.86665064,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"124","last_page":"133"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.6879000067710876,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.6879000067710876,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10664","display_name":"Sentiment Analysis and Opinion Mining","score":0.06710000336170197,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12380","display_name":"Authorship Attribution and Profiling","score":0.028200000524520874,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.5738000273704529},{"id":"https://openalex.org/keywords/function","display_name":"Function (biology)","score":0.5231999754905701},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.4415999948978424},{"id":"https://openalex.org/keywords/security-token","display_name":"Security token","score":0.4291999936103821},{"id":"https://openalex.org/keywords/subject-matter-expert","display_name":"Subject-matter expert","score":0.39100000262260437},{"id":"https://openalex.org/keywords/annotation","display_name":"Annotation","score":0.38600000739097595},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.3458999991416931}],"concepts":[{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.5738000273704529},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5486000180244446},{"id":"https://openalex.org/C14036430","wikidata":"https://www.wikidata.org/wiki/Q3736076","display_name":"Function (biology)","level":2,"score":0.5231999754905701},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4708999991416931},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.44200000166893005},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.4415999948978424},{"id":"https://openalex.org/C48145219","wikidata":"https://www.wikidata.org/wiki/Q1335365","display_name":"Security token","level":2,"score":0.4291999936103821},{"id":"https://openalex.org/C105002631","wikidata":"https://www.wikidata.org/wiki/Q4833645","display_name":"Subject-matter expert","level":3,"score":0.39100000262260437},{"id":"https://openalex.org/C2776321320","wikidata":"https://www.wikidata.org/wiki/Q857525","display_name":"Annotation","level":2,"score":0.38600000739097595},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.3458999991416931},{"id":"https://openalex.org/C180747234","wikidata":"https://www.wikidata.org/wiki/Q23373","display_name":"Cognitive psychology","level":1,"score":0.33730000257492065},{"id":"https://openalex.org/C2992860105","wikidata":"https://www.wikidata.org/wiki/Q7631392","display_name":"Subject matter","level":3,"score":0.3043000102043152},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.2971000075340271},{"id":"https://openalex.org/C168167062","wikidata":"https://www.wikidata.org/wiki/Q1117970","display_name":"Component (thermodynamics)","level":2,"score":0.2732999920845032},{"id":"https://openalex.org/C120936955","wikidata":"https://www.wikidata.org/wiki/Q2155640","display_name":"Empirical research","level":2,"score":0.26989999413490295},{"id":"https://openalex.org/C90559484","wikidata":"https://www.wikidata.org/wiki/Q778379","display_name":"Expression (computer science)","level":2,"score":0.26080000400543213},{"id":"https://openalex.org/C166052673","wikidata":"https://www.wikidata.org/wiki/Q83021","display_name":"Empirical evidence","level":2,"score":0.260699987411499},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.2606000006198883},{"id":"https://openalex.org/C67277372","wikidata":"https://www.wikidata.org/wiki/Q7449085","display_name":"Semantic role labeling","level":3,"score":0.25850000977516174},{"id":"https://openalex.org/C132010649","wikidata":"https://www.wikidata.org/wiki/Q189222","display_name":"Intuition","level":2,"score":0.25589999556541443},{"id":"https://openalex.org/C2777855551","wikidata":"https://www.wikidata.org/wiki/Q12310021","display_name":"Subject (documents)","level":2,"score":0.25459998846054077}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3799830.3799844","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3799830.3799844","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 13th ACM IKDD International Conference on Data Science","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3799830.3799844","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3799830.3799844","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 13th ACM IKDD International Conference on Data Science","raw_type":"proceedings-article"},"sustainable_development_goals":[{"score":0.5155921578407288,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":19,"referenced_works":["https://openalex.org/W2144578941","https://openalex.org/W2769041395","https://openalex.org/W2890931111","https://openalex.org/W2895604144","https://openalex.org/W3092571094","https://openalex.org/W3169442836","https://openalex.org/W3173277985","https://openalex.org/W3177255321","https://openalex.org/W3185341429","https://openalex.org/W3200428054","https://openalex.org/W4283705032","https://openalex.org/W4312782109","https://openalex.org/W4317641493","https://openalex.org/W4366085876","https://openalex.org/W4394579953","https://openalex.org/W4404782964","https://openalex.org/W4409348215","https://openalex.org/W4412377138","https://openalex.org/W7133224126"],"related_works":[],"abstract_inverted_index":{"Labeling":[0],"Functions":[1],"(LFs)":[2],"are":[3,19,120,206],"an":[4,177],"important":[5],"component":[6],"in":[7,39,63,155,166,171],"Programmatic":[8],"Weak":[9],"Supervision":[10],"(PWS)":[11],"pipeline":[12],"introducing":[13],"scalability":[14],"for":[15,86,116,147,214,224],"annotation":[16],"tasks.":[17],"LFs":[18,41,85,115,146,185],"built":[20],"by":[21,26,235],"the":[22,29,79,103,161,183,222,232],"subject":[23],"matter":[24],"expert":[25],"looking":[27],"at":[28,209],"dataset.":[30],"Large":[31],"Language":[32],"Models":[33],"(LLMs)":[34],"have":[35,59,188],"recently":[36],"proven":[37],"effective":[38],"generating":[40,210],"to":[42,83,136,187],"support":[43],"weak":[44],"supervision":[45],"across":[46],"tasks":[47,217],"such":[48],"as":[49],"review":[50],"classification,":[51,53],"topic":[52],"and":[54,106,160,173,230],"sentiment":[55,76],"analysis.":[56],"These":[57],"successes":[58],"largely":[60],"been":[61],"demonstrated":[62],"text":[64],"classification":[65,88,216],"settings,":[66],"where":[67],"rules":[68,233],"can":[69,112],"leverage":[70],"surface-level":[71],"cues":[72],"like":[73,218],"keywords":[74],"or":[75],"indicators.":[77],"However,":[78],"use":[80],"of":[81,102,122,153,164],"LLMs":[82,111,119,205,236],"generate":[84,113],"token":[87,215],"tasks\u2014particularly":[89],"Named":[90],"Entity":[91],"Recognition":[92],"(NER)":[93],"\u2014":[94],"remains":[95],"significantly":[96],"understudied.":[97],"This":[98,220],"paper":[99],"presents":[100],"one":[101],"first":[104],"empirical":[105],"analytical":[107],"investigations":[108],"into":[109,227],"whether":[110],"generalizable":[114],"NER.":[117,219],"While":[118],"capable":[121],"annotating":[123],"entity":[124],"spans":[125],"when":[126],"prompted":[127,145],"directly,":[128],"our":[129],"experiments":[130],"reveal":[131],"that":[132,140,182,203],"they":[133],"consistently":[134],"struggle":[135],"produce":[137],"labeling":[138],"logic":[139,213],"is":[141],"semantically":[142,211],"generalized.":[143],"Through":[144,176],"NER":[148],"we":[149,180],"got":[150],"highest":[151],"F1-score":[152,163],"16.66":[154],"Gemini-2.0-flash":[156],"through":[157],"few-shot":[158,174],"setting":[159],"lowest":[162],"0":[165],"LLama":[167],"2":[168],"Chat":[169],"70b":[170],"zero-shot":[172],"setting.":[175],"evaluation":[178],"framework,":[179],"show":[181,202],"generated":[184,234],"tend":[186],"high":[189],"coverage":[190],"but":[191],"low":[192],"abstraction,":[193],"exhibiting":[194],"fragile":[195],"performance":[196],"on":[197],"unseen":[198],"data.":[199],"The":[200],"results":[201],"current":[204],"not":[207],"good":[208],"rich":[212],"highlights":[221],"need":[223],"deeper":[225],"research":[226],"how":[228],"interpretable":[229],"robust":[231],"really":[237],"are.":[238]},"counts_by_year":[],"updated_date":"2026-04-25T06:06:54.107920","created_date":"2026-04-25T00:00:00"}
