{"id":"https://openalex.org/W4403577366","doi":"https://doi.org/10.1145/3627673.3680009","title":"Boosting Entity Recognition by leveraging Cross-task Domain Models for Weak Supervision","display_name":"Boosting Entity Recognition by leveraging Cross-task Domain Models for Weak Supervision","publication_year":2024,"publication_date":"2024-10-20","ids":{"openalex":"https://openalex.org/W4403577366","doi":"https://doi.org/10.1145/3627673.3680009"},"language":"en","primary_location":{"id":"doi:10.1145/3627673.3680009","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3627673.3680009","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 33rd ACM International Conference on Information and Knowledge Management","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1145/3627673.3680009","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102874124","display_name":"Sanjay Agrawal","orcid":"https://orcid.org/0009-0006-7658-7532"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Sanjay Agrawal","raw_affiliation_strings":["Amazon.com Inc., Bengaluru, India"],"affiliations":[{"raw_affiliation_string":"Amazon.com Inc., Bengaluru, India","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5025202709","display_name":"Srujana Merugu","orcid":"https://orcid.org/0000-0001-7849-9743"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Srujana Merugu","raw_affiliation_strings":["Amazon.com Inc., Bengaluru, India"],"affiliations":[{"raw_affiliation_string":"Amazon.com Inc., Bengaluru, India","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5038625048","display_name":"Vivek Sembium","orcid":"https://orcid.org/0000-0002-6787-1383"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Vivek Sembium","raw_affiliation_strings":["Amazon.com Inc., Bengaluru, India"],"affiliations":[{"raw_affiliation_string":"Amazon.com Inc., Bengaluru, India","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5102874124"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.7274,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.76819002,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"4324","last_page":"4331"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9975000023841858,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T13702","display_name":"Machine Learning in Healthcare","score":0.9873999953269958,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7786744832992554},{"id":"https://openalex.org/keywords/boosting","display_name":"Boosting (machine learning)","score":0.7553826570510864},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.6192729473114014},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.49407270550727844},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.40666013956069946},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.33282628655433655},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.08358648419380188},{"id":"https://openalex.org/keywords/systems-engineering","display_name":"Systems engineering","score":0.07550838589668274},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.06222882866859436}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7786744832992554},{"id":"https://openalex.org/C46686674","wikidata":"https://www.wikidata.org/wiki/Q466303","display_name":"Boosting (machine learning)","level":2,"score":0.7553826570510864},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.6192729473114014},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.49407270550727844},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.40666013956069946},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.33282628655433655},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.08358648419380188},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.07550838589668274},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.06222882866859436},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3627673.3680009","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3627673.3680009","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 33rd ACM International Conference on Information and Knowledge Management","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3627673.3680009","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3627673.3680009","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 33rd ACM International Conference on Information and Knowledge Management","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":19,"referenced_works":["https://openalex.org/W1034374084","https://openalex.org/W1598033630","https://openalex.org/W2047995599","https://openalex.org/W2122052811","https://openalex.org/W2774162863","https://openalex.org/W2785888191","https://openalex.org/W2804950764","https://openalex.org/W2904415434","https://openalex.org/W2911489562","https://openalex.org/W2963697299","https://openalex.org/W2970771982","https://openalex.org/W2981852735","https://openalex.org/W2997792775","https://openalex.org/W3012741015","https://openalex.org/W3046375318","https://openalex.org/W3099591453","https://openalex.org/W3104774463","https://openalex.org/W3217374296","https://openalex.org/W4288089799"],"related_works":["https://openalex.org/W2125652721","https://openalex.org/W1540371141","https://openalex.org/W4231274751","https://openalex.org/W1549363203","https://openalex.org/W2154063878","https://openalex.org/W2556012038","https://openalex.org/W1489772951","https://openalex.org/W1538046993","https://openalex.org/W3196817267","https://openalex.org/W1976600725"],"abstract_inverted_index":{"Entity":[0],"Recognition":[1],"(ER)":[2],"is":[3,62,73],"a":[4,12,63,98,106,133,190],"common":[5,18],"natural":[6],"language":[7],"processing":[8],"task":[9],"encountered":[10],"in":[11,126],"number":[13],"of":[14,65,71,101,109,167],"real-world":[15],"applications.":[16],"For":[17],"domains":[19,51],"and":[20,26,35,41,55,76,177],"named":[21],"entities":[22],"such":[23,38,52,84,88],"as":[24,39,53,89],"places":[25],"organisations,":[27],"there":[28,61],"exists":[29],"sufficient":[30],"high":[31],"quality":[32],"annotated":[33],"data":[34,67,114],"foundational":[36],"models":[37,150],"T5":[40],"GPT-3.5":[42],"also":[43],"provide":[44],"highly":[45],"accurate":[46],"predictions.":[47],"However,":[48],"for":[49,83],"niche":[50],"e-commerce":[54,176],"medicine":[56],"with":[57,105,143],"specialized":[58],"entity":[59,80],"types,":[60],"paucity":[64],"labeled":[66,103,111,129,171],"since":[68],"manual":[69],"labeling":[70],"tokens":[72],"often":[74],"time-consuming":[75],"expensive,":[77],"which":[78,146],"makes":[79],"recognition":[81],"challenging":[82],"domains.":[85],"Recent":[86],"works":[87],"NEEDLE":[90],"[48]":[91],"propose":[92,139],"hybrid":[93],"solutions":[94],"to":[95,115,120,158],"efficiently":[96],"combine":[97],"small":[99],"amount":[100,108],"strongly":[102],"(human-annotated)":[104],"large":[107],"weakly":[110,128,170],"(distant":[112],"supervision)":[113],"yield":[116],"superior":[117],"performance":[118],"relative":[119],"supervised":[121],"training.":[122],"The":[123],"extensive":[124],"noise":[125],"the":[127,152,165],"data,":[130],"however,":[131],"remains":[132],"challenge.":[134],"In":[135],"this":[136],"paper,":[137],"we":[138],"WeSDoM":[140,184],"(Weak":[141],"Supervision":[142],"Domain":[144],"Models),":[145],"leverages":[147],"pretrained":[148],"encoder":[149],"from":[151],"same":[153],"domain":[154,160],"but":[155],"different":[156],"tasks":[157],"create":[159],"ontologies":[161],"that":[162,183],"can":[163],"enable":[164],"creation":[166],"less":[168],"noisy":[169],"data.":[172],"Experiments":[173],"on":[174,199],"internal":[175],"public":[178],"biomedical":[179],"NER":[180,203],"datasets":[181],"demonstrate":[182],"outperforms":[185],"existing":[186],"SOTA":[187,196],"baselines":[188],"by":[189],"significant":[191],"margin.":[192],"We":[193],"achieve":[194],"new":[195],"F1":[197],"scores":[198],"two":[200],"popular":[201],"Biomedical":[202],"datasets,":[204],"BC5CDR-chem":[205],"94.27,":[206],"BC5CDR-disease":[207],"91.23.":[208]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":1}],"updated_date":"2025-12-27T23:08:20.325037","created_date":"2025-10-10T00:00:00"}
