{"id":"https://openalex.org/W2890881297","doi":"https://doi.org/10.18653/v1/d18-1224","title":"Regular Expression Guided Entity Mention Mining from Noisy Web Data","display_name":"Regular Expression Guided Entity Mention Mining from Noisy Web Data","publication_year":2018,"publication_date":"2018-01-01","ids":{"openalex":"https://openalex.org/W2890881297","doi":"https://doi.org/10.18653/v1/d18-1224","mag":"2890881297"},"language":"en","primary_location":{"id":"doi:10.18653/v1/d18-1224","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/d18-1224","pdf_url":"https://www.aclweb.org/anthology/D18-1224.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.aclweb.org/anthology/D18-1224.pdf","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5046910090","display_name":"Shanshan Zhang","orcid":"https://orcid.org/0000-0002-6802-8769"},"institutions":[{"id":"https://openalex.org/I84392919","display_name":"Temple University","ror":"https://ror.org/00kx1jb78","country_code":"US","type":"education","lineage":["https://openalex.org/I84392919"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Shanshan Zhang","raw_affiliation_strings":["Computer and Information Sciences Department Temple University, PA"],"affiliations":[{"raw_affiliation_string":"Computer and Information Sciences Department Temple University, PA","institution_ids":["https://openalex.org/I84392919"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102742994","display_name":"Lihong He","orcid":null},"institutions":[{"id":"https://openalex.org/I84392919","display_name":"Temple University","ror":"https://ror.org/00kx1jb78","country_code":"US","type":"education","lineage":["https://openalex.org/I84392919"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Lihong He","raw_affiliation_strings":["Computer and Information Sciences Department Temple University, PA"],"affiliations":[{"raw_affiliation_string":"Computer and Information Sciences Department Temple University, PA","institution_ids":["https://openalex.org/I84392919"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5059847153","display_name":"Slobodan Vu\u010deti\u0107","orcid":"https://orcid.org/0000-0001-5884-6293"},"institutions":[{"id":"https://openalex.org/I84392919","display_name":"Temple University","ror":"https://ror.org/00kx1jb78","country_code":"US","type":"education","lineage":["https://openalex.org/I84392919"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Slobodan Vucetic","raw_affiliation_strings":["Computer and Information Sciences Department Temple University, PA"],"affiliations":[{"raw_affiliation_string":"Computer and Information Sciences Department Temple University, PA","institution_ids":["https://openalex.org/I84392919"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5057346703","display_name":"Eduard Dragut","orcid":"https://orcid.org/0000-0002-3103-054X"},"institutions":[{"id":"https://openalex.org/I84392919","display_name":"Temple University","ror":"https://ror.org/00kx1jb78","country_code":"US","type":"education","lineage":["https://openalex.org/I84392919"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Eduard Dragut","raw_affiliation_strings":["Computer and Information Sciences Department Temple University, PA"],"affiliations":[{"raw_affiliation_string":"Computer and Information Sciences Department Temple University, PA","institution_ids":["https://openalex.org/I84392919"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5046910090"],"corresponding_institution_ids":["https://openalex.org/I84392919"],"apc_list":null,"apc_paid":null,"fwci":2.7032,"has_fulltext":true,"cited_by_count":25,"citation_normalized_percentile":{"value":0.92375016,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":97,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"1991","last_page":"2000"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.837879478931427},{"id":"https://openalex.org/keywords/identification","display_name":"Identification (biology)","score":0.6702660322189331},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.6028916239738464},{"id":"https://openalex.org/keywords/named-entity-recognition","display_name":"Named-entity recognition","score":0.5868061184883118},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.5822358727455139},{"id":"https://openalex.org/keywords/precision-and-recall","display_name":"Precision and recall","score":0.5318911671638489},{"id":"https://openalex.org/keywords/entity-linking","display_name":"Entity linking","score":0.5140427350997925},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.44256827235221863},{"id":"https://openalex.org/keywords/expression","display_name":"Expression (computer science)","score":0.4297920763492584},{"id":"https://openalex.org/keywords/regular-expression","display_name":"Regular expression","score":0.4181174337863922},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3994331955909729},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.3785156309604645},{"id":"https://openalex.org/keywords/knowledge-base","display_name":"Knowledge base","score":0.22115039825439453},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.12786269187927246}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.837879478931427},{"id":"https://openalex.org/C116834253","wikidata":"https://www.wikidata.org/wiki/Q2039217","display_name":"Identification (biology)","level":2,"score":0.6702660322189331},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.6028916239738464},{"id":"https://openalex.org/C2779135771","wikidata":"https://www.wikidata.org/wiki/Q403574","display_name":"Named-entity recognition","level":3,"score":0.5868061184883118},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.5822358727455139},{"id":"https://openalex.org/C81669768","wikidata":"https://www.wikidata.org/wiki/Q2359161","display_name":"Precision and recall","level":2,"score":0.5318911671638489},{"id":"https://openalex.org/C96711827","wikidata":"https://www.wikidata.org/wiki/Q17012245","display_name":"Entity linking","level":3,"score":0.5140427350997925},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.44256827235221863},{"id":"https://openalex.org/C90559484","wikidata":"https://www.wikidata.org/wiki/Q778379","display_name":"Expression (computer science)","level":2,"score":0.4297920763492584},{"id":"https://openalex.org/C121329065","wikidata":"https://www.wikidata.org/wiki/Q185612","display_name":"Regular expression","level":2,"score":0.4181174337863922},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3994331955909729},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.3785156309604645},{"id":"https://openalex.org/C4554734","wikidata":"https://www.wikidata.org/wiki/Q593744","display_name":"Knowledge base","level":2,"score":0.22115039825439453},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.12786269187927246},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C59822182","wikidata":"https://www.wikidata.org/wiki/Q441","display_name":"Botany","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.18653/v1/d18-1224","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/d18-1224","pdf_url":"https://www.aclweb.org/anthology/D18-1224.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.18653/v1/d18-1224","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/d18-1224","pdf_url":"https://www.aclweb.org/anthology/D18-1224.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing","raw_type":"proceedings-article"},"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.4000000059604645}],"awards":[{"id":"https://openalex.org/G2653471122","display_name":null,"funder_award_id":"1546480","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320332161","display_name":"National Institutes of Health","ror":"https://ror.org/01cwqze88"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2890881297.pdf","grobid_xml":"https://content.openalex.org/works/W2890881297.grobid-xml"},"referenced_works_count":38,"referenced_works":["https://openalex.org/W72959484","https://openalex.org/W1493490255","https://openalex.org/W1508480967","https://openalex.org/W1775135849","https://openalex.org/W1832693441","https://openalex.org/W1940872118","https://openalex.org/W1964189668","https://openalex.org/W1987538593","https://openalex.org/W2020278455","https://openalex.org/W2026213144","https://openalex.org/W2038941723","https://openalex.org/W2039532210","https://openalex.org/W2047477415","https://openalex.org/W2059383863","https://openalex.org/W2080666934","https://openalex.org/W2097998348","https://openalex.org/W2106950427","https://openalex.org/W2127978399","https://openalex.org/W2134150392","https://openalex.org/W2138857742","https://openalex.org/W2148540243","https://openalex.org/W2152043697","https://openalex.org/W2155541015","https://openalex.org/W2265846598","https://openalex.org/W2275294428","https://openalex.org/W2296283641","https://openalex.org/W2311110368","https://openalex.org/W2597655663","https://openalex.org/W2602288119","https://openalex.org/W2604184171","https://openalex.org/W2799010330","https://openalex.org/W2963625095","https://openalex.org/W2963687836","https://openalex.org/W2963703197","https://openalex.org/W2964284687","https://openalex.org/W2964343412","https://openalex.org/W3106003309","https://openalex.org/W4294375521"],"related_works":["https://openalex.org/W2186562580","https://openalex.org/W3198729192","https://openalex.org/W4255258373","https://openalex.org/W2593907245","https://openalex.org/W3000685722","https://openalex.org/W2520117834","https://openalex.org/W626980589","https://openalex.org/W4313162113","https://openalex.org/W1884363728","https://openalex.org/W3133906981"],"abstract_inverted_index":{"Many":[0],"important":[1],"entity":[2,98,130,168,210],"types":[3],"in":[4,39,55],"web":[5,35,82,133],"documents,":[6],"such":[7,49,79],"as":[8,50,93],"dates,":[9],"times,":[10],"email":[11],"addresses,":[12],"and":[13,37,87,120,170,195],"course":[14],"numbers,":[15],"follow":[16],"or":[17,140],"closely":[18],"resemble":[19],"patterns":[20],"that":[21,66,74,214],"can":[22],"be":[23],"described":[24],"by":[25,138],"Regular":[26],"Expressions":[27],"(REs).":[28],"Due":[29],"to":[30,64,70,104,115,162,176,188],"a":[31,56,72,94,125,146,158,171,183,190],"vast":[32],"diversity":[33],"of":[34,53,77,109,112,149,193],"documents":[36,83,194],"ways":[38,103],"which":[40],"they":[41],"are":[42,154],"being":[43],"generated,":[44],"even":[45],"seemingly":[46],"straightforward":[47],"tasks":[48],"identifying":[51,78],"mentions":[52,169],"date":[54],"document":[57,160],"become":[58],"very":[59,223],"challenging.":[60],"It":[61],"is":[62,68,75,174,186,199],"reasonable":[63],"claim":[65],"it":[67],"impossible":[69],"create":[71],"RE":[73],"capable":[76],"entities":[80],"from":[81,117,132],"with":[84],"perfect":[85],"precision":[86],"recall.":[88],"Rather":[89],"than":[90],"abandoning":[91],"REs":[92,144,153],"go-to":[95],"approach":[96,123],"for":[97,129,145,166],"detection,":[99],"this":[100],"paper":[101],"explores":[102],"combine":[105],"the":[106,142,167,196,215],"expressive":[107],"power":[108],"REs,":[110],"ability":[111],"deep":[113],"learning":[114],"learn":[116],"large":[118,159],"data,":[119],"human-in-the":[121],"loop":[122],"into":[124],"new":[126],"integrated":[127],"framework":[128,136,217],"identification":[131,211],"data.":[134],"The":[135,205],"starts":[137],"creating":[139],"collecting":[141],"existing":[143],"particular":[147],"type":[148],"an":[150],"entity.":[151],"Those":[152],"then":[155],"used":[156],"over":[157],"corpus":[161],"collect":[163],"weak":[164,180],"labels":[165],"neural":[172,197],"network":[173,198],"trained":[175],"predict":[177],"those":[178,203],"RE-generated":[179],"labels.":[181],"Finally,":[182],"human":[184,225],"expert":[185],"asked":[187],"label":[189],"small":[191],"set":[192],"fine":[200],"tuned":[201],"on":[202,208],"documents.":[204],"experimental":[206],"evaluation":[207],"several":[209],"problems":[212],"shows":[213],"proposed":[216],"achieves":[218],"impressive":[219],"accuracy,":[220],"while":[221],"requiring":[222],"modest":[224],"effort.":[226]},"counts_by_year":[{"year":2023,"cited_by_count":4},{"year":2022,"cited_by_count":5},{"year":2021,"cited_by_count":6},{"year":2020,"cited_by_count":6},{"year":2019,"cited_by_count":4}],"updated_date":"2026-03-18T14:38:29.013473","created_date":"2025-10-10T00:00:00"}
