{"id":"https://openalex.org/W2048468185","doi":"https://doi.org/10.1145/1014052.1014065","title":"Exploiting dictionaries in named entity extraction","display_name":"Exploiting dictionaries in named entity extraction","publication_year":2004,"publication_date":"2004-08-22","ids":{"openalex":"https://openalex.org/W2048468185","doi":"https://doi.org/10.1145/1014052.1014065","mag":"2048468185"},"language":"en","primary_location":{"id":"doi:10.1145/1014052.1014065","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1014052.1014065","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the tenth ACM SIGKDD international conference on Knowledge discovery and data mining","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5051617344","display_name":"William W. Cohen","orcid":null},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"William W. Cohen","raw_affiliation_strings":["Carnegie Mellon University, Pittsburgh, PA","Carnegie-Mellon University, Pittsburgh, PA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Carnegie Mellon University, Pittsburgh, PA","institution_ids":["https://openalex.org/I74973139"]},{"raw_affiliation_string":"Carnegie-Mellon University, Pittsburgh, PA","institution_ids":["https://openalex.org/I74973139"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5031035935","display_name":"Sunita Sarawagi","orcid":"https://orcid.org/0009-0005-9538-6616"},"institutions":[{"id":"https://openalex.org/I162827531","display_name":"Indian Institute of Technology Bombay","ror":"https://ror.org/02qyf5152","country_code":"IN","type":"education","lineage":["https://openalex.org/I162827531"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Sunita Sarawagi","raw_affiliation_strings":["IIT Bombay, Mumbai, India"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"IIT Bombay, Mumbai, India","institution_ids":["https://openalex.org/I162827531"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5051617344"],"corresponding_institution_ids":["https://openalex.org/I74973139"],"apc_list":null,"apc_paid":null,"fwci":24.0556,"has_fulltext":false,"cited_by_count":225,"citation_normalized_percentile":{"value":0.99532964,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"89","last_page":"98"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/named-entity-recognition","display_name":"Named-entity recognition","score":0.8952363729476929},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8361066579818726},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.6315237283706665},{"id":"https://openalex.org/keywords/named-entity","display_name":"Named entity","score":0.6144455075263977},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5926179885864258},{"id":"https://openalex.org/keywords/information-extraction","display_name":"Information extraction","score":0.5343189835548401},{"id":"https://openalex.org/keywords/entity-linking","display_name":"Entity linking","score":0.49648863077163696},{"id":"https://openalex.org/keywords/formalism","display_name":"Formalism (music)","score":0.48994821310043335},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.48483407497406006},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.47998306155204773},{"id":"https://openalex.org/keywords/hidden-markov-model","display_name":"Hidden Markov model","score":0.42585867643356323},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.361555814743042},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.35488295555114746},{"id":"https://openalex.org/keywords/knowledge-base","display_name":"Knowledge base","score":0.10762256383895874},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.09406304359436035}],"concepts":[{"id":"https://openalex.org/C2779135771","wikidata":"https://www.wikidata.org/wiki/Q403574","display_name":"Named-entity recognition","level":3,"score":0.8952363729476929},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8361066579818726},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.6315237283706665},{"id":"https://openalex.org/C2777889803","wikidata":"https://www.wikidata.org/wiki/Q25047676","display_name":"Named entity","level":2,"score":0.6144455075263977},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5926179885864258},{"id":"https://openalex.org/C195807954","wikidata":"https://www.wikidata.org/wiki/Q1662562","display_name":"Information extraction","level":2,"score":0.5343189835548401},{"id":"https://openalex.org/C96711827","wikidata":"https://www.wikidata.org/wiki/Q17012245","display_name":"Entity linking","level":3,"score":0.49648863077163696},{"id":"https://openalex.org/C73301696","wikidata":"https://www.wikidata.org/wiki/Q5469984","display_name":"Formalism (music)","level":3,"score":0.48994821310043335},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.48483407497406006},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.47998306155204773},{"id":"https://openalex.org/C23224414","wikidata":"https://www.wikidata.org/wiki/Q176769","display_name":"Hidden Markov model","level":2,"score":0.42585867643356323},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.361555814743042},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.35488295555114746},{"id":"https://openalex.org/C4554734","wikidata":"https://www.wikidata.org/wiki/Q593744","display_name":"Knowledge base","level":2,"score":0.10762256383895874},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.09406304359436035},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.0},{"id":"https://openalex.org/C142362112","wikidata":"https://www.wikidata.org/wiki/Q735","display_name":"Art","level":0,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C153349607","wikidata":"https://www.wikidata.org/wiki/Q36649","display_name":"Visual arts","level":1,"score":0.0},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.0},{"id":"https://openalex.org/C558565934","wikidata":"https://www.wikidata.org/wiki/Q2743","display_name":"Musical","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/1014052.1014065","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1014052.1014065","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the tenth ACM SIGKDD international conference on Knowledge discovery and data mining","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.6399999856948853,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":44,"referenced_works":["https://openalex.org/W55116438","https://openalex.org/W197270748","https://openalex.org/W1491843047","https://openalex.org/W1520377376","https://openalex.org/W1563062065","https://openalex.org/W1568339100","https://openalex.org/W1575332430","https://openalex.org/W1646278814","https://openalex.org/W1714704734","https://openalex.org/W1934019294","https://openalex.org/W1953828586","https://openalex.org/W1954715867","https://openalex.org/W1968004512","https://openalex.org/W1979711143","https://openalex.org/W2008652694","https://openalex.org/W2009341227","https://openalex.org/W2009570821","https://openalex.org/W2028122758","https://openalex.org/W2029873015","https://openalex.org/W2050006154","https://openalex.org/W2067566391","https://openalex.org/W2103931177","https://openalex.org/W2104029044","https://openalex.org/W2110914363","https://openalex.org/W2118618483","https://openalex.org/W2129113961","https://openalex.org/W2139686264","https://openalex.org/W2147880316","https://openalex.org/W2154498027","https://openalex.org/W2156515921","https://openalex.org/W2158439753","https://openalex.org/W2158941357","https://openalex.org/W2162630660","https://openalex.org/W2314420468","https://openalex.org/W2485837772","https://openalex.org/W2560674852","https://openalex.org/W2785349534","https://openalex.org/W2913519381","https://openalex.org/W4245668478","https://openalex.org/W4285719527","https://openalex.org/W4290750029","https://openalex.org/W6639924009","https://openalex.org/W6680627905","https://openalex.org/W6683714698"],"related_works":["https://openalex.org/W2186562580","https://openalex.org/W2032007337","https://openalex.org/W4255258373","https://openalex.org/W2593907245","https://openalex.org/W2155874911","https://openalex.org/W3000685722","https://openalex.org/W1884363728","https://openalex.org/W3133906981","https://openalex.org/W4253099099","https://openalex.org/W3006227201"],"abstract_inverted_index":{"We":[0],"consider":[1],"the":[2,16,27,65,119,134,147],"problem":[3,17,136],"of":[4,18,29,91,106,122,133],"improving":[5],"named":[6,44],"entity":[7,45,62],"recognition":[8,46],"(NER)":[9],"systems":[10,22,47],"by":[11,23,49],"using":[12,159],"external":[13,36,160],"dictionaries---more":[14],"specifically,":[15],"extending":[19],"state-of-the-art":[20],"NER":[21,109,135],"incorporating":[24],"information":[25],"about":[26],"similarity":[28,68,113],"extracted":[30],"entities":[31,33],"to":[32,54,101],"in":[34,60,142,162],"an":[35,61],"dictionary.":[37],"This":[38],"is":[39,85],"difficult":[40],"because":[41],"most":[42,66],"high-performance":[43,108,112],"operate":[48],"sequentially":[50,88],"classifying":[51,89],"words":[52],"as":[53],"whether":[55],"or":[56],"not":[57],"they":[58],"participate":[59],"name;":[63],"however,":[64],"useful":[67,124],"measures":[69],"score":[70],"entire":[71],"candidate":[72],"names.":[73],"To":[74],"correct":[75],"this":[76,115],"mismatch":[77],"we":[78],"formalize":[79],"a":[80,103,129],"semi-Markov":[81],"extraction":[82,153],"process,":[83],"which":[84],"based":[86],"on":[87],"segments":[90],"several":[92],"adjacent":[93],"words,":[94],"rather":[95],"than":[96,137],"single":[97],"words.":[98],"In":[99],"addition":[100],"allowing":[102],"natural":[104,131],"way":[105],"coupling":[107],"methods":[110,157],"and":[111,127],"functions,":[114],"formalism":[116],"also":[117],"allows":[118],"direct":[120],"use":[121],"other":[123],"entity-level":[125],"features,":[126],"provides":[128],"more":[130],"formulation":[132],"sequential":[138],"word":[139],"classification.":[140],"Experiments":[141],"multiple":[143],"domains":[144],"show":[145],"that":[146],"new":[148],"model":[149],"can":[150],"substantially":[151],"improve":[152],"performance":[154],"over":[155],"previous":[156],"for":[158],"dictionaries":[161],"NER.":[163]},"counts_by_year":[{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":3},{"year":2021,"cited_by_count":6},{"year":2020,"cited_by_count":5},{"year":2019,"cited_by_count":9},{"year":2018,"cited_by_count":9},{"year":2017,"cited_by_count":9},{"year":2016,"cited_by_count":12},{"year":2015,"cited_by_count":8},{"year":2014,"cited_by_count":13},{"year":2013,"cited_by_count":16},{"year":2012,"cited_by_count":16}],"updated_date":"2026-04-28T14:05:53.105641","created_date":"2025-10-10T00:00:00"}
