{"id":"https://openalex.org/W2003458432","doi":"https://doi.org/10.3115/1117794.1117802","title":"Enriching the knowledge sources used in a maximum entropy part-of-speech tagger","display_name":"Enriching the knowledge sources used in a maximum entropy part-of-speech tagger","publication_year":2000,"publication_date":"2000-01-01","ids":{"openalex":"https://openalex.org/W2003458432","doi":"https://doi.org/10.3115/1117794.1117802","mag":"2003458432"},"language":"en","primary_location":{"id":"doi:10.3115/1117794.1117802","is_oa":true,"landing_page_url":"https://doi.org/10.3115/1117794.1117802","pdf_url":"https://dl.acm.org/doi/pdf/10.3115/1117794.1117802","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2000 Joint SIGDAT conference on Empirical methods in natural language processing and very large corpora held in conjunction with the 38th Annual Meeting of the Association for Computational Linguistics -","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.3115/1117794.1117802","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5053947885","display_name":"Kristina Toutanova","orcid":null},"institutions":[{"id":"https://openalex.org/I97018004","display_name":"Stanford University","ror":"https://ror.org/00f54p054","country_code":"US","type":"education","lineage":["https://openalex.org/I97018004"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Kristina Toutanova","raw_affiliation_strings":["Stanford, CA","Stanford University"],"affiliations":[{"raw_affiliation_string":"Stanford, CA","institution_ids":[]},{"raw_affiliation_string":"Stanford University","institution_ids":["https://openalex.org/I97018004"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5046006076","display_name":"Christopher D. Manning","orcid":"https://orcid.org/0000-0001-6155-649X"},"institutions":[{"id":"https://openalex.org/I97018004","display_name":"Stanford University","ror":"https://ror.org/00f54p054","country_code":"US","type":"education","lineage":["https://openalex.org/I97018004"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Christopher D. Manning","raw_affiliation_strings":["Stanford, CA","Stanford University"],"affiliations":[{"raw_affiliation_string":"Stanford, CA","institution_ids":[]},{"raw_affiliation_string":"Stanford University","institution_ids":["https://openalex.org/I97018004"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5053947885"],"corresponding_institution_ids":["https://openalex.org/I97018004"],"apc_list":null,"apc_paid":null,"fwci":3.7198,"has_fulltext":true,"cited_by_count":946,"citation_normalized_percentile":{"value":0.93165485,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":100},"biblio":{"volume":"13","issue":null,"first_page":"63","last_page":"70"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.9947999715805054,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/treebank","display_name":"Treebank","score":0.9115262031555176},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7751051187515259},{"id":"https://openalex.org/keywords/part-of-speech-tagging","display_name":"Part-of-speech tagging","score":0.7219029068946838},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.6996808052062988},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.688737154006958},{"id":"https://openalex.org/keywords/part-of-speech","display_name":"Part of speech","score":0.62126624584198},{"id":"https://openalex.org/keywords/principle-of-maximum-entropy","display_name":"Principle of maximum entropy","score":0.6077477335929871},{"id":"https://openalex.org/keywords/entropy","display_name":"Entropy (arrow of time)","score":0.509388267993927},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.41386738419532776},{"id":"https://openalex.org/keywords/parsing","display_name":"Parsing","score":0.1297445297241211}],"concepts":[{"id":"https://openalex.org/C206134035","wikidata":"https://www.wikidata.org/wiki/Q811525","display_name":"Treebank","level":3,"score":0.9115262031555176},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7751051187515259},{"id":"https://openalex.org/C2780684714","wikidata":"https://www.wikidata.org/wiki/Q1271424","display_name":"Part-of-speech tagging","level":3,"score":0.7219029068946838},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.6996808052062988},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.688737154006958},{"id":"https://openalex.org/C123406163","wikidata":"https://www.wikidata.org/wiki/Q82042","display_name":"Part of speech","level":2,"score":0.62126624584198},{"id":"https://openalex.org/C9679016","wikidata":"https://www.wikidata.org/wiki/Q1417473","display_name":"Principle of maximum entropy","level":2,"score":0.6077477335929871},{"id":"https://openalex.org/C106301342","wikidata":"https://www.wikidata.org/wiki/Q4117933","display_name":"Entropy (arrow of time)","level":2,"score":0.509388267993927},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.41386738419532776},{"id":"https://openalex.org/C186644900","wikidata":"https://www.wikidata.org/wiki/Q194152","display_name":"Parsing","level":2,"score":0.1297445297241211},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":7,"locations":[{"id":"doi:10.3115/1117794.1117802","is_oa":true,"landing_page_url":"https://doi.org/10.3115/1117794.1117802","pdf_url":"https://dl.acm.org/doi/pdf/10.3115/1117794.1117802","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2000 Joint SIGDAT conference on Empirical methods in natural language processing and very large corpora held in conjunction with the 38th Annual Meeting of the Association for Computational Linguistics -","raw_type":"proceedings-article"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.12.2996","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.12.2996","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://acl.ldc.upenn.edu/W/W00/W00-1308.pdf","raw_type":"text"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.294.7351","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.294.7351","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://nlp.stanford.edu/kristina/papers/emnlp2000.pdf","raw_type":"text"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.332.6064","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.332.6064","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://ilpubs.stanford.edu:8090/459/1/2000-39.pdf","raw_type":"text"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.36.2734","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.36.2734","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://www-nlp.stanford.edu/~manning/papers/emnlp2000.ps","raw_type":"text"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.415.9183","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.415.9183","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://www.cs.jhu.edu/~yarowsky/acl2000/sigdat/toutanova.pdf","raw_type":"text"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.77.8616","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.77.8616","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://nlp.stanford.edu/cmanning/papers/emnlp2000.pdf","raw_type":"text"}],"best_oa_location":{"id":"doi:10.3115/1117794.1117802","is_oa":true,"landing_page_url":"https://doi.org/10.3115/1117794.1117802","pdf_url":"https://dl.acm.org/doi/pdf/10.3115/1117794.1117802","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2000 Joint SIGDAT conference on Empirical methods in natural language processing and very large corpora held in conjunction with the 38th Annual Meeting of the Association for Computational Linguistics -","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2003458432.pdf","grobid_xml":"https://content.openalex.org/works/W2003458432.grobid-xml"},"referenced_works_count":12,"referenced_works":["https://openalex.org/W1508165687","https://openalex.org/W1535015163","https://openalex.org/W1574901103","https://openalex.org/W1773803948","https://openalex.org/W2096175520","https://openalex.org/W2127973959","https://openalex.org/W2135843243","https://openalex.org/W2158873310","https://openalex.org/W2163791257","https://openalex.org/W2789009050","https://openalex.org/W2949108231","https://openalex.org/W2950121111"],"related_works":["https://openalex.org/W3156541658","https://openalex.org/W1965453504","https://openalex.org/W2128096361","https://openalex.org/W2396342956","https://openalex.org/W3128305375","https://openalex.org/W2626026432","https://openalex.org/W2275441552","https://openalex.org/W2363447991","https://openalex.org/W2349070910","https://openalex.org/W3022465890"],"abstract_inverted_index":{"This":[0],"paper":[1],"presents":[2],"results":[3,29],"for":[4,22,40,45,56,67],"a":[5],"maximum-entropy-based":[6],"part":[7],"of":[8,38,48,52],"speech":[9],"tagger,":[10],"which":[11],"achieves":[12],"superior":[13],"performance":[14],"principally":[15],"by":[16,30],"enriching":[17],"the":[18,46,49,68,71],"information":[19],"sources":[20],"used":[21],"tagging.":[23],"In":[24],"particular,":[25],"we":[26],"get":[27],"improved":[28],"incorporating":[31],"these":[32],"features:":[33],"(i)":[34],"more":[35],"extensive":[36],"treatment":[37],"capitalization":[39],"unknown":[41],"words;":[42],"(ii)":[43],"features":[44,55],"disambiguation":[47],"tense":[50],"forms":[51],"verbs;":[53],"(iii)":[54],"disambiguating":[57],"particles":[58],"from":[59],"prepositions":[60],"and":[61,77],"adverbs.":[62],"The":[63],"best":[64],"resulting":[65],"accuracy":[66],"tagger":[69],"on":[70,79],"Penn":[72],"Treebank":[73],"is":[74],"96.86%":[75],"overall,":[76],"86.91%":[78],"previously":[80],"unseen":[81],"words.":[82]},"counts_by_year":[{"year":2025,"cited_by_count":5},{"year":2024,"cited_by_count":37},{"year":2023,"cited_by_count":25},{"year":2022,"cited_by_count":21},{"year":2021,"cited_by_count":35},{"year":2020,"cited_by_count":38},{"year":2019,"cited_by_count":48},{"year":2018,"cited_by_count":57},{"year":2017,"cited_by_count":70},{"year":2016,"cited_by_count":64},{"year":2015,"cited_by_count":91},{"year":2014,"cited_by_count":71},{"year":2013,"cited_by_count":79},{"year":2012,"cited_by_count":70}],"updated_date":"2026-04-23T09:07:50.710637","created_date":"2025-10-10T00:00:00"}
