{"id":"https://openalex.org/W2076083849","doi":"https://doi.org/10.3115/1117794.1117800","title":"Sample selection for statistical grammar induction","display_name":"Sample selection for statistical grammar induction","publication_year":2000,"publication_date":"2000-01-01","ids":{"openalex":"https://openalex.org/W2076083849","doi":"https://doi.org/10.3115/1117794.1117800","mag":"2076083849"},"language":"en","primary_location":{"id":"doi:10.3115/1117794.1117800","is_oa":true,"landing_page_url":"https://doi.org/10.3115/1117794.1117800","pdf_url":"http://dl.acm.org/ft_gateway.cfm?id=1117800&type=pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2000 Joint SIGDAT conference on Empirical methods in natural language processing and very large corpora held in conjunction with the 38th Annual Meeting of the Association for Computational Linguistics -","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"http://dl.acm.org/ft_gateway.cfm?id=1117800&type=pdf","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5070135550","display_name":"Rebecca Hwa","orcid":"https://orcid.org/0000-0003-1158-7014"},"institutions":[{"id":"https://openalex.org/I2801851002","display_name":"Harvard University Press","ror":"https://ror.org/006v7bf86","country_code":"US","type":"other","lineage":["https://openalex.org/I136199984","https://openalex.org/I2801851002"]},{"id":"https://openalex.org/I136199984","display_name":"Harvard University","ror":"https://ror.org/03vek6s52","country_code":"US","type":"education","lineage":["https://openalex.org/I136199984"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Rebecca Hwa","raw_affiliation_strings":["Harvard University, Cambridge, MA","Harvard University, Cambridge, Ma.#TAB#"],"affiliations":[{"raw_affiliation_string":"Harvard University, Cambridge, MA","institution_ids":["https://openalex.org/I2801851002"]},{"raw_affiliation_string":"Harvard University, Cambridge, Ma.#TAB#","institution_ids":["https://openalex.org/I136199984"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":["https://openalex.org/A5070135550"],"corresponding_institution_ids":["https://openalex.org/I136199984","https://openalex.org/I2801851002"],"apc_list":null,"apc_paid":null,"fwci":7.4301,"has_fulltext":true,"cited_by_count":69,"citation_normalized_percentile":{"value":0.97072242,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":98},"biblio":{"volume":"13","issue":null,"first_page":"45","last_page":"52"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12072","display_name":"Machine Learning and Algorithms","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8513804078102112},{"id":"https://openalex.org/keywords/parsing","display_name":"Parsing","score":0.7484703063964844},{"id":"https://openalex.org/keywords/grammar","display_name":"Grammar","score":0.7307004928588867},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.7069488763809204},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.700398325920105},{"id":"https://openalex.org/keywords/grammar-induction","display_name":"Grammar induction","score":0.6933614015579224},{"id":"https://openalex.org/keywords/selection","display_name":"Selection (genetic algorithm)","score":0.6880171895027161},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.6630496382713318},{"id":"https://openalex.org/keywords/annotation","display_name":"Annotation","score":0.6324129700660706},{"id":"https://openalex.org/keywords/sample","display_name":"Sample (material)","score":0.617236852645874},{"id":"https://openalex.org/keywords/sentence","display_name":"Sentence","score":0.6133362650871277},{"id":"https://openalex.org/keywords/workload","display_name":"Workload","score":0.5493743419647217},{"id":"https://openalex.org/keywords/rule-based-machine-translation","display_name":"Rule-based machine translation","score":0.5437740683555603},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.46778759360313416},{"id":"https://openalex.org/keywords/training-set","display_name":"Training set","score":0.4487062692642212},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.13397842645645142}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8513804078102112},{"id":"https://openalex.org/C186644900","wikidata":"https://www.wikidata.org/wiki/Q194152","display_name":"Parsing","level":2,"score":0.7484703063964844},{"id":"https://openalex.org/C26022165","wikidata":"https://www.wikidata.org/wiki/Q8091","display_name":"Grammar","level":2,"score":0.7307004928588867},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.7069488763809204},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.700398325920105},{"id":"https://openalex.org/C56601403","wikidata":"https://www.wikidata.org/wiki/Q5593673","display_name":"Grammar induction","level":3,"score":0.6933614015579224},{"id":"https://openalex.org/C81917197","wikidata":"https://www.wikidata.org/wiki/Q628760","display_name":"Selection (genetic algorithm)","level":2,"score":0.6880171895027161},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.6630496382713318},{"id":"https://openalex.org/C2776321320","wikidata":"https://www.wikidata.org/wiki/Q857525","display_name":"Annotation","level":2,"score":0.6324129700660706},{"id":"https://openalex.org/C198531522","wikidata":"https://www.wikidata.org/wiki/Q485146","display_name":"Sample (material)","level":2,"score":0.617236852645874},{"id":"https://openalex.org/C2777530160","wikidata":"https://www.wikidata.org/wiki/Q41796","display_name":"Sentence","level":2,"score":0.6133362650871277},{"id":"https://openalex.org/C2778476105","wikidata":"https://www.wikidata.org/wiki/Q628539","display_name":"Workload","level":2,"score":0.5493743419647217},{"id":"https://openalex.org/C53893814","wikidata":"https://www.wikidata.org/wiki/Q7378909","display_name":"Rule-based machine translation","level":2,"score":0.5437740683555603},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.46778759360313416},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.4487062692642212},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.13397842645645142},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C43617362","wikidata":"https://www.wikidata.org/wiki/Q170050","display_name":"Chromatography","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.3115/1117794.1117800","is_oa":true,"landing_page_url":"https://doi.org/10.3115/1117794.1117800","pdf_url":"http://dl.acm.org/ft_gateway.cfm?id=1117800&type=pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2000 Joint SIGDAT conference on Empirical methods in natural language processing and very large corpora held in conjunction with the 38th Annual Meeting of the Association for Computational Linguistics -","raw_type":"proceedings-article"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.13.2363","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.13.2363","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://acl.ldc.upenn.edu/W/W00/W00-1306.pdf","raw_type":"text"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.37.807","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.37.807","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://www.eecs.harvard.edu/~rebecca/sample.ps","raw_type":"text"}],"best_oa_location":{"id":"doi:10.3115/1117794.1117800","is_oa":true,"landing_page_url":"https://doi.org/10.3115/1117794.1117800","pdf_url":"http://dl.acm.org/ft_gateway.cfm?id=1117800&type=pdf","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2000 Joint SIGDAT conference on Empirical methods in natural language processing and very large corpora held in conjunction with the 38th Annual Meeting of the Association for Computational Linguistics -","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1963006333","display_name":"Advances in Language Modeling through Lexico-Grammatical    Methods","funder_award_id":"9712068","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2076083849.pdf","grobid_xml":"https://content.openalex.org/works/W2076083849.grobid-xml"},"referenced_works_count":26,"referenced_works":["https://openalex.org/W50453584","https://openalex.org/W1528361845","https://openalex.org/W1567570606","https://openalex.org/W1590983731","https://openalex.org/W1632114991","https://openalex.org/W1978470410","https://openalex.org/W1986543644","https://openalex.org/W2044442377","https://openalex.org/W2047706513","https://openalex.org/W2065010255","https://openalex.org/W2085989833","https://openalex.org/W2089951031","https://openalex.org/W2099111195","https://openalex.org/W2110607519","https://openalex.org/W2114663556","https://openalex.org/W2138433202","https://openalex.org/W2151023586","https://openalex.org/W2154250549","https://openalex.org/W2294348221","https://openalex.org/W2439178139","https://openalex.org/W2788084335","https://openalex.org/W2788296320","https://openalex.org/W2950925769","https://openalex.org/W3088560083","https://openalex.org/W4230030242","https://openalex.org/W4245826738"],"related_works":["https://openalex.org/W3046984657","https://openalex.org/W2053943328","https://openalex.org/W123468065","https://openalex.org/W2118045671","https://openalex.org/W1517025915","https://openalex.org/W4237776144","https://openalex.org/W1974236250","https://openalex.org/W3144287057","https://openalex.org/W2170837769","https://openalex.org/W1487802415"],"abstract_inverted_index":{"Corpus-based":[0],"grammar":[1],"induction":[2],"relies":[3],"on":[4],"using":[5],"many":[6],"hand-parsed":[7],"sentences":[8],"as":[9],"training":[10,17,46,64],"examples.":[11],"However,":[12],"the":[13,39,45,50,53,60,73,76],"construction":[14],"of":[15,41,52,62,75],"a":[16,27],"corpus":[18],"with":[19],"detailed":[20],"syntactic":[21],"analysis":[22],"for":[23],"every":[24],"sentence":[25],"is":[26],"labor-intensive":[28],"task.":[29],"We":[30],"propose":[31],"to":[32,37],"use":[33],"sample":[34],"selection":[35],"methods":[36],"minimize":[38],"amount":[40,61],"annotation":[42],"needed":[43],"in":[44],"data,":[47],"thereby":[48],"reducing":[49],"workload":[51],"human":[54],"annotators.":[55],"This":[56],"paper":[57],"shows":[58],"that":[59],"annotated":[63],"data":[65],"can":[66],"be":[67],"reduced":[68],"by":[69],"36%":[70],"without":[71],"degrading":[72],"quality":[74],"induced":[77],"grammars.":[78]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2023,"cited_by_count":3},{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":2},{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":3},{"year":2018,"cited_by_count":1},{"year":2017,"cited_by_count":1},{"year":2015,"cited_by_count":2},{"year":2014,"cited_by_count":4},{"year":2013,"cited_by_count":1},{"year":2012,"cited_by_count":1}],"updated_date":"2026-04-21T08:09:41.155169","created_date":"2025-10-10T00:00:00"}
