{"id":"https://openalex.org/W2029184000","doi":"https://doi.org/10.3115/1117794.1117819","title":"Statistical filtering and subcategorization frame acquisition","display_name":"Statistical filtering and subcategorization frame acquisition","publication_year":2000,"publication_date":"2000-01-01","ids":{"openalex":"https://openalex.org/W2029184000","doi":"https://doi.org/10.3115/1117794.1117819","mag":"2029184000"},"language":"en","primary_location":{"id":"doi:10.3115/1117794.1117819","is_oa":true,"landing_page_url":"https://doi.org/10.3115/1117794.1117819","pdf_url":"https://dl.acm.org/doi/pdf/10.3115/1117794.1117819","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2000 Joint SIGDAT conference on Empirical methods in natural language processing and very large corpora held in conjunction with the 38th Annual Meeting of the Association for Computational Linguistics -","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.3115/1117794.1117819","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5081393566","display_name":"Anna Korhonen","orcid":"https://orcid.org/0000-0002-3692-3144"},"institutions":[{"id":"https://openalex.org/I241749","display_name":"University of Cambridge","ror":"https://ror.org/013meh722","country_code":"GB","type":"education","lineage":["https://openalex.org/I241749"]}],"countries":["GB"],"is_corresponding":true,"raw_author_name":"Anna Korhonen","raw_affiliation_strings":["University of Cambridge, Cambridge, UK"],"affiliations":[{"raw_affiliation_string":"University of Cambridge, Cambridge, UK","institution_ids":["https://openalex.org/I241749"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5081967853","display_name":"Genevieve Gorrell","orcid":"https://orcid.org/0000-0002-8324-606X"},"institutions":[{"id":"https://openalex.org/I241749","display_name":"University of Cambridge","ror":"https://ror.org/013meh722","country_code":"GB","type":"education","lineage":["https://openalex.org/I241749"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Genevieve Gorrell","raw_affiliation_strings":["University of Cambridge, Cambridge, UK"],"affiliations":[{"raw_affiliation_string":"University of Cambridge, Cambridge, UK","institution_ids":["https://openalex.org/I241749"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5063848163","display_name":"Diana McCarthy","orcid":null},"institutions":[{"id":"https://openalex.org/I162608824","display_name":"University of Sussex","ror":"https://ror.org/00ayhx656","country_code":"GB","type":"education","lineage":["https://openalex.org/I162608824"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Diana McCarthy","raw_affiliation_strings":["University of Sussex, Brighton, UK","University of Sussex,Brighton,UK"],"affiliations":[{"raw_affiliation_string":"University of Sussex, Brighton, UK","institution_ids":["https://openalex.org/I162608824"]},{"raw_affiliation_string":"University of Sussex,Brighton,UK","institution_ids":["https://openalex.org/I162608824"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5081393566"],"corresponding_institution_ids":["https://openalex.org/I241749"],"apc_list":null,"apc_paid":null,"fwci":5.835,"has_fulltext":true,"cited_by_count":42,"citation_normalized_percentile":{"value":0.95973659,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":"13","issue":null,"first_page":"199","last_page":"206"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9800999760627747,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9797999858856201,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/subcategorization","display_name":"Subcategorization","score":0.9696932435035706},{"id":"https://openalex.org/keywords/spurious-relationship","display_name":"Spurious relationship","score":0.7558642625808716},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7440979480743408},{"id":"https://openalex.org/keywords/frame","display_name":"Frame (networking)","score":0.661508321762085},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6321779489517212},{"id":"https://openalex.org/keywords/noise","display_name":"Noise (video)","score":0.4964984059333801},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.46894073486328125},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.3778740167617798},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3586917221546173},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.29664790630340576},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.07883816957473755}],"concepts":[{"id":"https://openalex.org/C70845037","wikidata":"https://www.wikidata.org/wiki/Q6980760","display_name":"Subcategorization","level":3,"score":0.9696932435035706},{"id":"https://openalex.org/C97256817","wikidata":"https://www.wikidata.org/wiki/Q1462316","display_name":"Spurious relationship","level":2,"score":0.7558642625808716},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7440979480743408},{"id":"https://openalex.org/C126042441","wikidata":"https://www.wikidata.org/wiki/Q1324888","display_name":"Frame (networking)","level":2,"score":0.661508321762085},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6321779489517212},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.4964984059333801},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.46894073486328125},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.3778740167617798},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3586917221546173},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.29664790630340576},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.07883816957473755},{"id":"https://openalex.org/C2776397901","wikidata":"https://www.wikidata.org/wiki/Q24905","display_name":"Verb","level":2,"score":0.0},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.3115/1117794.1117819","is_oa":true,"landing_page_url":"https://doi.org/10.3115/1117794.1117819","pdf_url":"https://dl.acm.org/doi/pdf/10.3115/1117794.1117819","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2000 Joint SIGDAT conference on Empirical methods in natural language processing and very large corpora held in conjunction with the 38th Annual Meeting of the Association for Computational Linguistics -","raw_type":"proceedings-article"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.11.1254","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.11.1254","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"ftp://ftp.cogs.sussex.ac.uk/pub/users/dianam/hypfin.ps","raw_type":"text"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.12.7593","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.12.7593","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://acl.ldc.upenn.edu/W/W00/W00-1325.pdf","raw_type":"text"}],"best_oa_location":{"id":"doi:10.3115/1117794.1117819","is_oa":true,"landing_page_url":"https://doi.org/10.3115/1117794.1117819","pdf_url":"https://dl.acm.org/doi/pdf/10.3115/1117794.1117819","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2000 Joint SIGDAT conference on Empirical methods in natural language processing and very large corpora held in conjunction with the 38th Annual Meeting of the Association for Computational Linguistics -","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2029184000.pdf","grobid_xml":"https://content.openalex.org/works/W2029184000.grobid-xml"},"referenced_works_count":29,"referenced_works":["https://openalex.org/W403234049","https://openalex.org/W1491161564","https://openalex.org/W1574901103","https://openalex.org/W1592405359","https://openalex.org/W1622422412","https://openalex.org/W1973021928","https://openalex.org/W1985177845","https://openalex.org/W1986348501","https://openalex.org/W1990508473","https://openalex.org/W1991923024","https://openalex.org/W2008172256","https://openalex.org/W2088198454","https://openalex.org/W2096945176","https://openalex.org/W2116780029","https://openalex.org/W2122978558","https://openalex.org/W2127009519","https://openalex.org/W2152311076","https://openalex.org/W2154890447","https://openalex.org/W2161349060","https://openalex.org/W2165131017","https://openalex.org/W2168524620","https://openalex.org/W2334801970","https://openalex.org/W2951833906","https://openalex.org/W4244511627","https://openalex.org/W6635485359","https://openalex.org/W6636742555","https://openalex.org/W6682505165","https://openalex.org/W7027485401","https://openalex.org/W7054500779"],"related_works":["https://openalex.org/W3113091479","https://openalex.org/W2162899405","https://openalex.org/W941090075","https://openalex.org/W2044987316","https://openalex.org/W3134374554","https://openalex.org/W2237480245","https://openalex.org/W2400253058","https://openalex.org/W2354679542","https://openalex.org/W2075065631","https://openalex.org/W1506006726"],"abstract_inverted_index":{"Research":[0],"into":[1],"the":[2,24,27,41,79],"automatic":[3],"acquisition":[4],"of":[5,26,37,81],"subcategorization":[6,54],"frames":[7,77],"(SCFs)":[8],"from":[9,51],"corpora":[10],"is":[11],"starting":[12],"to":[13,48,64,75],"produce":[14],"large-scale":[15],"computational":[16],"lexicons":[17,29],"which":[18],"include":[19],"valuable":[20],"frequency":[21],"information.":[22],"However,":[23],"accuracy":[25],"resulting":[28],"shows":[30],"room":[31],"for":[32,87,92],"improvement.":[33],"One":[34],"significant":[35],"source":[36],"error":[38],"lies":[39],"in":[40],"statistical":[42],"filtering":[43,65,76],"used":[44],"by":[45],"some":[46],"researchers":[47],"remove":[49],"noise":[50],"automatically":[52],"acquired":[53],"frames.":[55],"In":[56],"this":[57,88],"paper,":[58],"we":[59],"compare":[60],"three":[61],"different":[62],"approaches":[63],"out":[66],"spurious":[67],"hypotheses.":[68],"Two":[69],"hypothesis":[70],"tests":[71],"perform":[72],"poorly,":[73],"compared":[74],"on":[78],"basis":[80],"relative":[82],"frequency.":[83],"We":[84],"discuss":[85],"reasons":[86],"and":[89],"consider":[90],"directions":[91],"future":[93],"research.":[94]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2020,"cited_by_count":1},{"year":2016,"cited_by_count":2},{"year":2014,"cited_by_count":3},{"year":2013,"cited_by_count":1},{"year":2012,"cited_by_count":1}],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2025-10-10T00:00:00"}
