{"id":"https://openalex.org/W2919000501","doi":"https://doi.org/10.1162/tacl_a_00248","title":"Surface Statistics of an Unknown Language Indicate How to Parse It","display_name":"Surface Statistics of an Unknown Language Indicate How to Parse It","publication_year":2018,"publication_date":"2018-12-01","ids":{"openalex":"https://openalex.org/W2919000501","doi":"https://doi.org/10.1162/tacl_a_00248","mag":"2919000501"},"language":"en","primary_location":{"id":"doi:10.1162/tacl_a_00248","is_oa":true,"landing_page_url":"https://doi.org/10.1162/tacl_a_00248","pdf_url":"https://direct.mit.edu/tacl/article-pdf/doi/10.1162/tacl_a_00248/1567672/tacl_a_00248.pdf","source":{"id":"https://openalex.org/S2729999759","display_name":"Transactions of the Association for Computational Linguistics","issn_l":"2307-387X","issn":["2307-387X"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320244","host_organization_name":"Association for Computational Linguistics","host_organization_lineage":["https://openalex.org/P4310320244"],"host_organization_lineage_names":["Association for Computational Linguistics"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Transactions of the Association for Computational Linguistics","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://direct.mit.edu/tacl/article-pdf/doi/10.1162/tacl_a_00248/1567672/tacl_a_00248.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5043674345","display_name":"Dingquan Wang","orcid":"https://orcid.org/0000-0003-2055-6711"},"institutions":[{"id":"https://openalex.org/I145311948","display_name":"Johns Hopkins University","ror":"https://ror.org/00za53h95","country_code":"US","type":"education","lineage":["https://openalex.org/I145311948"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Dingquan Wang","raw_affiliation_strings":["Department of Computer Science, Johns Hopkins University"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, Johns Hopkins University","institution_ids":["https://openalex.org/I145311948"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5052467896","display_name":"Jason Eisner","orcid":null},"institutions":[{"id":"https://openalex.org/I145311948","display_name":"Johns Hopkins University","ror":"https://ror.org/00za53h95","country_code":"US","type":"education","lineage":["https://openalex.org/I145311948"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Jason Eisner","raw_affiliation_strings":["Department of Computer Science, Johns Hopkins University"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, Johns Hopkins University","institution_ids":["https://openalex.org/I145311948"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5043674345","https://openalex.org/A5052467896"],"corresponding_institution_ids":["https://openalex.org/I145311948"],"apc_list":null,"apc_paid":null,"fwci":3.2155,"has_fulltext":true,"cited_by_count":19,"citation_normalized_percentile":{"value":0.93703883,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":94,"max":99},"biblio":{"volume":"6","issue":null,"first_page":"667","last_page":"685"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9955999851226807,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8986632823944092},{"id":"https://openalex.org/keywords/parsing","display_name":"Parsing","score":0.8092431426048279},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.7543272972106934},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.7201880216598511},{"id":"https://openalex.org/keywords/categorization","display_name":"Categorization","score":0.5048874020576477},{"id":"https://openalex.org/keywords/variety","display_name":"Variety (cybernetics)","score":0.48146381974220276},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.47613075375556946},{"id":"https://openalex.org/keywords/grammar","display_name":"Grammar","score":0.46207359433174133},{"id":"https://openalex.org/keywords/extractor","display_name":"Extractor","score":0.44276896119117737},{"id":"https://openalex.org/keywords/dependency-grammar","display_name":"Dependency grammar","score":0.43624112010002136},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.1643473207950592}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8986632823944092},{"id":"https://openalex.org/C186644900","wikidata":"https://www.wikidata.org/wiki/Q194152","display_name":"Parsing","level":2,"score":0.8092431426048279},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.7543272972106934},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.7201880216598511},{"id":"https://openalex.org/C94124525","wikidata":"https://www.wikidata.org/wiki/Q912550","display_name":"Categorization","level":2,"score":0.5048874020576477},{"id":"https://openalex.org/C136197465","wikidata":"https://www.wikidata.org/wiki/Q1729295","display_name":"Variety (cybernetics)","level":2,"score":0.48146381974220276},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.47613075375556946},{"id":"https://openalex.org/C26022165","wikidata":"https://www.wikidata.org/wiki/Q8091","display_name":"Grammar","level":2,"score":0.46207359433174133},{"id":"https://openalex.org/C117978034","wikidata":"https://www.wikidata.org/wiki/Q5422192","display_name":"Extractor","level":2,"score":0.44276896119117737},{"id":"https://openalex.org/C164883195","wikidata":"https://www.wikidata.org/wiki/Q674834","display_name":"Dependency grammar","level":3,"score":0.43624112010002136},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.1643473207950592},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.0},{"id":"https://openalex.org/C21880701","wikidata":"https://www.wikidata.org/wiki/Q2144042","display_name":"Process engineering","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1162/tacl_a_00248","is_oa":true,"landing_page_url":"https://doi.org/10.1162/tacl_a_00248","pdf_url":"https://direct.mit.edu/tacl/article-pdf/doi/10.1162/tacl_a_00248/1567672/tacl_a_00248.pdf","source":{"id":"https://openalex.org/S2729999759","display_name":"Transactions of the Association for Computational Linguistics","issn_l":"2307-387X","issn":["2307-387X"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320244","host_organization_name":"Association for Computational Linguistics","host_organization_lineage":["https://openalex.org/P4310320244"],"host_organization_lineage_names":["Association for Computational Linguistics"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Transactions of the Association for Computational Linguistics","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:3ffa728b68ee4ff78273dccd01ffc520","is_oa":true,"landing_page_url":"https://doaj.org/article/3ffa728b68ee4ff78273dccd01ffc520","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Transactions of the Association for Computational Linguistics, Vol 6 (2021)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.1162/tacl_a_00248","is_oa":true,"landing_page_url":"https://doi.org/10.1162/tacl_a_00248","pdf_url":"https://direct.mit.edu/tacl/article-pdf/doi/10.1162/tacl_a_00248/1567672/tacl_a_00248.pdf","source":{"id":"https://openalex.org/S2729999759","display_name":"Transactions of the Association for Computational Linguistics","issn_l":"2307-387X","issn":["2307-387X"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320244","host_organization_name":"Association for Computational Linguistics","host_organization_lineage":["https://openalex.org/P4310320244"],"host_organization_lineage_names":["Association for Computational Linguistics"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Transactions of the Association for Computational Linguistics","raw_type":"journal-article"},"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.8299999833106995}],"awards":[{"id":"https://openalex.org/G6639588237","display_name":"RI: Small: Linguistic Structure in Neural Sequence Models","funder_award_id":"1718846","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G848032724","display_name":null,"funder_award_id":"Science","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320317236","display_name":"Maryland Advanced Research Computing Center","ror":null}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2919000501.pdf","grobid_xml":"https://content.openalex.org/works/W2919000501.grobid-xml"},"referenced_works_count":67,"referenced_works":["https://openalex.org/W52985019","https://openalex.org/W66054154","https://openalex.org/W145849181","https://openalex.org/W328458486","https://openalex.org/W565549431","https://openalex.org/W650948056","https://openalex.org/W1480017073","https://openalex.org/W1495446613","https://openalex.org/W1533861849","https://openalex.org/W1854214752","https://openalex.org/W1894835849","https://openalex.org/W1978470410","https://openalex.org/W1999993003","https://openalex.org/W2007858530","https://openalex.org/W2016630033","https://openalex.org/W2052449326","https://openalex.org/W2097826433","https://openalex.org/W2100249433","https://openalex.org/W2105580583","https://openalex.org/W2105847779","https://openalex.org/W2115774663","https://openalex.org/W2116410915","https://openalex.org/W2116967773","https://openalex.org/W2121170334","https://openalex.org/W2137807494","https://openalex.org/W2143954309","https://openalex.org/W2144499799","https://openalex.org/W2152691628","https://openalex.org/W2153208746","https://openalex.org/W2153568660","https://openalex.org/W2153579005","https://openalex.org/W2156064360","https://openalex.org/W2161914416","https://openalex.org/W2163352728","https://openalex.org/W2169126000","https://openalex.org/W2170716495","https://openalex.org/W2183293350","https://openalex.org/W2250313959","https://openalex.org/W2250397939","https://openalex.org/W2250520759","https://openalex.org/W2250762689","https://openalex.org/W2251138842","https://openalex.org/W2251205543","https://openalex.org/W2251400573","https://openalex.org/W2251677149","https://openalex.org/W2252236349","https://openalex.org/W2294835092","https://openalex.org/W2301095666","https://openalex.org/W2462912498","https://openalex.org/W2494423583","https://openalex.org/W2522657307","https://openalex.org/W2538197161","https://openalex.org/W2563157576","https://openalex.org/W2564663390","https://openalex.org/W2572905271","https://openalex.org/W2758640791","https://openalex.org/W2891933973","https://openalex.org/W2914096745","https://openalex.org/W2963139548","https://openalex.org/W2963521186","https://openalex.org/W2963571341","https://openalex.org/W2963737810","https://openalex.org/W2964084097","https://openalex.org/W2964121744","https://openalex.org/W2964199361","https://openalex.org/W3104723404","https://openalex.org/W4299668319"],"related_works":["https://openalex.org/W2251084681","https://openalex.org/W287510790","https://openalex.org/W2968543375","https://openalex.org/W2571817549","https://openalex.org/W1541975828","https://openalex.org/W2250525544","https://openalex.org/W2159336305","https://openalex.org/W2987141700","https://openalex.org/W2953770453","https://openalex.org/W2130795788"],"abstract_inverted_index":{"We":[0,13,90],"introduce":[1],"a":[2,10,68,76,84],"novel":[3],"framework":[4],"for":[5],"delexicalized":[6],"dependency":[7],"parsing":[8,104],"in":[9,52,61,112],"new":[11],"language.":[12,64,144],"show":[14,91],"that":[15,71,87,135,165,185],"useful":[16],"features":[17,40,128,134],"of":[18,33,78,109,142,158],"the":[19,53,62,100,113,143,169],"target":[20,63],"language":[21],"can":[22],"be":[23],"extracted":[24],"automatically":[25],"from":[26,99,122],"an":[27,156],"unparsed":[28,101,123,170],"corpus,":[29],"which":[30],"consists":[31],"only":[32],"gold":[34],"part-of-speech":[35],"(POS)":[36],"sequences.":[37],"Providing":[38],"these":[39],"to":[41,47],"our":[42,56,125],"neural":[43],"parser":[44],"enables":[45],"it":[46,66,82],"parse":[48],"sequences":[49],"like":[50],"those":[51],"corpus.":[54],"Strikingly,":[55],"system":[57,70],"has":[58],"no":[59],"supervision":[60],"Rather,":[65],"is":[67,72],"multilingual":[69],"trained":[73],"end-to-end":[74],"on":[75,151],"variety":[77],"other":[79],"languages,":[80],"so":[81],"learns":[83],"feature":[85],"extractor":[86],"works":[88],"well.":[89],"experimentally":[92],"across":[93],"multiple":[94],"languages:":[95],"(1)":[96],"Features":[97],"computed":[98,121],"corpus":[102],"improve":[103],"accuracy.":[105],"(2)":[106],"Including":[107],"thousands":[108],"synthetic":[110],"languages":[111,154,190],"training":[114,189],"yields":[115],"further":[116],"improvement.":[117],"(3)":[118],"Despite":[119],"being":[120],"corpora,":[124],"learned":[126],"task-specific":[127],"beat":[129],"previous":[130],"work\u2019s":[131],"interpretable":[132],"typological":[133],"require":[136],"parsed":[137],"corpora":[138],"or":[139],"expert":[140],"categorization":[141],"Our":[145],"best":[146],"method":[147],"improved":[148],"attachment":[149],"scores":[150],"held-out":[152],"test":[153],"by":[155,177],"average":[157],"5.6":[159],"percentage":[160],"points":[161,179],"over":[162,180],"past":[163,181],"work":[164,184],"does":[166,186],"not":[167,187],"inspect":[168],"data":[171],"(McDonald":[172],"et":[173,192],"al.,":[174,193],"2011),":[175],"and":[176],"20.7":[178],"\u201cgrammar":[182],"induction\u201d":[183],"use":[188],"(Naseem":[191],"2010).":[194]},"counts_by_year":[{"year":2021,"cited_by_count":3},{"year":2020,"cited_by_count":2},{"year":2019,"cited_by_count":12},{"year":2018,"cited_by_count":2}],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2025-10-10T00:00:00"}
