{"id":"https://openalex.org/W2951412882","doi":"https://doi.org/10.18653/v1/p19-1491","title":"What Kind of Language Is Hard to Language-Model?","display_name":"What Kind of Language Is Hard to Language-Model?","publication_year":2019,"publication_date":"2019-01-01","ids":{"openalex":"https://openalex.org/W2951412882","doi":"https://doi.org/10.18653/v1/p19-1491","mag":"2951412882"},"language":"en","primary_location":{"id":"doi:10.18653/v1/p19-1491","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/p19-1491","pdf_url":"https://aclanthology.org/P19-1491.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://aclanthology.org/P19-1491.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5030541675","display_name":"Sebastian J. Mielke","orcid":null},"institutions":[{"id":"https://openalex.org/I145311948","display_name":"Johns Hopkins University","ror":"https://ror.org/00za53h95","country_code":"US","type":"education","lineage":["https://openalex.org/I145311948"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Sabrina J. Mielke","raw_affiliation_strings":["Department of Computer Science, Johns Hopkins University","Johns Hopkins University, Baltimore, United States"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, Johns Hopkins University","institution_ids":["https://openalex.org/I145311948"]},{"raw_affiliation_string":"Johns Hopkins University, Baltimore, United States","institution_ids":["https://openalex.org/I145311948"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5061951606","display_name":"Ryan Cotterell","orcid":"https://orcid.org/0000-0003-4080-1833"},"institutions":[{"id":"https://openalex.org/I145311948","display_name":"Johns Hopkins University","ror":"https://ror.org/00za53h95","country_code":"US","type":"education","lineage":["https://openalex.org/I145311948"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ryan Cotterell","raw_affiliation_strings":["Department of Computer Science, Johns Hopkins University","Johns Hopkins University, Baltimore, United States"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, Johns Hopkins University","institution_ids":["https://openalex.org/I145311948"]},{"raw_affiliation_string":"Johns Hopkins University, Baltimore, United States","institution_ids":["https://openalex.org/I145311948"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5011757745","display_name":"Kyle Gorman","orcid":"https://orcid.org/0000-0002-4233-6595"},"institutions":[{"id":"https://openalex.org/I121847817","display_name":"The Graduate Center, CUNY","ror":"https://ror.org/00awd9g61","country_code":"US","type":"education","lineage":["https://openalex.org/I121847817"]},{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]},{"id":"https://openalex.org/I174216632","display_name":"City University of New York","ror":"https://ror.org/00453a208","country_code":"US","type":"education","lineage":["https://openalex.org/I174216632"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Kyle Gorman","raw_affiliation_strings":["Google","Program in Linguistics, Graduate Center, City University of New York","City University of New York, New York, United States"],"affiliations":[{"raw_affiliation_string":"Google","institution_ids":["https://openalex.org/I1291425158"]},{"raw_affiliation_string":"Program in Linguistics, Graduate Center, City University of New York","institution_ids":["https://openalex.org/I174216632","https://openalex.org/I121847817"]},{"raw_affiliation_string":"City University of New York, New York, United States","institution_ids":["https://openalex.org/I174216632"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5020068498","display_name":"Brian Roark","orcid":null},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Brian Roark","raw_affiliation_strings":["Google","Google (United States), Mountain View, United States"],"affiliations":[{"raw_affiliation_string":"Google","institution_ids":["https://openalex.org/I1291425158"]},{"raw_affiliation_string":"Google (United States), Mountain View, United States","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5052467896","display_name":"Jason Eisner","orcid":null},"institutions":[{"id":"https://openalex.org/I145311948","display_name":"Johns Hopkins University","ror":"https://ror.org/00za53h95","country_code":"US","type":"education","lineage":["https://openalex.org/I145311948"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jason Eisner","raw_affiliation_strings":["Department of Computer Science, Johns Hopkins University","Johns Hopkins University, Baltimore, United States"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, Johns Hopkins University","institution_ids":["https://openalex.org/I145311948"]},{"raw_affiliation_string":"Johns Hopkins University, Baltimore, United States","institution_ids":["https://openalex.org/I145311948"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5030541675"],"corresponding_institution_ids":["https://openalex.org/I145311948"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.05742749,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"4975","last_page":"4989"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13629","display_name":"Text Readability and Simplification","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8099610805511475},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.7161476612091064},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.6214926838874817},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5907065868377686},{"id":"https://openalex.org/keywords/sentence","display_name":"Sentence","score":0.4994845390319824},{"id":"https://openalex.org/keywords/sample","display_name":"Sample (material)","score":0.4811767041683197},{"id":"https://openalex.org/keywords/cache-language-model","display_name":"Cache language model","score":0.47411802411079407},{"id":"https://openalex.org/keywords/variation","display_name":"Variation (astronomy)","score":0.4519044756889343},{"id":"https://openalex.org/keywords/parallel-corpora","display_name":"Parallel corpora","score":0.44684898853302},{"id":"https://openalex.org/keywords/pairwise-comparison","display_name":"Pairwise comparison","score":0.42164507508277893},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.33453384041786194},{"id":"https://openalex.org/keywords/natural-language","display_name":"Natural language","score":0.29438018798828125},{"id":"https://openalex.org/keywords/universal-networking-language","display_name":"Universal Networking Language","score":0.2513584792613983},{"id":"https://openalex.org/keywords/machine-translation","display_name":"Machine translation","score":0.1634099781513214},{"id":"https://openalex.org/keywords/comprehension-approach","display_name":"Comprehension approach","score":0.15564700961112976}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8099610805511475},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.7161476612091064},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.6214926838874817},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5907065868377686},{"id":"https://openalex.org/C2777530160","wikidata":"https://www.wikidata.org/wiki/Q41796","display_name":"Sentence","level":2,"score":0.4994845390319824},{"id":"https://openalex.org/C198531522","wikidata":"https://www.wikidata.org/wiki/Q485146","display_name":"Sample (material)","level":2,"score":0.4811767041683197},{"id":"https://openalex.org/C39608478","wikidata":"https://www.wikidata.org/wiki/Q5015979","display_name":"Cache language model","level":5,"score":0.47411802411079407},{"id":"https://openalex.org/C2778334786","wikidata":"https://www.wikidata.org/wiki/Q1586270","display_name":"Variation (astronomy)","level":2,"score":0.4519044756889343},{"id":"https://openalex.org/C2985367798","wikidata":"https://www.wikidata.org/wiki/Q1346592","display_name":"Parallel corpora","level":3,"score":0.44684898853302},{"id":"https://openalex.org/C184898388","wikidata":"https://www.wikidata.org/wiki/Q1435712","display_name":"Pairwise comparison","level":2,"score":0.42164507508277893},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.33453384041786194},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.29438018798828125},{"id":"https://openalex.org/C83479923","wikidata":"https://www.wikidata.org/wiki/Q2063748","display_name":"Universal Networking Language","level":4,"score":0.2513584792613983},{"id":"https://openalex.org/C203005215","wikidata":"https://www.wikidata.org/wiki/Q79798","display_name":"Machine translation","level":2,"score":0.1634099781513214},{"id":"https://openalex.org/C129353971","wikidata":"https://www.wikidata.org/wiki/Q5156949","display_name":"Comprehension approach","level":3,"score":0.15564700961112976},{"id":"https://openalex.org/C44870925","wikidata":"https://www.wikidata.org/wiki/Q37547","display_name":"Astrophysics","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C43617362","wikidata":"https://www.wikidata.org/wiki/Q170050","display_name":"Chromatography","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.18653/v1/p19-1491","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/p19-1491","pdf_url":"https://aclanthology.org/P19-1491.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:1906.04726","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1906.04726","pdf_url":"https://arxiv.org/pdf/1906.04726","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"mag:2951412882","is_oa":true,"landing_page_url":"https://arxiv.org/abs/1906.04726","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"arXiv (Cornell University)","raw_type":null},{"id":"doi:10.48550/arxiv.1906.04726","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.1906.04726","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.18653/v1/p19-1491","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/p19-1491","pdf_url":"https://aclanthology.org/P19-1491.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics","raw_type":"proceedings-article"},"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.8399999737739563}],"awards":[{"id":"https://openalex.org/G6639588237","display_name":"RI: Small: Linguistic Structure in Neural Sequence Models","funder_award_id":"1718846","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G848032724","display_name":null,"funder_award_id":"Science","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2951412882.pdf","grobid_xml":"https://content.openalex.org/works/W2951412882.grobid-xml"},"referenced_works_count":33,"referenced_works":["https://openalex.org/W22168010","https://openalex.org/W49707343","https://openalex.org/W196214544","https://openalex.org/W1588612820","https://openalex.org/W1779279021","https://openalex.org/W2016630033","https://openalex.org/W2044916741","https://openalex.org/W2059126714","https://openalex.org/W2064675550","https://openalex.org/W2079145130","https://openalex.org/W2084086631","https://openalex.org/W2103442809","https://openalex.org/W2110065044","https://openalex.org/W2114719613","https://openalex.org/W2130126144","https://openalex.org/W2158600149","https://openalex.org/W2250523604","https://openalex.org/W2407998047","https://openalex.org/W2524429381","https://openalex.org/W2574640638","https://openalex.org/W2577537660","https://openalex.org/W2601979355","https://openalex.org/W2727642071","https://openalex.org/W2741029840","https://openalex.org/W2792376130","https://openalex.org/W2798597276","https://openalex.org/W2803214681","https://openalex.org/W2807188009","https://openalex.org/W2892145626","https://openalex.org/W2962739339","https://openalex.org/W2962784628","https://openalex.org/W2963979492","https://openalex.org/W2964017394"],"related_works":["https://openalex.org/W2953092638","https://openalex.org/W2997781365","https://openalex.org/W2251946391","https://openalex.org/W28418033","https://openalex.org/W1843513176","https://openalex.org/W3122021937","https://openalex.org/W3166595320","https://openalex.org/W3197797121","https://openalex.org/W3188354133","https://openalex.org/W3114089626","https://openalex.org/W3092675073","https://openalex.org/W2963549144","https://openalex.org/W3035547806","https://openalex.org/W1689655311","https://openalex.org/W2250248908","https://openalex.org/W2608441325","https://openalex.org/W3174168207","https://openalex.org/W3119636502","https://openalex.org/W2962736243","https://openalex.org/W3114670527"],"abstract_inverted_index":{"How":[0],"language-agnostic":[1],"are":[2,14],"current":[3,19],"state-ofthe-art":[4],"NLP":[5],"tools?":[6],"Are":[7],"there":[8],"some":[9],"types":[10],"of":[11,120,154,184],"language":[12,35,43,88,106,144],"that":[13,39,63,133,187],"easier":[15,138],"to":[16,30,82,104,139,150,166,189],"model":[17,103,117,140],"with":[18],"methods?":[20],"In":[21,74,113],"prior":[22],"work":[23],"(Cotterell":[24,170],"et":[25,171],"al.,":[26,172],"2018)":[27,173],"we":[28,77,96,131,162],"attempted":[29],"address":[31],"this":[32,75,129],"question":[33,153],"for":[34,71],"modeling,":[36],"and":[37,123,164,178],"observed":[38],"recurrent":[40],"neural":[41],"network":[42],"models":[44],"do":[45],"not":[46,136],"perform":[47],"equally":[48],"well":[49],"over":[50],"all":[51],"the":[52,58,68,72,116,152,185],"highresource":[53],"European":[54],"languages":[55,85,158],"found":[56],"in":[57,145,160,192],"Europarl":[59],"corpus.":[60,94],"We":[61],"speculated":[62],"inflectional":[64],"morphology":[65],"may":[66],"be":[67],"primary":[69],"culprit":[70],"discrepancy.":[73],"paper,":[76],"extend":[78],"these":[79],"earlier":[80,169],"experiments":[81],"cover":[83],"69":[84],"from":[86,109],"13":[87],"families":[89],"using":[90],"a":[91,98,146,193],"multilingual":[92],"Bible":[93],"Methodologically,":[95],"introduce":[97],"new":[99],"paired-sample":[100],"multiplicative":[101],"mixed-effects":[102],"obtain":[105],"difficulty":[107],"coefficients":[108],"at-least-pairwise":[110],"parallel":[111],"corpora.":[112],"other":[114],"words,":[115],"is":[118,135],"aware":[119],"inter-sentence":[121],"variation":[122],"can":[124],"handle":[125],"missing":[126],"data.":[127],"Exploiting":[128],"model,":[130],"show":[132],"\"translationese\"":[134],"any":[137],"than":[141],"natively":[142],"written":[143],"fair":[147],"comparison.":[148],"Trying":[149],"answer":[151],"what":[155],"features":[156],"difficult":[157],"have":[159],"common,":[161],"try":[163],"fail":[165],"reproduce":[167],"our":[168],"observation":[174],"about":[175],"morphological":[176],"complexity":[177,191],"instead":[179],"reveal":[180],"far":[181],"simpler":[182],"statistics":[183],"data":[186],"seem":[188],"drive":[190],"much":[194],"larger":[195],"sample.":[196]},"counts_by_year":[],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2025-10-10T00:00:00"}
