{"id":"https://openalex.org/W7140441539","doi":"https://doi.org/10.48550/arxiv.2603.24222","title":"Variation is the Norm: Embracing Sociolinguistics in NLP","display_name":"Variation is the Norm: Embracing Sociolinguistics in NLP","publication_year":2026,"publication_date":"2026-03-25","ids":{"openalex":"https://openalex.org/W7140441539","doi":"https://doi.org/10.48550/arxiv.2603.24222"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.24222","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.24222","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.24222","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5114658138","display_name":"Anne-Marie Lutgen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lutgen, Anne-Marie","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5089914235","display_name":"Alistair Plum","orcid":"https://orcid.org/0000-0003-0977-3467"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Plum, Alistair","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5007661580","display_name":"Verena Blaschke","orcid":"https://orcid.org/0000-0002-1082-2740"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Blaschke, Verena","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5130695812","display_name":"Barbara Plank","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Plank, Barbara","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5005933561","display_name":"Christoph Purschke","orcid":"https://orcid.org/0000-0002-9655-2058"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Purschke, Christoph","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.3050999939441681,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.3050999939441681,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13910","display_name":"Computational and Text Analysis Methods","score":0.16760000586509705,"subfield":{"id":"https://openalex.org/subfields/3300","display_name":"General Social Sciences"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T12090","display_name":"Language and cultural evolution","score":0.09080000221729279,"subfield":{"id":"https://openalex.org/subfields/3316","display_name":"Cultural Studies"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/variation","display_name":"Variation (astronomy)","score":0.9386000037193298},{"id":"https://openalex.org/keywords/sociolinguistics","display_name":"Sociolinguistics","score":0.664900004863739},{"id":"https://openalex.org/keywords/dimension","display_name":"Dimension (graph theory)","score":0.47130000591278076},{"id":"https://openalex.org/keywords/language-change","display_name":"Language change","score":0.33660000562667847},{"id":"https://openalex.org/keywords/corpus-linguistics","display_name":"Corpus linguistics","score":0.3046000003814697},{"id":"https://openalex.org/keywords/computational-linguistics","display_name":"Computational linguistics","score":0.3043999969959259},{"id":"https://openalex.org/keywords/on-language","display_name":"On Language","score":0.29350000619888306}],"concepts":[{"id":"https://openalex.org/C2778334786","wikidata":"https://www.wikidata.org/wiki/Q1586270","display_name":"Variation (astronomy)","level":2,"score":0.9386000037193298},{"id":"https://openalex.org/C28519872","wikidata":"https://www.wikidata.org/wiki/Q160845","display_name":"Sociolinguistics","level":2,"score":0.664900004863739},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6100000143051147},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.5437999963760376},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.53329998254776},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.47850000858306885},{"id":"https://openalex.org/C33676613","wikidata":"https://www.wikidata.org/wiki/Q13415176","display_name":"Dimension (graph theory)","level":2,"score":0.47130000591278076},{"id":"https://openalex.org/C2776095024","wikidata":"https://www.wikidata.org/wiki/Q524648","display_name":"Language change","level":2,"score":0.33660000562667847},{"id":"https://openalex.org/C532629269","wikidata":"https://www.wikidata.org/wiki/Q865083","display_name":"Corpus linguistics","level":2,"score":0.3046000003814697},{"id":"https://openalex.org/C155092808","wikidata":"https://www.wikidata.org/wiki/Q182557","display_name":"Computational linguistics","level":2,"score":0.3043999969959259},{"id":"https://openalex.org/C2779313563","wikidata":"https://www.wikidata.org/wiki/Q17072565","display_name":"On Language","level":2,"score":0.29350000619888306},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.28949999809265137},{"id":"https://openalex.org/C2777532361","wikidata":"https://www.wikidata.org/wiki/Q687185","display_name":"Lexicalization","level":2,"score":0.27059999108314514},{"id":"https://openalex.org/C109359841","wikidata":"https://www.wikidata.org/wiki/Q728944","display_name":"Inclusion (mineral)","level":2,"score":0.2669999897480011},{"id":"https://openalex.org/C194232998","wikidata":"https://www.wikidata.org/wiki/Q1606712","display_name":"Transition (genetics)","level":3,"score":0.2623000144958496},{"id":"https://openalex.org/C59656382","wikidata":"https://www.wikidata.org/wiki/Q191536","display_name":"Conjunction (astronomy)","level":2,"score":0.2599000036716461},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.2581000030040741}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.24222","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.24222","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.24222","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.24222","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/10","score":0.8054305911064148,"display_name":"Reduced inequalities"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"In":[0],"Natural":[1],"Language":[2],"Processing":[3],"(NLP),":[4],"variation":[5,28,59,121,143,156,176],"is":[6,19,32,98],"typically":[7],"seen":[8],"as":[9,161],"noise":[10],"and":[11,111],"\"normalised":[12],"away\"":[13],"before":[14],"processing,":[15],"even":[16],"though":[17],"it":[18],"an":[20,84],"integral":[21],"part":[22],"of":[23,45,51,91,108,119,154,175,188],"language.":[24],"Conversely,":[25],"studying":[26],"language":[27,46,86],"in":[29,64,68,105,122,144,157,177,184],"social":[30],"contexts":[31],"central":[33],"to":[34,40,124,127,137,167],"sociolinguistics.":[35,189],"We":[36,53],"present":[37],"a":[38,65,79,88,116,134],"framework":[39,171,187],"combine":[41],"the":[42,48,71,106,128,139,145,152,158,173,178,185],"sociolinguistic":[43],"dimension":[44,50],"with":[47,115],"technical":[49],"NLP.":[52],"argue":[54],"that":[55],"by":[56,141],"embracing":[57],"sociolinguistics,":[58],"can":[60],"actively":[61],"be":[62],"included":[63],"research":[66,159],"setup,":[67,160],"turn":[69],"informing":[70],"NLP":[72,96],"side.":[73],"To":[74],"illustrate":[75],"this,":[76],"we":[77,132],"provide":[78,133],"case":[80,149],"study":[81,150],"on":[82,113],"Luxembourgish,":[83],"evolving":[85],"featuring":[87],"large":[89,103,117],"amount":[90,118],"orthographic":[92,120],"variation,":[93],"demonstrating":[94],"how":[95],"performance":[97,107,140],"impacted.":[99],"The":[100],"results":[101],"show":[102],"discrepancies":[104],"models":[109,162],"tested":[110],"fine-tuned":[112],"data":[114,125],"comparison":[123],"closer":[126],"(orthographic)":[129],"standard.":[130],"Furthermore,":[131],"possible":[135],"solution":[136],"improve":[138],"including":[142,155],"fine-tuning":[146],"process.":[147],"This":[148],"highlights":[151],"importance":[153],"are":[163],"currently":[164],"not":[165],"robust":[166],"occurring":[168],"variation.":[169],"Our":[170],"facilitates":[172],"inclusion":[174],"thought-process":[179],"while":[180],"also":[181],"being":[182],"grounded":[183],"theoretical":[186]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-03-27T00:00:00"}
