{"id":"https://openalex.org/W7147363514","doi":"https://doi.org/10.48550/arxiv.2603.27653","title":"The Degree of Language Diacriticity and Its Effect on Tasks","display_name":"The Degree of Language Diacriticity and Its Effect on Tasks","publication_year":2026,"publication_date":"2026-03-29","ids":{"openalex":"https://openalex.org/W7147363514","doi":"https://doi.org/10.48550/arxiv.2603.27653"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2603.27653","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.27653","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2603.27653","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5132680936","display_name":"Adi Cohen","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Cohen, Adi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5008477284","display_name":"Yuval Pinter","orcid":"https://orcid.org/0000-0003-3174-1621"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Pinter, Yuval","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5132680936"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12090","display_name":"Language and cultural evolution","score":0.24809999763965607,"subfield":{"id":"https://openalex.org/subfields/3316","display_name":"Cultural Studies"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T12090","display_name":"Language and cultural evolution","score":0.24809999763965607,"subfield":{"id":"https://openalex.org/subfields/3316","display_name":"Cultural Studies"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T12380","display_name":"Authorship Attribution and Profiling","score":0.20819999277591705,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13155","display_name":"Digital Communication and Language","score":0.1257999986410141,"subfield":{"id":"https://openalex.org/subfields/1709","display_name":"Human-Computer Interaction"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.6377000212669373},{"id":"https://openalex.org/keywords/degree","display_name":"Degree (music)","score":0.42170000076293945},{"id":"https://openalex.org/keywords/orthographic-projection","display_name":"Orthographic projection","score":0.33149999380111694},{"id":"https://openalex.org/keywords/diversity","display_name":"Diversity (politics)","score":0.32710000872612},{"id":"https://openalex.org/keywords/association","display_name":"Association (psychology)","score":0.32010000944137573},{"id":"https://openalex.org/keywords/orthography","display_name":"Orthography","score":0.31779998540878296},{"id":"https://openalex.org/keywords/second-language","display_name":"Second language","score":0.3124000132083893}],"concepts":[{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.6377000212669373},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6348999738693237},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5322999954223633},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5123000144958496},{"id":"https://openalex.org/C2775997480","wikidata":"https://www.wikidata.org/wiki/Q586277","display_name":"Degree (music)","level":2,"score":0.42170000076293945},{"id":"https://openalex.org/C175694140","wikidata":"https://www.wikidata.org/wiki/Q980329","display_name":"Orthographic projection","level":2,"score":0.33149999380111694},{"id":"https://openalex.org/C2781316041","wikidata":"https://www.wikidata.org/wiki/Q1230584","display_name":"Diversity (politics)","level":2,"score":0.32710000872612},{"id":"https://openalex.org/C142853389","wikidata":"https://www.wikidata.org/wiki/Q744778","display_name":"Association (psychology)","level":2,"score":0.32010000944137573},{"id":"https://openalex.org/C150670947","wikidata":"https://www.wikidata.org/wiki/Q43091","display_name":"Orthography","level":3,"score":0.31779998540878296},{"id":"https://openalex.org/C2987567764","wikidata":"https://www.wikidata.org/wiki/Q125421","display_name":"Second language","level":2,"score":0.3124000132083893},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.29649999737739563},{"id":"https://openalex.org/C78148190","wikidata":"https://www.wikidata.org/wiki/Q8192","display_name":"Writing system","level":2,"score":0.2892000079154968},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.27619999647140503},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.26969999074935913},{"id":"https://openalex.org/C155092808","wikidata":"https://www.wikidata.org/wiki/Q182557","display_name":"Computational linguistics","level":2,"score":0.26589998602867126},{"id":"https://openalex.org/C509619924","wikidata":"https://www.wikidata.org/wiki/Q17130757","display_name":"Structural complexity","level":2,"score":0.26579999923706055},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.2655999958515167},{"id":"https://openalex.org/C2780861071","wikidata":"https://www.wikidata.org/wiki/Q1062934","display_name":"Character (mathematics)","level":2,"score":0.26330000162124634}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2603.27653","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.27653","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2603.27653","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2603.27653","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.8376600742340088,"display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Diacritics":[0],"are":[1,147],"orthographic":[2,189],"marks":[3],"that":[4,79,128,174,188],"clarify":[5],"pronunciation,":[6],"distinguish":[7],"similar":[8],"words,":[9],"or":[10],"alter":[11],"meaning.":[12],"They":[13],"play":[14],"a":[15,68],"central":[16],"role":[17],"in":[18,41,97],"many":[19],"writing":[20,55],"systems,":[21],"yet":[22],"their":[23],"impact":[24],"on":[25,58,115],"language":[26],"technology":[27],"has":[28,38],"not":[29,192],"been":[30],"systematically":[31],"quantified":[32],"across":[33,129],"scripts.":[34,105],"While":[35],"prior":[36],"work":[37],"examined":[39],"diacritics":[40,119],"individual":[42],"languages,":[43,99,130],"there's":[44],"no":[45],"cross-linguistic,":[46],"data-driven":[47,69],"framework":[48,70],"for":[49,71,198],"measuring":[50],"the":[51,81,116,163,181],"degree":[52],"to":[53],"which":[54],"systems":[56],"rely":[57],"them":[59],"and":[60,84,103,123,151],"how":[61,109],"this":[62],"affects":[63],"downstream":[64],"tasks.":[65],"We":[66,90,106,126],"propose":[67],"quantifying":[72],"diacritic":[73,110,132,178,184],"complexity":[74,111,133,161,190],"using":[75],"corpus-level,":[76],"information-theoretic":[77],"metrics":[78,93],"capture":[80],"frequency,":[82],"ambiguity,":[83],"structural":[85,152,160],"diversity":[86],"of":[87,118,177,183],"character-diacritic":[88,145],"combinations.":[89],"compute":[91],"these":[92],"over":[94],"24":[95],"corpora":[96],"15":[98],"spanning":[100],"both":[101],"single-":[102],"multi-diacritic":[104,157],"then":[107],"examine":[108],"correlates":[112],"with":[113,137,166],"performance":[114,182],"task":[117],"restoration,":[120],"evaluating":[121],"BERT-":[122],"RNN-based":[124],"models.":[125],"find":[127],"higher":[131],"is":[134,191],"strongly":[135],"associated":[136],"lower":[138],"restoration":[139,185],"accuracy.":[140],"In":[141,156],"single-diacritic":[142],"scripts,":[143,158],"where":[144],"combinations":[146],"more":[148],"predictable,":[149],"frequency-based":[150,169],"measures":[153],"largely":[154],"align.":[155],"however,":[159],"exhibits":[162],"strongest":[164],"association":[165],"performance,":[167],"surpassing":[168],"measures.":[170],"These":[171],"findings":[172],"show":[173],"measurable":[175],"properties":[176],"usage":[179],"influence":[180],"models,":[186],"demonstrating":[187],"only":[193],"descriptive":[194],"but":[195],"functionally":[196],"relevant":[197],"modeling.":[199]},"counts_by_year":[],"updated_date":"2026-04-02T13:53:19.096889","created_date":"2026-04-02T00:00:00"}
