{"id":"https://openalex.org/W7160925329","doi":"https://doi.org/10.48550/arxiv.2605.08950","title":"Improving Lexical Difficulty Prediction with Context-Aligned Contrastive Learning and Ridge Ensembling","display_name":"Improving Lexical Difficulty Prediction with Context-Aligned Contrastive Learning and Ridge Ensembling","publication_year":2026,"publication_date":"2026-05-09","ids":{"openalex":"https://openalex.org/W7160925329","doi":"https://doi.org/10.48550/arxiv.2605.08950"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.08950","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.08950","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Preprint"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.08950","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5116806355","display_name":"Wicaksono Leksono Muhamad","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Muhamad, Wicaksono Leksono","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5093210038","display_name":"Joanito Agili Lopo","orcid":"https://orcid.org/0009-0001-3183-7132"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lopo, Joanito Agili","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5104237488","display_name":"Tsamarah Rana Nugraha","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Nugraha, Tsamarah Rana","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5025997244","display_name":"Ahmad Cahyono Adi","orcid":"https://orcid.org/0000-0002-9476-8728"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Adi, Ahmad Cahyono","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5120534754","display_name":"Muhammad Oriza Nurfajri","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Nurfajri, Muhammad Oriza","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":0,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T13629","display_name":"Text Readability and Simplification","score":0.9790999889373779,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T13629","display_name":"Text Readability and Simplification","score":0.9790999889373779,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.004999999888241291,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.002099999925121665,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/ensemble-learning","display_name":"Ensemble learning","score":0.5005000233650208},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.47699999809265137},{"id":"https://openalex.org/keywords/contrastive-analysis","display_name":"Contrastive analysis","score":0.4575999975204468},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.4221999943256378},{"id":"https://openalex.org/keywords/computational-linguistics","display_name":"Computational linguistics","score":0.3978999853134155},{"id":"https://openalex.org/keywords/readability","display_name":"Readability","score":0.38749998807907104},{"id":"https://openalex.org/keywords/statistical-learning","display_name":"Statistical learning","score":0.3605000078678131},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.3411000072956085}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6830000281333923},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6452000141143799},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5878999829292297},{"id":"https://openalex.org/C45942800","wikidata":"https://www.wikidata.org/wiki/Q245652","display_name":"Ensemble learning","level":2,"score":0.5005000233650208},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.47699999809265137},{"id":"https://openalex.org/C2777629044","wikidata":"https://www.wikidata.org/wiki/Q614959","display_name":"Contrastive analysis","level":2,"score":0.4575999975204468},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.4221999943256378},{"id":"https://openalex.org/C155092808","wikidata":"https://www.wikidata.org/wiki/Q182557","display_name":"Computational linguistics","level":2,"score":0.3978999853134155},{"id":"https://openalex.org/C2778143727","wikidata":"https://www.wikidata.org/wiki/Q1820650","display_name":"Readability","level":2,"score":0.38749998807907104},{"id":"https://openalex.org/C2982736386","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Statistical learning","level":2,"score":0.3605000078678131},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.3411000072956085},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3314000070095062},{"id":"https://openalex.org/C110313322","wikidata":"https://www.wikidata.org/wiki/Q7100793","display_name":"Ordinal regression","level":2,"score":0.320499986410141},{"id":"https://openalex.org/C32277403","wikidata":"https://www.wikidata.org/wiki/Q740445","display_name":"Ridge","level":2,"score":0.3084000051021576},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.3050999939441681},{"id":"https://openalex.org/C59985594","wikidata":"https://www.wikidata.org/wiki/Q1758140","display_name":"Contrastive linguistics","level":3,"score":0.28690001368522644},{"id":"https://openalex.org/C126706616","wikidata":"https://www.wikidata.org/wiki/Q2944660","display_name":"Lexical item","level":2,"score":0.2754000127315521},{"id":"https://openalex.org/C2776502983","wikidata":"https://www.wikidata.org/wiki/Q690182","display_name":"Contrast (vision)","level":2,"score":0.2711000144481659},{"id":"https://openalex.org/C93258239","wikidata":"https://www.wikidata.org/wiki/Q6537688","display_name":"Lexical choice","level":3,"score":0.26579999923706055},{"id":"https://openalex.org/C188198153","wikidata":"https://www.wikidata.org/wiki/Q1613840","display_name":"Limiting","level":2,"score":0.26249998807907104},{"id":"https://openalex.org/C2778334786","wikidata":"https://www.wikidata.org/wiki/Q1586270","display_name":"Variation (astronomy)","level":2,"score":0.259799987077713},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.2556000053882599}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.08950","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.08950","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"Preprint"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.08950","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.08950","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Preprint"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.8480530977249146,"display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Lexical":[0],"difficulty":[1,18,124],"prediction":[2],"is":[3],"a":[4],"fundamental":[5],"problem":[6],"in":[7],"language":[8],"learning":[9],"and":[10,49,73,107],"readability":[11],"assessment,":[12],"requiring":[13],"models":[14],"to":[15,45,119],"estimate":[16],"word":[17],"across":[19,123],"different":[20],"first-language":[21],"(L1)":[22],"backgrounds.":[23],"However,":[24],"existing":[25],"approaches":[26],"rely":[27],"on":[28,79],"regression-only":[29],"training":[30],"with":[31,66],"scalar":[32],"supervision,":[33],"which":[34,61],"does":[35],"not":[36],"explicitly":[37],"structure":[38,103],"the":[39,97,101,109],"representation":[40,90],"space,":[41],"limiting":[42],"their":[43],"ability":[44],"capture":[46,100],"cross-lingual":[47,89],"alignment":[48,91],"ordinal":[50,102],"difficulty.":[51],"To":[52],"mitigate":[53],"these":[54],"issues,":[55],"we":[56],"propose":[57],"Context-Aligned":[58],"Contrastive":[59,76],"Regression,":[60],"integrates":[62],"Ridge":[63],"regression":[64],"ensemble":[65,110],"two":[67],"complementary":[68],"objectives,":[69],"i.e.,":[70],"Cross-View":[71],"Context":[72],"Ordinal":[74],"Soft":[75],"Learning.":[77],"Experiments":[78],"three":[80],"L1":[81],"datasets":[82],"show":[83],"that":[84],"(i)":[85],"contrastive":[86],"objectives":[87],"improve":[88],"while":[92],"preserving":[93],"language-specific":[94],"nuances,":[95],"(ii)":[96],"learned":[98],"representations":[99],"of":[104,115],"lexical":[105],"difficulty,":[106],"(iii)":[108],"effectively":[111],"mitigates":[112],"systematic":[113],"biases":[114],"individual":[116],"models,":[117],"leading":[118],"more":[120],"stable":[121],"performance":[122],"levels.":[125]},"counts_by_year":[],"updated_date":"2026-07-01T06:00:48.157686","created_date":"2026-05-13T00:00:00"}
