{"id":"https://openalex.org/W7154489433","doi":"https://doi.org/10.48550/arxiv.2604.12377","title":"SCRIPT: A Subcharacter Compositional Representation Injection Module for Korean Pre-Trained Language Models","display_name":"SCRIPT: A Subcharacter Compositional Representation Injection Module for Korean Pre-Trained Language Models","publication_year":2026,"publication_date":"2026-04-14","ids":{"openalex":"https://openalex.org/W7154489433","doi":"https://doi.org/10.48550/arxiv.2604.12377"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.12377","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.12377","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.12377","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5047328642","display_name":"\uae40\uc22d\ud638","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kim, SungHo","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5108851525","display_name":"Juhyeong Park","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Park, Juhyeong","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5019712650","display_name":"Eda ATALAY","orcid":"https://orcid.org/0000-0002-2928-0486"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Atalay, Eda","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5133675269","display_name":"SangKeun Lee","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lee, SangKeun","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.42989999055862427,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.42989999055862427,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.18140000104904175,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13629","display_name":"Text Readability and Simplification","score":0.10119999945163727,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.5572999715805054},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.552299976348877},{"id":"https://openalex.org/keywords/natural-language","display_name":"Natural language","score":0.45239999890327454},{"id":"https://openalex.org/keywords/character","display_name":"Character (mathematics)","score":0.4327999949455261},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.4325999915599823},{"id":"https://openalex.org/keywords/encoding","display_name":"Encoding (memory)","score":0.42730000615119934},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.38370001316070557},{"id":"https://openalex.org/keywords/language-identification","display_name":"Language identification","score":0.3790000081062317}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8183000087738037},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.6883000135421753},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5619999766349792},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.5572999715805054},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.552299976348877},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.45239999890327454},{"id":"https://openalex.org/C2780861071","wikidata":"https://www.wikidata.org/wiki/Q1062934","display_name":"Character (mathematics)","level":2,"score":0.4327999949455261},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.4325999915599823},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.42730000615119934},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.38370001316070557},{"id":"https://openalex.org/C129792486","wikidata":"https://www.wikidata.org/wiki/Q1050419","display_name":"Language identification","level":3,"score":0.3790000081062317},{"id":"https://openalex.org/C2778572836","wikidata":"https://www.wikidata.org/wiki/Q380933","display_name":"Space (punctuation)","level":2,"score":0.375},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.37439998984336853},{"id":"https://openalex.org/C66746571","wikidata":"https://www.wikidata.org/wiki/Q1134833","display_name":"ENCODE","level":3,"score":0.35929998755455017},{"id":"https://openalex.org/C61423126","wikidata":"https://www.wikidata.org/wiki/Q187432","display_name":"Scripting language","level":2,"score":0.3158000111579895},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.31450000405311584},{"id":"https://openalex.org/C2779439875","wikidata":"https://www.wikidata.org/wiki/Q1078276","display_name":"Natural language understanding","level":3,"score":0.30809998512268066},{"id":"https://openalex.org/C60048249","wikidata":"https://www.wikidata.org/wiki/Q37437","display_name":"Syntax","level":2,"score":0.2849000096321106},{"id":"https://openalex.org/C98954769","wikidata":"https://www.wikidata.org/wiki/Q1759657","display_name":"Lexical semantics","level":3,"score":0.2847000062465668},{"id":"https://openalex.org/C2776608160","wikidata":"https://www.wikidata.org/wiki/Q4785462","display_name":"Natural (archaeology)","level":2,"score":0.27309998869895935},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.2648000121116638},{"id":"https://openalex.org/C176982825","wikidata":"https://www.wikidata.org/wiki/Q835922","display_name":"Lexical analysis","level":2,"score":0.25600001215934753},{"id":"https://openalex.org/C126706616","wikidata":"https://www.wikidata.org/wiki/Q2944660","display_name":"Lexical item","level":2,"score":0.2554999887943268},{"id":"https://openalex.org/C2776187449","wikidata":"https://www.wikidata.org/wiki/Q1513879","display_name":"Natural language generation","level":3,"score":0.2547999918460846},{"id":"https://openalex.org/C168167062","wikidata":"https://www.wikidata.org/wiki/Q1117970","display_name":"Component (thermodynamics)","level":2,"score":0.25099998712539673}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.12377","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.12377","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.12377","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.12377","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"score":0.8325248956680298,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Korean":[0,33,46,85,112],"is":[1,15,149],"a":[2,7,76,104,136],"morphologically":[3],"rich":[4],"language":[5,47,114],"with":[6,93],"featural":[8],"writing":[9],"system":[10],"in":[11,135],"which":[12,56],"each":[13],"character":[14],"systematically":[16],"composed":[17],"of":[18,32,67],"subcharacter":[19,81],"units":[20],"known":[21],"as":[22],"Jamo.":[23],"These":[24],"subcharacters":[25],"not":[26,58],"only":[27],"determine":[28],"the":[29,63,132],"visual":[30],"structure":[31,66],"but":[34],"also":[35],"encode":[36],"frequent":[37],"and":[38,117,143],"linguistically":[39],"meaningful":[40],"morphophonological":[41],"processes.":[42],"However,":[43],"most":[44],"current":[45],"models":[48],"(LMs)":[49],"are":[50,57],"based":[51],"on":[52],"subword":[53,91],"tokenization":[54],"schemes,":[55],"explicitly":[59],"designed":[60],"to":[61,89],"capture":[62],"internal":[64],"compositional":[65,82],"characters.":[68],"To":[69],"address":[70],"this":[71],"limitation,":[72],"we":[73],"propose":[74],"SCRIPT,":[75],"model-agnostic":[77],"module":[78],"that":[79,129,138],"injects":[80],"knowledge":[83],"into":[84],"PLMs.":[86],"SCRIPT":[87,106,130],"allows":[88],"enhance":[90],"embeddings":[92],"structural":[94],"granularity,":[95],"without":[96],"requiring":[97],"architectural":[98],"changes":[99],"or":[100],"additional":[101],"pre-training.":[102],"As":[103],"result,":[105],"enhances":[107],"all":[108],"baselines":[109],"across":[110],"various":[111],"natural":[113],"understanding":[115],"(NLU)":[116],"generation":[118],"(NLG)":[119],"tasks.":[120],"Moreover,":[121],"beyond":[122],"performance":[123],"gains,":[124],"detailed":[125],"linguistic":[126],"analyses":[127],"show":[128],"reshapes":[131],"embedding":[133],"space":[134],"way":[137],"better":[139],"captures":[140],"grammatical":[141],"regularities":[142],"semantically":[144],"cohesive":[145],"variations.":[146],"Our":[147],"code":[148],"available":[150],"at":[151],"https://github.com/SungHo3268/SCRIPT.":[152]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-04-16T00:00:00"}
