{"id":"https://openalex.org/W7125717814","doi":"https://doi.org/10.48550/arxiv.2601.16217","title":"ChiEngMixBench: Evaluating Large Language Models on Spontaneous and Natural Chinese-English Code-Mixed Generation","display_name":"ChiEngMixBench: Evaluating Large Language Models on Spontaneous and Natural Chinese-English Code-Mixed Generation","publication_year":2026,"publication_date":"2026-01-02","ids":{"openalex":"https://openalex.org/W7125717814","doi":"https://doi.org/10.48550/arxiv.2601.16217"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2601.16217","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2601.16217","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2601.16217","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5123827242","display_name":"Qingyan Yang","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Yang, Qingyan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5123858980","display_name":"Tongxi Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Tongxi","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5007246980","display_name":"Yunsheng Luo","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Luo, Yunsheng","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5123827242"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.321399986743927,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.321399986743927,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10265","display_name":"Multilingual Education and Policy","score":0.21469999849796295,"subfield":{"id":"https://openalex.org/subfields/3310","display_name":"Linguistics and Language"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T13629","display_name":"Text Readability and Simplification","score":0.057999998331069946,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/terminology","display_name":"Terminology","score":0.6567000150680542},{"id":"https://openalex.org/keywords/natural-language","display_name":"Natural language","score":0.527899980545044},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.5037000179290771},{"id":"https://openalex.org/keywords/natural-language-generation","display_name":"Natural language generation","score":0.460099995136261},{"id":"https://openalex.org/keywords/frame","display_name":"Frame (networking)","score":0.453000009059906},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.42309999465942383},{"id":"https://openalex.org/keywords/aggregate","display_name":"Aggregate (composite)","score":0.4180999994277954},{"id":"https://openalex.org/keywords/cognition","display_name":"Cognition","score":0.4041999876499176},{"id":"https://openalex.org/keywords/universal-networking-language","display_name":"Universal Networking Language","score":0.39629998803138733},{"id":"https://openalex.org/keywords/natural-language-understanding","display_name":"Natural language understanding","score":0.39309999346733093}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7946000099182129},{"id":"https://openalex.org/C547195049","wikidata":"https://www.wikidata.org/wiki/Q1725664","display_name":"Terminology","level":2,"score":0.6567000150680542},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5378999710083008},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.527899980545044},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.5037000179290771},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.49480000138282776},{"id":"https://openalex.org/C2776187449","wikidata":"https://www.wikidata.org/wiki/Q1513879","display_name":"Natural language generation","level":3,"score":0.460099995136261},{"id":"https://openalex.org/C126042441","wikidata":"https://www.wikidata.org/wiki/Q1324888","display_name":"Frame (networking)","level":2,"score":0.453000009059906},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.42309999465942383},{"id":"https://openalex.org/C4679612","wikidata":"https://www.wikidata.org/wiki/Q866298","display_name":"Aggregate (composite)","level":2,"score":0.4180999994277954},{"id":"https://openalex.org/C169900460","wikidata":"https://www.wikidata.org/wiki/Q2200417","display_name":"Cognition","level":2,"score":0.4041999876499176},{"id":"https://openalex.org/C83479923","wikidata":"https://www.wikidata.org/wiki/Q2063748","display_name":"Universal Networking Language","level":4,"score":0.39629998803138733},{"id":"https://openalex.org/C2779439875","wikidata":"https://www.wikidata.org/wiki/Q1078276","display_name":"Natural language understanding","level":3,"score":0.39309999346733093},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.3781000077724457},{"id":"https://openalex.org/C129792486","wikidata":"https://www.wikidata.org/wiki/Q1050419","display_name":"Language identification","level":3,"score":0.3483000099658966},{"id":"https://openalex.org/C149364088","wikidata":"https://www.wikidata.org/wiki/Q185917","display_name":"Translation (biology)","level":4,"score":0.3472999930381775},{"id":"https://openalex.org/C20136886","wikidata":"https://www.wikidata.org/wiki/Q749647","display_name":"Interoperability","level":2,"score":0.34130001068115234},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.3375999927520752},{"id":"https://openalex.org/C527412718","wikidata":"https://www.wikidata.org/wiki/Q855395","display_name":"Interpretation (philosophy)","level":2,"score":0.3366999924182892},{"id":"https://openalex.org/C120936955","wikidata":"https://www.wikidata.org/wiki/Q2155640","display_name":"Empirical research","level":2,"score":0.302700012922287},{"id":"https://openalex.org/C2777210771","wikidata":"https://www.wikidata.org/wiki/Q4927124","display_name":"Block (permutation group theory)","level":2,"score":0.2987000048160553},{"id":"https://openalex.org/C206588197","wikidata":"https://www.wikidata.org/wiki/Q846574","display_name":"Reuse","level":2,"score":0.29319998621940613},{"id":"https://openalex.org/C113843644","wikidata":"https://www.wikidata.org/wiki/Q901882","display_name":"Interface (matter)","level":4,"score":0.2922999858856201},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.28700000047683716},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.2768999934196472},{"id":"https://openalex.org/C203005215","wikidata":"https://www.wikidata.org/wiki/Q79798","display_name":"Machine translation","level":2,"score":0.2680000066757202},{"id":"https://openalex.org/C179603123","wikidata":"https://www.wikidata.org/wiki/Q1941921","display_name":"Modeling language","level":3,"score":0.26669999957084656},{"id":"https://openalex.org/C189950617","wikidata":"https://www.wikidata.org/wiki/Q937228","display_name":"Property (philosophy)","level":2,"score":0.2635999917984009},{"id":"https://openalex.org/C2776608160","wikidata":"https://www.wikidata.org/wiki/Q4785462","display_name":"Natural (archaeology)","level":2,"score":0.2587999999523163},{"id":"https://openalex.org/C13606891","wikidata":"https://www.wikidata.org/wiki/Q2623243","display_name":"Conceptual model","level":2,"score":0.2554999887943268},{"id":"https://openalex.org/C18762648","wikidata":"https://www.wikidata.org/wiki/Q42213","display_name":"Work (physics)","level":2,"score":0.2540999948978424}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2601.16217","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2601.16217","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2601.16217","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2601.16217","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.7891402840614319}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Code-mixing":[0],"is":[1,34],"increasingly":[2],"prevalent":[3],"in":[4,52],"interactions":[5],"between":[6,126],"humans":[7],"and":[8,36,69,86,131],"large":[9,128],"language":[10,129],"models,":[11],"yet":[12],"existing":[13],"work":[14],"often":[15],"reduces":[16],"it":[17,25],"to":[18,27,48],"a":[19,30,58,76,112],"translation":[20],"or":[21],"convertibility":[22],"problem,":[23,79],"making":[24],"difficult":[26],"assess":[28],"whether":[29],"model's":[31],"switching":[32],"behavior":[33],"context-appropriate":[35],"aligned":[37],"with":[38,115],"human":[39,132],"conventions.":[40],"We":[41],"introduce":[42],"ChiEngMixBench,":[43],"the":[44,116],"first":[45],"benchmark":[46],"designed":[47],"evaluate":[49],"code-mixing":[50,74,97],"ability":[51],"authentic":[53],"community":[54],"contexts,":[55],"built":[56],"upon":[57],"general":[59],"construction":[60],"pipeline":[61],"that":[62,91],"enables":[63],"scalable":[64],"dataset":[65],"development":[66],"across":[67,99],"domains":[68],"bilingual":[70],"pairs.":[71],"ChiEngMixBench":[72],"formulates":[73],"as":[75],"cognitive":[77,124],"alignment":[78,125],"characterized":[80],"by":[81],"two":[82],"complementary":[83],"signals:":[84],"Spontaneity":[85],"Naturalness.":[87],"Empirical":[88],"evaluation":[89],"shows":[90],"our":[92],"metrics":[93],"can":[94],"systematically":[95],"distinguish":[96],"performance":[98],"models.":[100],"Beyond":[101],"benchmarking,":[102],"we":[103],"further":[104],"uncover":[105],"an":[106],"implicitly":[107],"emergent":[108],"Terminology":[109],"Layering":[110],"Strategy,":[111],"phenomenon":[113],"consistent":[114],"Matrix":[117],"Language":[118],"Frame":[119],"(MLF)":[120],"theory,":[121],"indicating":[122],"structured":[123],"multilingual":[127],"models":[130],"communication.":[133]},"counts_by_year":[],"updated_date":"2026-05-05T08:41:31.759640","created_date":"2026-01-27T00:00:00"}
