{"id":"https://openalex.org/W7152963808","doi":"https://doi.org/10.48550/arxiv.2604.07963","title":"Rethinking Data Mixing from the Perspective of Large Language Models","display_name":"Rethinking Data Mixing from the Perspective of Large Language Models","publication_year":2026,"publication_date":"2026-04-09","ids":{"openalex":"https://openalex.org/W7152963808","doi":"https://doi.org/10.48550/arxiv.2604.07963"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.07963","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.07963","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.07963","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5123573670","display_name":"Yuanjian Xu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xu, Yuanjian","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5123588770","display_name":"Tianze Sun","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sun, Tianze","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133334511","display_name":"Changwei Xu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xu, Changwei","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5129749038","display_name":"Xinlong Zhao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhao, XinLong","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5123558253","display_name":"Jianing Hao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hao, Jianing","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133370942","display_name":"Ran Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Ran","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100638749","display_name":"Yang Liu","orcid":"https://orcid.org/0000-0002-4795-9169"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Yang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133347737","display_name":"Ruijie Xu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xu, Ruijie","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133386773","display_name":"Stephen Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chen, Stephen","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5133338751","display_name":"Guang Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Guang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":10,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11273","display_name":"Advanced Graph Neural Networks","score":0.49779999256134033,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11273","display_name":"Advanced Graph Neural Networks","score":0.49779999256134033,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.21819999814033508,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.07769999653100967,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/perspective","display_name":"Perspective (graphical)","score":0.5852000117301941},{"id":"https://openalex.org/keywords/weighting","display_name":"Weighting","score":0.4916999936103821},{"id":"https://openalex.org/keywords/mixing","display_name":"Mixing (physics)","score":0.4756999909877777},{"id":"https://openalex.org/keywords/empirical-research","display_name":"Empirical research","score":0.4602000117301941},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.4490000009536743},{"id":"https://openalex.org/keywords/scheduling","display_name":"Scheduling (production processes)","score":0.4392000138759613},{"id":"https://openalex.org/keywords/data-modeling","display_name":"Data modeling","score":0.38609999418258667}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6933000087738037},{"id":"https://openalex.org/C12713177","wikidata":"https://www.wikidata.org/wiki/Q1900281","display_name":"Perspective (graphical)","level":2,"score":0.5852000117301941},{"id":"https://openalex.org/C183115368","wikidata":"https://www.wikidata.org/wiki/Q856577","display_name":"Weighting","level":2,"score":0.4916999936103821},{"id":"https://openalex.org/C138777275","wikidata":"https://www.wikidata.org/wiki/Q6884054","display_name":"Mixing (physics)","level":2,"score":0.4756999909877777},{"id":"https://openalex.org/C120936955","wikidata":"https://www.wikidata.org/wiki/Q2155640","display_name":"Empirical research","level":2,"score":0.4602000117301941},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.4490000009536743},{"id":"https://openalex.org/C206729178","wikidata":"https://www.wikidata.org/wiki/Q2271896","display_name":"Scheduling (production processes)","level":2,"score":0.4392000138759613},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.43070000410079956},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.38609999418258667},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.34940001368522644},{"id":"https://openalex.org/C2993724205","wikidata":"https://www.wikidata.org/wiki/Q315","display_name":"Human language","level":2,"score":0.34200000762939453},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3328000009059906},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.33090001344680786},{"id":"https://openalex.org/C26760741","wikidata":"https://www.wikidata.org/wiki/Q160402","display_name":"Perception","level":2,"score":0.3303999900817871},{"id":"https://openalex.org/C166052673","wikidata":"https://www.wikidata.org/wiki/Q83021","display_name":"Empirical evidence","level":2,"score":0.325300008058548},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.32170000672340393},{"id":"https://openalex.org/C145912823","wikidata":"https://www.wikidata.org/wiki/Q113558","display_name":"Dynamics (music)","level":2,"score":0.31459999084472656},{"id":"https://openalex.org/C133199616","wikidata":"https://www.wikidata.org/wiki/Q25386885","display_name":"Empirical modelling","level":2,"score":0.2994999885559082},{"id":"https://openalex.org/C2985583900","wikidata":"https://www.wikidata.org/wiki/Q722617","display_name":"Formal description","level":2,"score":0.27810001373291016},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.263700008392334},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.2599000036716461}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.07963","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.07963","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.07963","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.07963","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Data":[0],"mixing":[1],"strategy":[2],"is":[3],"essential":[4],"for":[5],"large":[6],"language":[7],"model":[8,40],"(LLM)":[9],"training.":[10],"Empirical":[11],"evidence":[12],"shows":[13],"that":[14,70,89,107],"inappropriate":[15],"strategies":[16],"can":[17],"significantly":[18],"reduce":[19],"generalization.":[20,51],"Although":[21],"recent":[22],"methods":[23],"have":[24],"improved":[25],"empirical":[26],"performance,":[27],"several":[28],"fundamental":[29],"questions":[30,55],"remain":[31],"open:":[32],"what":[33],"constitutes":[34],"a":[35,67,86,94],"domain,":[36],"whether":[37],"human":[38],"and":[39,46,63],"perceptions":[41],"of":[42,74,103],"domains":[43,75],"are":[44],"aligned,":[45],"how":[47],"domain":[48,64],"weighting":[49],"influences":[50],"We":[52],"address":[53],"these":[54],"by":[56],"establishing":[57],"formal":[58],"connections":[59],"between":[60],"gradient":[61],"dynamics":[62],"distributions,":[65],"offering":[66],"theoretical":[68],"framework":[69,88],"clarifies":[71],"the":[72],"role":[73],"in":[76],"training":[77],"dynamics.":[78],"Building":[79],"on":[80,100],"this":[81],"analysis,":[82],"we":[83],"introduce":[84],"DoGraph,":[85],"reweighting":[87],"formulates":[90],"data":[91],"scheduling":[92],"as":[93],"graph-constrained":[95],"optimization":[96],"problem.":[97],"Extensive":[98],"experiments":[99],"GPT-2":[101],"models":[102],"varying":[104],"scales":[105],"demonstrate":[106],"DoGraph":[108],"consistently":[109],"achieves":[110],"competitive":[111],"performance.":[112]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-04-11T00:00:00"}
