{"id":"https://openalex.org/W4415428094","doi":"https://doi.org/10.3233/faia251329","title":"Diversity-Aware Self-Paced Data Selection for LLM Fine-Tuning","display_name":"Diversity-Aware Self-Paced Data Selection for LLM Fine-Tuning","publication_year":2025,"publication_date":"2025-10-21","ids":{"openalex":"https://openalex.org/W4415428094","doi":"https://doi.org/10.3233/faia251329"},"language":null,"primary_location":{"id":"doi:10.3233/faia251329","is_oa":true,"landing_page_url":"https://doi.org/10.3233/faia251329","pdf_url":null,"source":{"id":"https://openalex.org/S4210201731","display_name":"Frontiers in artificial intelligence and applications","issn_l":"0922-6389","issn":["0922-6389","1879-8314"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Frontiers in Artificial Intelligence and Applications","raw_type":"book-chapter"},"type":"book-chapter","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://doi.org/10.3233/faia251329","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5087524444","display_name":"Yingxuan Yang","orcid":"https://orcid.org/0000-0002-0494-7440"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yingxuan Yang","raw_affiliation_strings":["Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101484952","display_name":"Huayi Wang","orcid":"https://orcid.org/0000-0002-7395-1056"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Huayi Wang","raw_affiliation_strings":["Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5049802452","display_name":"Muning Wen","orcid":"https://orcid.org/0009-0000-7868-1262"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Muning Wen","raw_affiliation_strings":["Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5024166490","display_name":"Xiaoyun Mo","orcid":"https://orcid.org/0000-0001-5059-3500"},"institutions":[{"id":"https://openalex.org/I4210130009","display_name":"Shenzhen Institute of Building Research (China)","ror":"https://ror.org/039vzrf55","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210130009"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaoyun Mo","raw_affiliation_strings":["Oppo Research Institute, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Oppo Research Institute, Shenzhen, China","institution_ids":["https://openalex.org/I4210130009"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5111154818","display_name":"Qiuying Peng","orcid":null},"institutions":[{"id":"https://openalex.org/I4210130009","display_name":"Shenzhen Institute of Building Research (China)","ror":"https://ror.org/039vzrf55","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210130009"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qiuying Peng","raw_affiliation_strings":["Oppo Research Institute, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Oppo Research Institute, Shenzhen, China","institution_ids":["https://openalex.org/I4210130009"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100384727","display_name":"Jun Wang","orcid":"https://orcid.org/0000-0002-4021-4228"},"institutions":[{"id":"https://openalex.org/I4210130009","display_name":"Shenzhen Institute of Building Research (China)","ror":"https://ror.org/039vzrf55","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210130009"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jun Wang","raw_affiliation_strings":["Oppo Research Institute, Shenzhen, China"],"affiliations":[{"raw_affiliation_string":"Oppo Research Institute, Shenzhen, China","institution_ids":["https://openalex.org/I4210130009"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5090720315","display_name":"Weinan Zhang","orcid":"https://orcid.org/0000-0002-0127-2425"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Weinan Zhang","raw_affiliation_strings":["Shanghai Jiao Tong University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University, Shanghai, China","institution_ids":["https://openalex.org/I183067930"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5087524444"],"corresponding_institution_ids":["https://openalex.org/I183067930"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.73455056,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11201","display_name":"Metallurgy and Material Forming","score":0.8928999900817871,"subfield":{"id":"https://openalex.org/subfields/2211","display_name":"Mechanics of Materials"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11201","display_name":"Metallurgy and Material Forming","score":0.8928999900817871,"subfield":{"id":"https://openalex.org/subfields/2211","display_name":"Mechanics of Materials"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11338","display_name":"Advancements in Photolithography Techniques","score":0.8129000067710876,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11032","display_name":"VLSI and Analog Circuit Testing","score":0.7558000087738037,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/selection","display_name":"Selection (genetic algorithm)","score":0.6618000268936157},{"id":"https://openalex.org/keywords/heuristics","display_name":"Heuristics","score":0.5629000067710876},{"id":"https://openalex.org/keywords/training-set","display_name":"Training set","score":0.4823000133037567},{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.4357999861240387},{"id":"https://openalex.org/keywords/model-selection","display_name":"Model selection","score":0.3944000005722046},{"id":"https://openalex.org/keywords/data-collection","display_name":"Data collection","score":0.31540000438690186},{"id":"https://openalex.org/keywords/data-manipulation-language","display_name":"Data manipulation language","score":0.30079999566078186}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8019999861717224},{"id":"https://openalex.org/C81917197","wikidata":"https://www.wikidata.org/wiki/Q628760","display_name":"Selection (genetic algorithm)","level":2,"score":0.6618000268936157},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.61080002784729},{"id":"https://openalex.org/C127705205","wikidata":"https://www.wikidata.org/wiki/Q5748245","display_name":"Heuristics","level":2,"score":0.5629000067710876},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.4823000133037567},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.46810001134872437},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.4357999861240387},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.41679999232292175},{"id":"https://openalex.org/C93959086","wikidata":"https://www.wikidata.org/wiki/Q6888345","display_name":"Model selection","level":2,"score":0.3944000005722046},{"id":"https://openalex.org/C133462117","wikidata":"https://www.wikidata.org/wiki/Q4929239","display_name":"Data collection","level":2,"score":0.31540000438690186},{"id":"https://openalex.org/C56288433","wikidata":"https://www.wikidata.org/wiki/Q58673","display_name":"Data manipulation language","level":2,"score":0.30079999566078186},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.28700000047683716},{"id":"https://openalex.org/C84462506","wikidata":"https://www.wikidata.org/wiki/Q173142","display_name":"Digital signal processing","level":2,"score":0.2842000126838684},{"id":"https://openalex.org/C160920958","wikidata":"https://www.wikidata.org/wiki/Q7662746","display_name":"Synthetic data","level":2,"score":0.2721000015735626},{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.26649999618530273},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.26579999923706055},{"id":"https://openalex.org/C75684735","wikidata":"https://www.wikidata.org/wiki/Q858810","display_name":"Big data","level":2,"score":0.2621999979019165}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.3233/faia251329","is_oa":true,"landing_page_url":"https://doi.org/10.3233/faia251329","pdf_url":null,"source":{"id":"https://openalex.org/S4210201731","display_name":"Frontiers in artificial intelligence and applications","issn_l":"0922-6389","issn":["0922-6389","1879-8314"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Frontiers in Artificial Intelligence and Applications","raw_type":"book-chapter"}],"best_oa_location":{"id":"doi:10.3233/faia251329","is_oa":true,"landing_page_url":"https://doi.org/10.3233/faia251329","pdf_url":null,"source":{"id":"https://openalex.org/S4210201731","display_name":"Frontiers in artificial intelligence and applications","issn_l":"0922-6389","issn":["0922-6389","1879-8314"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Frontiers in Artificial Intelligence and Applications","raw_type":"book-chapter"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Fine-tuning":[0],"large":[1],"language":[2],"models":[3],"(LLMs)":[4],"is":[5,105],"challenged":[6],"by":[7],"the":[8,14,56,69,92,128,137,162,192],"presence":[9],"of":[10,65,103,195],"noisy":[11],"data":[12,24,38,77,93,98,122,198],"and":[13,36,141,149,166,180,186],"high":[15],"computational":[16],"cost":[17,35],"when":[18],"training":[19,34,104,133,167],"on":[20,44,136,170],"large-scale":[21],"datasets.":[22],"While":[23],"selection":[25,94,123,199],"has":[26],"emerged":[27],"as":[28,62],"a":[29,119,143],"promising":[30],"approach":[31],"to":[32,53,55,87,146,161],"reduce":[33],"improve":[37],"quality,":[39],"existing":[40],"methods":[41],"often":[42],"rely":[43],"static":[45,154,179],"heuristics":[46],"or":[47,155],"manual":[48],"metrics.":[49],"These":[50],"approaches":[51],"struggle":[52],"adapt":[54],"model\u2019s":[57,138,163],"evolving":[58],"capabilities":[59],"during":[60],"training,":[61],"its":[63,74],"understanding":[64],"tasks":[66],"improves.":[67],"As":[68],"model":[70],"becomes":[71],"more":[72],"powerful,":[73],"requirements":[75],"for":[76,107],"that":[78,125,175],"can":[79],"enhance":[80,147],"performance":[81],"also":[82],"change,":[83],"making":[84],"it":[85],"crucial":[86],"incorporate":[88],"this":[89],"dynamic":[90],"into":[91],"process.":[95],"Moreover,":[96],"ensuring":[97],"diversity":[99],"throughout":[100],"different":[101],"stages":[102],"essential":[106],"preventing":[108],"redundancy,":[109],"reducing":[110],"overfitting.":[111,151],"To":[112],"address":[113],"these":[114],"issues,":[115],"we":[116],"propose":[117],"DSP,":[118],"Diversity-Aware":[120],"Self-Paced":[121],"framework":[124],"evolves":[126],"with":[127],"model.":[129],"DSP":[130,158,176],"progressively":[131],"selects":[132],"samples":[134],"based":[135],"own":[139],"outputs":[140],"incorporates":[142],"diversity-aware":[144,197],"mechanism":[145],"generalization":[148],"mitigate":[150],"Unlike":[152],"prior":[153],"rule-based":[156],"strategies,":[157],"adaptively":[159],"adjusts":[160],"internal":[164],"feedback":[165],"stage.":[168],"Experiments":[169],"two":[171],"public":[172],"benchmarks":[173],"demonstrate":[174],"consistently":[177],"outperforms":[178],"heuristic-based":[181],"baselines":[182],"across":[183],"multiple":[184],"datasets":[185],"backbone":[187],"models.":[188],"Our":[189],"findings":[190],"highlight":[191],"critical":[193],"role":[194],"dynamic,":[196],"in":[200],"effective":[201],"LLM":[202],"fine-tuning.":[203]},"counts_by_year":[],"updated_date":"2026-03-07T16:01:11.037858","created_date":"2025-10-24T00:00:00"}
