{"id":"https://openalex.org/W4415708630","doi":"https://doi.org/10.1109/icme59968.2025.11209076","title":"TACOS: Open Tagging and Comparative Scoring for Instruction Fine-Tuning Data Selection","display_name":"TACOS: Open Tagging and Comparative Scoring for Instruction Fine-Tuning Data Selection","publication_year":2025,"publication_date":"2025-06-30","ids":{"openalex":"https://openalex.org/W4415708630","doi":"https://doi.org/10.1109/icme59968.2025.11209076"},"language":null,"primary_location":{"id":"doi:10.1109/icme59968.2025.11209076","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icme59968.2025.11209076","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Conference on Multimedia and Expo (ICME)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5108123164","display_name":"X. Q. He","orcid":"https://orcid.org/0009-0002-0131-2834"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Xixiang He","raw_affiliation_strings":["National University of Defense Technology,Changsha,China"],"affiliations":[{"raw_affiliation_string":"National University of Defense Technology,Changsha,China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103179183","display_name":"Hao Yu","orcid":"https://orcid.org/0000-0001-8353-8110"},"institutions":[{"id":"https://openalex.org/I4210165407","display_name":"Intelligent Decision Systems (Spain)","ror":"https://ror.org/05xp52m23","country_code":"ES","type":"company","lineage":["https://openalex.org/I4210165407"]}],"countries":["ES"],"is_corresponding":false,"raw_author_name":"Hao Yu","raw_affiliation_strings":["Intelligent Game and Decision Lah,Beijing,China"],"affiliations":[{"raw_affiliation_string":"Intelligent Game and Decision Lah,Beijing,China","institution_ids":["https://openalex.org/I4210165407"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5069955912","display_name":"Qiong Sun","orcid":null},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qiyao Sun","raw_affiliation_strings":["National University of Defense Technology,Changsha,China"],"affiliations":[{"raw_affiliation_string":"National University of Defense Technology,Changsha,China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5017953971","display_name":"Ann\u2010Lii Cheng","orcid":"https://orcid.org/0000-0002-9152-6512"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ao Cheng","raw_affiliation_strings":["National University of Defense Technology,Changsha,China"],"affiliations":[{"raw_affiliation_string":"National University of Defense Technology,Changsha,China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5006901308","display_name":"Tailai Zhang","orcid":null},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Tailai Zhang","raw_affiliation_strings":["National University of Defense Technology,Changsha,China"],"affiliations":[{"raw_affiliation_string":"National University of Defense Technology,Changsha,China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102704908","display_name":"Cong Liu","orcid":"https://orcid.org/0009-0001-6775-1269"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Cong Liu","raw_affiliation_strings":["National University of Defense Technology,Changsha,China"],"affiliations":[{"raw_affiliation_string":"National University of Defense Technology,Changsha,China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5011781002","display_name":"Shuxuan Guo","orcid":null},"institutions":[{"id":"https://openalex.org/I4210165407","display_name":"Intelligent Decision Systems (Spain)","ror":"https://ror.org/05xp52m23","country_code":"ES","type":"company","lineage":["https://openalex.org/I4210165407"]}],"countries":["ES"],"is_corresponding":false,"raw_author_name":"Shuxuan Guo","raw_affiliation_strings":["Intelligent Game and Decision Lah,Beijing,China"],"affiliations":[{"raw_affiliation_string":"Intelligent Game and Decision Lah,Beijing,China","institution_ids":["https://openalex.org/I4210165407"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5108123164"],"corresponding_institution_ids":["https://openalex.org/I170215575"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.17539259,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.3799000084400177,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.3799000084400177,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.1404000073671341,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.08900000154972076,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.6876000165939331},{"id":"https://openalex.org/keywords/heuristics","display_name":"Heuristics","score":0.656499981880188},{"id":"https://openalex.org/keywords/normalization","display_name":"Normalization (sociology)","score":0.6229000091552734},{"id":"https://openalex.org/keywords/data-quality","display_name":"Data quality","score":0.45509999990463257},{"id":"https://openalex.org/keywords/training-set","display_name":"Training set","score":0.44780001044273376},{"id":"https://openalex.org/keywords/selection","display_name":"Selection (genetic algorithm)","score":0.44609999656677246},{"id":"https://openalex.org/keywords/labeled-data","display_name":"Labeled data","score":0.3765999972820282}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8287000060081482},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.6876000165939331},{"id":"https://openalex.org/C127705205","wikidata":"https://www.wikidata.org/wiki/Q5748245","display_name":"Heuristics","level":2,"score":0.656499981880188},{"id":"https://openalex.org/C136886441","wikidata":"https://www.wikidata.org/wiki/Q926129","display_name":"Normalization (sociology)","level":2,"score":0.6229000091552734},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5200999975204468},{"id":"https://openalex.org/C24756922","wikidata":"https://www.wikidata.org/wiki/Q1757694","display_name":"Data quality","level":3,"score":0.45509999990463257},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.4514000117778778},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.45019999146461487},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.44780001044273376},{"id":"https://openalex.org/C81917197","wikidata":"https://www.wikidata.org/wiki/Q628760","display_name":"Selection (genetic algorithm)","level":2,"score":0.44609999656677246},{"id":"https://openalex.org/C2776145971","wikidata":"https://www.wikidata.org/wiki/Q30673951","display_name":"Labeled data","level":2,"score":0.3765999972820282},{"id":"https://openalex.org/C3020493868","wikidata":"https://www.wikidata.org/wiki/Q55631277","display_name":"Real world data","level":2,"score":0.37220001220703125},{"id":"https://openalex.org/C117354338","wikidata":"https://www.wikidata.org/wiki/Q1165112","display_name":"Singleton","level":3,"score":0.33399999141693115},{"id":"https://openalex.org/C148483581","wikidata":"https://www.wikidata.org/wiki/Q446488","display_name":"Feature selection","level":2,"score":0.32330000400543213},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.3208000063896179},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.28760001063346863},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.2791000008583069},{"id":"https://openalex.org/C93959086","wikidata":"https://www.wikidata.org/wiki/Q6888345","display_name":"Model selection","level":2,"score":0.27250000834465027},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.2515999972820282}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icme59968.2025.11209076","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icme59968.2025.11209076","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Conference on Multimedia and Expo (ICME)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":14,"referenced_works":["https://openalex.org/W2064853889","https://openalex.org/W2101105183","https://openalex.org/W2905390632","https://openalex.org/W2945232141","https://openalex.org/W3201254286","https://openalex.org/W4385572634","https://openalex.org/W4385572845","https://openalex.org/W4385573489","https://openalex.org/W4389520255","https://openalex.org/W4389524493","https://openalex.org/W4396739142","https://openalex.org/W4401042981","https://openalex.org/W4402667093","https://openalex.org/W4402684151"],"related_works":[],"abstract_inverted_index":{"Instruction":[0],"Fine-Timing":[1],"(IFT)":[2],"is":[3],"crucial":[4],"for":[5,56,79,174],"aligning":[6],"large":[7,152],"language":[8],"models":[9,167],"(LLMs)":[10],"with":[11],"human":[12,95],"preferences,":[13],"and":[14,32,76,107,141,162],"selecting":[15],"a":[16,99,114,127,151],"small":[17],"yet":[18],"representative":[19],"subset":[20],"from":[21,38],"massive":[22],"data":[23,47,52,81,85,176],"significantly":[24],"facilitates":[25],"IFT":[26,80,175],"in":[27,133],"terms":[28],"of":[29,43,124],"both":[30],"efficiency":[31],"effectiveness.":[33],"Nevertheless,":[34],"existing":[35,148],"approaches":[36,149],"suffer":[37],"two":[39],"limitations:":[40],"the":[41,50,64,104,120],"use":[42],"simple":[44],"heuristics":[45],"restricts":[46],"diversity,":[48,86],"while":[49],"singleton":[51],"quality":[53,122],"evaluation":[54,123],"accounts":[55],"inconsistent":[57,130],"criteria":[58,131],"between":[59],"independent":[60],"samples.":[61],"To":[62,83],"address":[63],"issues,":[65],"we":[66,87,112],"present":[67],"TACOS,":[68],"an":[69],"innovative":[70],"method":[71,117],"that":[72,118,145],"integrates":[73],"Open":[74],"Tagging":[75],"Comparative":[77],"Scoring":[78],"selection.":[82,177],"capture":[84],"leverage":[88],"LLMs":[89],"to":[90,94,102],"assign":[91],"open-domain":[92],"tags":[93,106],"queries,":[96],"followed":[97],"by":[98,150],"normalization":[100],"stage":[101],"denoise":[103],"open":[105],"enable":[108],"efficient":[109],"clustering.":[110],"Additionally,":[111],"suggest":[113],"comparative":[115],"scoring":[116],"allows":[119],"relative":[121],"samples":[125],"within":[126],"cluster,":[128],"avoiding":[129],"seen":[132],"singleton-based":[134],"evaluations.":[135],"Extensive":[136],"experiments":[137],"across":[138],"diverse":[139],"datasets":[140],"LLM":[142],"architectures":[143],"demonstrate":[144],"TACOS":[146],"outperforms":[147],"margin.":[153],"Notably,":[154],"it":[155],"achieves":[156],"superior":[157],"instruction-following":[158],"performance":[159],"on":[160,168],"MT-Bench":[161],"ranks":[163],"1st":[164],"among":[165],"LLaMA2-7B-Based":[166],"AlpacaEval":[169],"2.0,":[170],"illustrating":[171],"its":[172],"efficacy":[173]},"counts_by_year":[],"updated_date":"2026-03-07T16:01:11.037858","created_date":"2025-10-30T00:00:00"}
