{"id":"https://openalex.org/W7129537528","doi":"https://doi.org/10.48550/arxiv.2602.14696","title":"A Critical Look at Targeted Instruction Selection: Disentangling What Matters (and What Doesn't)","display_name":"A Critical Look at Targeted Instruction Selection: Disentangling What Matters (and What Doesn't)","publication_year":2026,"publication_date":"2026-02-16","ids":{"openalex":"https://openalex.org/W7129537528","doi":"https://doi.org/10.48550/arxiv.2602.14696"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2602.14696","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.14696","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2602.14696","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5126271987","display_name":"Nihal V. Nayak","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Nayak, Nihal V.","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5114285369","display_name":"Paula Rodriguez-Diaz","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Rodriguez-Diaz, Paula","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5126256951","display_name":"Neha Hulkund","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hulkund, Neha","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5126250696","display_name":"Sara Beery","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Beery, Sara","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5110947683","display_name":"David Alvarez-Melis","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Alvarez-Melis, David","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5126271987"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.19509999454021454,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.19509999454021454,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.13369999825954437,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.0851999968290329,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/selection","display_name":"Selection (genetic algorithm)","score":0.7870000004768372},{"id":"https://openalex.org/keywords/clarity","display_name":"CLARITY","score":0.6883999705314636},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.620199978351593},{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.5408999919891357},{"id":"https://openalex.org/keywords/core","display_name":"Core (optical fiber)","score":0.484499990940094},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.47749999165534973},{"id":"https://openalex.org/keywords/greedy-algorithm","display_name":"Greedy algorithm","score":0.46639999747276306},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.4643000066280365}],"concepts":[{"id":"https://openalex.org/C81917197","wikidata":"https://www.wikidata.org/wiki/Q628760","display_name":"Selection (genetic algorithm)","level":2,"score":0.7870000004768372},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7346000075340271},{"id":"https://openalex.org/C2777146004","wikidata":"https://www.wikidata.org/wiki/Q14949826","display_name":"CLARITY","level":2,"score":0.6883999705314636},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.620199978351593},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.5408999919891357},{"id":"https://openalex.org/C2164484","wikidata":"https://www.wikidata.org/wiki/Q5170150","display_name":"Core (optical fiber)","level":2,"score":0.484499990940094},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.47749999165534973},{"id":"https://openalex.org/C51823790","wikidata":"https://www.wikidata.org/wiki/Q504353","display_name":"Greedy algorithm","level":2,"score":0.46639999747276306},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.4643000066280365},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.4584999978542328},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4392000138759613},{"id":"https://openalex.org/C58489278","wikidata":"https://www.wikidata.org/wiki/Q1172284","display_name":"Data set","level":2,"score":0.4025999903678894},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.3993000090122223},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3693000078201294},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.35339999198913574},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.34209999442100525},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.32749998569488525},{"id":"https://openalex.org/C93959086","wikidata":"https://www.wikidata.org/wiki/Q6888345","display_name":"Model selection","level":2,"score":0.32710000872612},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.31949999928474426},{"id":"https://openalex.org/C116409475","wikidata":"https://www.wikidata.org/wiki/Q1385056","display_name":"External Data Representation","level":2,"score":0.3183000087738037},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.30709999799728394},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.2937999963760376},{"id":"https://openalex.org/C192028432","wikidata":"https://www.wikidata.org/wiki/Q845739","display_name":"Query language","level":2,"score":0.2554999887943268},{"id":"https://openalex.org/C2776502983","wikidata":"https://www.wikidata.org/wiki/Q690182","display_name":"Contrast (vision)","level":2,"score":0.2549999952316284},{"id":"https://openalex.org/C143271835","wikidata":"https://www.wikidata.org/wiki/Q254515","display_name":"Similitude","level":2,"score":0.25380000472068787},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.250900000333786},{"id":"https://openalex.org/C2780522230","wikidata":"https://www.wikidata.org/wiki/Q1140419","display_name":"Ambiguity","level":2,"score":0.250900000333786}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2602.14696","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.14696","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2602.14696","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.14696","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"display_name":"Quality Education","score":0.557008683681488,"id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Instruction":[0],"fine-tuning":[1],"of":[2,12,58,170],"large":[3,18],"language":[4],"models":[5],"(LLMs)":[6],"often":[7,49],"involves":[8],"selecting":[9,69],"a":[10,17,22,62,140,198],"subset":[11,177],"instruction":[13,37],"training":[14],"data":[15,95,115,203],"from":[16,26],"candidate":[19],"pool,":[20],"using":[21],"small":[23],"query":[24,123,180],"set":[25],"the":[27,33,56,91,122,175,179],"target":[28,73],"task.":[29],"Despite":[30],"growing":[31],"interest,":[32],"literature":[34],"on":[35,68,149],"targeted":[36],"selection":[38,47,98,143,166,204],"remains":[39],"fragmented":[40],"and":[41,53,88,97,108,129,178,182,197],"opaque:":[42],"methods":[43],"vary":[44],"widely":[45],"in":[46,205],"budgets,":[48,153],"omit":[50],"zero-shot":[51],"baselines,":[52],"frequently":[54],"entangle":[55],"contributions":[57],"key":[59],"components.":[60],"As":[61],"result,":[63],"practitioners":[64],"lack":[65],"actionable":[66],"guidance":[67],"instructions":[70],"for":[71,200],"their":[72],"tasks.":[74],"In":[75],"this":[76,84,184],"work,":[77],"we":[78,162],"aim":[79],"to":[80,83,121,146],"bring":[81],"clarity":[82],"landscape":[85],"by":[86],"disentangling":[87],"systematically":[89],"analyzing":[90],"two":[92],"core":[93],"ingredients:":[94],"representation":[96],"algorithms.":[99],"Our":[100],"framework":[101],"enables":[102],"controlled":[103],"comparisons":[104],"across":[105,127],"models,":[106],"tasks,":[107],"budgets.":[109,160],"We":[110],"find":[111],"that":[112],"only":[113],"gradient-based":[114,136],"representations":[116,137],"choose":[117],"subsets":[118],"whose":[119],"similarity":[120],"consistently":[124],"predicts":[125],"performance":[126],"datasets":[128],"models.":[130],"While":[131],"no":[132],"single":[133],"method":[134],"dominates,":[135],"paired":[138],"with":[139,186],"greedy":[141],"round-robin":[142],"algorithm":[144],"tend":[145],"perform":[147],"best":[148],"average":[150],"at":[151,158,212],"low":[152],"but":[154],"these":[155],"benefits":[156],"diminish":[157],"larger":[159],"Finally,":[161],"unify":[163],"several":[164],"existing":[165],"algorithms":[167],"as":[168],"forms":[169],"approximate":[171],"distance":[172],"minimization":[173],"between":[174],"selected":[176],"set,":[181],"support":[183],"view":[185],"new":[187],"generalization":[188],"bounds.":[189],"More":[190],"broadly,":[191],"our":[192],"findings":[193],"provide":[194],"critical":[195],"insights":[196],"foundation":[199],"more":[201],"principled":[202],"LLM":[206],"fine-tuning.":[207],"The":[208],"code":[209],"is":[210],"available":[211],"https://github.com/dcml-lab/targeted-instruction-selection.":[213]},"counts_by_year":[],"updated_date":"2026-02-18T06:25:47.457606","created_date":"2026-02-18T00:00:00"}
