{"id":"https://openalex.org/W4402670361","doi":"https://doi.org/10.18653/v1/2024.findings-acl.623","title":"Smaller Language Models are capable of selecting Instruction-Tuning Training Data for Larger Language Models","display_name":"Smaller Language Models are capable of selecting Instruction-Tuning Training Data for Larger Language Models","publication_year":2024,"publication_date":"2024-01-01","ids":{"openalex":"https://openalex.org/W4402670361","doi":"https://doi.org/10.18653/v1/2024.findings-acl.623"},"language":"en","primary_location":{"id":"doi:10.18653/v1/2024.findings-acl.623","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2024.findings-acl.623","pdf_url":"https://aclanthology.org/2024.findings-acl.623.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Findings of the Association for Computational Linguistics ACL 2024","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://aclanthology.org/2024.findings-acl.623.pdf","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5032931351","display_name":"Dheeraj Mekala","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dheeraj Mekala","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5090566031","display_name":"Alex Nguyen","orcid":"https://orcid.org/0000-0002-5109-871X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Alex Nguyen","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5039500313","display_name":"Jingbo Shang","orcid":"https://orcid.org/0000-0002-7249-4404"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jingbo Shang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.6109,"has_fulltext":true,"cited_by_count":2,"citation_normalized_percentile":{"value":0.73998821,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"10456","last_page":"10470"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9923999905586243,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9923999905586243,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.991100013256073,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8061975240707397},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.7118395566940308},{"id":"https://openalex.org/keywords/training","display_name":"Training (meteorology)","score":0.6314826607704163},{"id":"https://openalex.org/keywords/training-set","display_name":"Training set","score":0.5341253280639648},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.517841637134552},{"id":"https://openalex.org/keywords/data-modeling","display_name":"Data modeling","score":0.5060555338859558},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.45883315801620483},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.33067911863327026},{"id":"https://openalex.org/keywords/software-engineering","display_name":"Software engineering","score":0.11757370829582214}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8061975240707397},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.7118395566940308},{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.6314826607704163},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.5341253280639648},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.517841637134552},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.5060555338859558},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.45883315801620483},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.33067911863327026},{"id":"https://openalex.org/C115903868","wikidata":"https://www.wikidata.org/wiki/Q80993","display_name":"Software engineering","level":1,"score":0.11757370829582214},{"id":"https://openalex.org/C153294291","wikidata":"https://www.wikidata.org/wiki/Q25261","display_name":"Meteorology","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.18653/v1/2024.findings-acl.623","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2024.findings-acl.623","pdf_url":"https://aclanthology.org/2024.findings-acl.623.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Findings of the Association for Computational Linguistics ACL 2024","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.18653/v1/2024.findings-acl.623","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2024.findings-acl.623","pdf_url":"https://aclanthology.org/2024.findings-acl.623.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Findings of the Association for Computational Linguistics ACL 2024","raw_type":"proceedings-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.5899999737739563,"display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4402670361.pdf","grobid_xml":"https://content.openalex.org/works/W4402670361.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W230091440","https://openalex.org/W2233261550","https://openalex.org/W2810751659","https://openalex.org/W4394050964","https://openalex.org/W2551249631","https://openalex.org/W4287644835","https://openalex.org/W3092281475","https://openalex.org/W3098003361","https://openalex.org/W4285322112","https://openalex.org/W4292794239"],"abstract_inverted_index":{"Instruction-tuning":[0],"language":[1,46],"models":[2,47,80,141],"has":[3],"become":[4],"a":[5,30,104,118,164,172],"crucial":[6],"step":[7],"in":[8,88,123,145],"aligning":[9],"them":[10],"for":[11,79,117],"general":[12],"use.Typically,":[13],"this":[14,26,76],"process":[15],"involves":[16],"extensive":[17],"training":[18,24,32,55,65,112,132,151,168],"on":[19,36,66,133],"large":[20],"datasets,":[21],"incurring":[22],"high":[23],"costs.In":[25],"paper,":[27],"we":[28,90],"introduce":[29],"novel":[31,165],"data":[33,97,113,169],"selection":[34],"based":[35],"the":[37,41,49,67,96,134],"learning":[38],"percentage":[39],"of":[40],"samples.We":[42],"assert":[43],"that":[44,75,95],"current":[45],"possess":[48],"capability":[50],"to":[51,58,64,85,131,143,167],"autonomously":[52],"select":[53],"high-quality":[54,111],"data,":[56],"leading":[57],"comparable":[59],"or":[60,126],"improved":[61],"performance":[62],"compared":[63,130],"entire":[68],"dataset.Our":[69],"experiments":[70],"span":[71],"different-sized":[72],"models,":[73],"revealing":[74],"characteristic":[77],"holds":[78],"ranging":[81],"from":[82],"1B":[83],"(small)":[84],"13B":[86,120,144],"(large)":[87],"size.Moreover,":[89],"demonstrate":[91],"an":[92,124],"interesting":[93],"finding":[94],"hardness":[98],"transfers":[99],"across":[100],"model":[101,107,129],"sizes,":[102],"and":[103,139,153],"smaller":[105],"350M":[106],"can":[108],"effectively":[109],"curate":[110],"with":[114],"hard":[115],"samples":[116],"larger":[119],"model,":[121],"resulting":[122],"equally":[125],"superior":[127],"instructiontuned":[128],"complete":[135],"dataset.Utilizing":[136],"open-sourced":[137],"OPT":[138],"Llama-2":[140],"up":[142],"size,":[146],"two":[147],"publicly":[148],"available":[149],"instruction-tuning":[150],"datasets":[152],"evaluated":[154],"by":[155],"both":[156],"automatic":[157],"metrics":[158],"&":[159],"humans,":[160],"our":[161],"paper":[162],"introduces":[163],"approach":[166],"selection,":[170],"showcasing":[171],"more":[173],"efficient":[174],"alternative.":[175]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2024,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
