{"id":"https://openalex.org/W4396680888","doi":"https://doi.org/10.1162/tacl_a_00662","title":"<scp>AutoPEFT</scp>: Automatic Configuration Search for Parameter-Efficient Fine-Tuning","display_name":"<scp>AutoPEFT</scp>: Automatic Configuration Search for Parameter-Efficient Fine-Tuning","publication_year":2024,"publication_date":"2024-01-01","ids":{"openalex":"https://openalex.org/W4396680888","doi":"https://doi.org/10.1162/tacl_a_00662"},"language":"en","primary_location":{"id":"doi:10.1162/tacl_a_00662","is_oa":true,"landing_page_url":"https://doi.org/10.1162/tacl_a_00662","pdf_url":"https://direct.mit.edu/tacl/article-pdf/doi/10.1162/tacl_a_00662/2369530/tacl_a_00662.pdf","source":{"id":"https://openalex.org/S2729999759","display_name":"Transactions of the Association for Computational Linguistics","issn_l":"2307-387X","issn":["2307-387X"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320244","host_organization_name":"Association for Computational Linguistics","host_organization_lineage":["https://openalex.org/P4310320244"],"host_organization_lineage_names":["Association for Computational Linguistics"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Transactions of the Association for Computational Linguistics","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://direct.mit.edu/tacl/article-pdf/doi/10.1162/tacl_a_00662/2369530/tacl_a_00662.pdf","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5031130592","display_name":"Han Zhou","orcid":"https://orcid.org/0000-0002-8367-7695"},"institutions":[{"id":"https://openalex.org/I241749","display_name":"University of Cambridge","ror":"https://ror.org/013meh722","country_code":"GB","type":"education","lineage":["https://openalex.org/I241749"]}],"countries":["GB"],"is_corresponding":true,"raw_author_name":"Han Zhou","raw_affiliation_strings":["Language Technology Lab, University of Cambridge, UK. hz416@cam.ac.uk"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Language Technology Lab, University of Cambridge, UK. hz416@cam.ac.uk","institution_ids":["https://openalex.org/I241749"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5083203503","display_name":"Xingchen Wan","orcid":"https://orcid.org/0000-0003-0074-0597"},"institutions":[{"id":"https://openalex.org/I2802123492","display_name":"Oxford Research Group","ror":"https://ror.org/00z4w4f29","country_code":"GB","type":"nonprofit","lineage":["https://openalex.org/I2802123492"]},{"id":"https://openalex.org/I40120149","display_name":"University of Oxford","ror":"https://ror.org/052gg0110","country_code":"GB","type":"education","lineage":["https://openalex.org/I40120149"]}],"countries":["GB"],"is_corresponding":true,"raw_author_name":"Xingchen Wan","raw_affiliation_strings":["Machine Learning Research Group, University of Oxford, UK. xwan@robots.ox.ac.uk"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Machine Learning Research Group, University of Oxford, UK. xwan@robots.ox.ac.uk","institution_ids":["https://openalex.org/I2802123492","https://openalex.org/I40120149"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5014866912","display_name":"Ivan Vuli\u0107","orcid":"https://orcid.org/0000-0002-5161-5422"},"institutions":[{"id":"https://openalex.org/I241749","display_name":"University of Cambridge","ror":"https://ror.org/013meh722","country_code":"GB","type":"education","lineage":["https://openalex.org/I241749"]}],"countries":["GB"],"is_corresponding":true,"raw_author_name":"Ivan Vuli\u0107","raw_affiliation_strings":["Language Technology Lab, University of Cambridge, UK. iv250@cam.ac.uk"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Language Technology Lab, University of Cambridge, UK. iv250@cam.ac.uk","institution_ids":["https://openalex.org/I241749"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5081393566","display_name":"Anna Korhonen","orcid":"https://orcid.org/0000-0002-3692-3144"},"institutions":[{"id":"https://openalex.org/I241749","display_name":"University of Cambridge","ror":"https://ror.org/013meh722","country_code":"GB","type":"education","lineage":["https://openalex.org/I241749"]}],"countries":["GB"],"is_corresponding":true,"raw_author_name":"Anna Korhonen","raw_affiliation_strings":["Language Technology Lab, University of Cambridge, UK. alk23@cam.ac.uk"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Language Technology Lab, University of Cambridge, UK. alk23@cam.ac.uk","institution_ids":["https://openalex.org/I241749"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5014866912","https://openalex.org/A5031130592","https://openalex.org/A5081393566","https://openalex.org/A5083203503"],"corresponding_institution_ids":["https://openalex.org/I241749","https://openalex.org/I2802123492","https://openalex.org/I40120149"],"apc_list":null,"apc_paid":null,"fwci":5.8314,"has_fulltext":false,"cited_by_count":19,"citation_normalized_percentile":{"value":0.9653657,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":97,"max":100},"biblio":{"volume":"12","issue":null,"first_page":"525","last_page":"542"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9958999752998352,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9958999752998352,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11032","display_name":"VLSI and Analog Circuit Testing","score":0.9912999868392944,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12326","display_name":"Network Packet Processing and Optimization","score":0.9789999723434448,"subfield":{"id":"https://openalex.org/subfields/1708","display_name":"Hardware and Architecture"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8112586736679077},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.33273059129714966}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8112586736679077},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.33273059129714966}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1162/tacl_a_00662","is_oa":true,"landing_page_url":"https://doi.org/10.1162/tacl_a_00662","pdf_url":"https://direct.mit.edu/tacl/article-pdf/doi/10.1162/tacl_a_00662/2369530/tacl_a_00662.pdf","source":{"id":"https://openalex.org/S2729999759","display_name":"Transactions of the Association for Computational Linguistics","issn_l":"2307-387X","issn":["2307-387X"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320244","host_organization_name":"Association for Computational Linguistics","host_organization_lineage":["https://openalex.org/P4310320244"],"host_organization_lineage_names":["Association for Computational Linguistics"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Transactions of the Association for Computational Linguistics","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:be50c482371847b981c1d5afe847ef04","is_oa":false,"landing_page_url":"https://doaj.org/article/be50c482371847b981c1d5afe847ef04","pdf_url":null,"source":{"id":"https://openalex.org/S4306401280","display_name":"DOAJ (DOAJ: Directory of Open Access Journals)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Transactions of the Association for Computational Linguistics, Vol 12 (2024)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.1162/tacl_a_00662","is_oa":true,"landing_page_url":"https://doi.org/10.1162/tacl_a_00662","pdf_url":"https://direct.mit.edu/tacl/article-pdf/doi/10.1162/tacl_a_00662/2369530/tacl_a_00662.pdf","source":{"id":"https://openalex.org/S2729999759","display_name":"Transactions of the Association for Computational Linguistics","issn_l":"2307-387X","issn":["2307-387X"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320244","host_organization_name":"Association for Computational Linguistics","host_organization_lineage":["https://openalex.org/P4310320244"],"host_organization_lineage_names":["Association for Computational Linguistics"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Transactions of the Association for Computational Linguistics","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4396680888.pdf"},"referenced_works_count":60,"referenced_works":["https://openalex.org/W1558919105","https://openalex.org/W2923014074","https://openalex.org/W2946417913","https://openalex.org/W2963341956","https://openalex.org/W2965373594","https://openalex.org/W2970427081","https://openalex.org/W2981406437","https://openalex.org/W2981985696","https://openalex.org/W3015107381","https://openalex.org/W3020268419","https://openalex.org/W3099178230","https://openalex.org/W3099793224","https://openalex.org/W3101498587","https://openalex.org/W3113331478","https://openalex.org/W3127389359","https://openalex.org/W3157123286","https://openalex.org/W3164008977","https://openalex.org/W3168867926","https://openalex.org/W3174702398","https://openalex.org/W3174770825","https://openalex.org/W3176828726","https://openalex.org/W3206816211","https://openalex.org/W3206907172","https://openalex.org/W4205991051","https://openalex.org/W4206178588","https://openalex.org/W4212774754","https://openalex.org/W4287855051","https://openalex.org/W4287890953","https://openalex.org/W4288089799","https://openalex.org/W4289258965","https://openalex.org/W4289761856","https://openalex.org/W4317889697","https://openalex.org/W4321650187","https://openalex.org/W4385572883","https://openalex.org/W4385573610","https://openalex.org/W4386566659","https://openalex.org/W4389523852","https://openalex.org/W6729956949","https://openalex.org/W6752515464","https://openalex.org/W6759579507","https://openalex.org/W6759828284","https://openalex.org/W6762392948","https://openalex.org/W6766673545","https://openalex.org/W6769627184","https://openalex.org/W6771127686","https://openalex.org/W6771859737","https://openalex.org/W6778883912","https://openalex.org/W6786190416","https://openalex.org/W6790996667","https://openalex.org/W6791245825","https://openalex.org/W6795637285","https://openalex.org/W6796710205","https://openalex.org/W6797716722","https://openalex.org/W6802669662","https://openalex.org/W6802744804","https://openalex.org/W6804126242","https://openalex.org/W6810310701","https://openalex.org/W6837789219","https://openalex.org/W6838701581","https://openalex.org/W6848918270"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W2358668433","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W2382290278","https://openalex.org/W4395014643","https://openalex.org/W4391913857","https://openalex.org/W2350741829"],"abstract_inverted_index":{"Abstract":[0],"Large":[1],"pretrained":[2],"language":[3],"models":[4],"are":[5,73,86,151,175],"widely":[6],"used":[7],"in":[8,68,88,97,129],"downstream":[9],"NLP":[10],"tasks":[11],"via":[12],"task-":[13],"specific":[14],"fine-tuning,":[15],"but":[16],"such":[17,55],"procedures":[18],"can":[19],"be":[20],"costly.":[21],"Recently,":[22],"Parameter-Efficient":[23],"Fine-Tuning":[24],"(PEFT)":[25],"methods":[26,173],"have":[27],"achieved":[28],"strong":[29,142],"task":[30],"performance":[31],"while":[32],"updating":[33],"much":[34],"fewer":[35],"parameters":[36,149],"than":[37,180],"full":[38],"model":[39],"fine-tuning":[40],"(FFT).":[41],"However,":[42],"it":[43,76],"is":[44,77],"non-trivial":[45],"to":[46],"make":[47],"informed":[48],"design":[49,111],"choices":[50],"on":[51,159,176],"the":[52,59,66,70,81],"PEFT":[53,71,106,120,172],"configurations,":[54],"as":[56,122],"their":[57,91],"architecture,":[58],"number":[60],"of":[61,90,139,148],"tunable":[62],"parameters,":[63],"and":[64,161,174],"even":[65],"layers":[67],"which":[69],"modules":[72,121],"inserted.":[74],"Consequently,":[75],"highly":[78,153],"likely":[79],"that":[80,150,166],"current,":[82],"manually":[83],"designed":[84],"configurations":[85,140,168],"suboptimal":[87],"terms":[89],"performance-efficiency":[92],"trade-off.":[93],"Inspired":[94],"by":[95],"advances":[96],"neural":[98],"architecture":[99],"search,":[100],"we":[101,133,164],"propose":[102],"AutoPEFT":[103],"for":[104],"automatic":[105],"configuration":[107,114],"selection:":[108],"We":[109],"first":[110],"an":[112],"expressive":[113],"search":[115],"space":[116],"with":[117,141],"multiple":[118],"representative":[119],"building":[123],"blocks.":[124],"Using":[125],"multi-objective":[126],"Bayesian":[127],"optimization":[128],"a":[130,136],"low-cost":[131],"setup,":[132],"then":[134],"discover":[135],"Pareto-optimal":[137],"set":[138],"performance-cost":[143],"trade-offs":[144],"across":[145,155],"different":[146,156],"numbers":[147],"also":[152],"transferable":[154],"tasks.":[157],"Empirically,":[158],"GLUE":[160],"SuperGLUE":[162],"tasks,":[163],"show":[165],"AutoPEFT-discovered":[167],"significantly":[169],"outperform":[170],"existing":[171],"par":[177],"or":[178],"better":[179],"FFT":[181],"without":[182],"incurring":[183],"substantial":[184],"training":[185],"efficiency":[186],"costs.":[187]},"counts_by_year":[{"year":2026,"cited_by_count":6},{"year":2025,"cited_by_count":9},{"year":2024,"cited_by_count":4}],"updated_date":"2026-06-09T15:46:55.921056","created_date":"2025-10-10T00:00:00"}
