{"id":"https://openalex.org/W4400582826","doi":"https://doi.org/10.1145/3643753","title":"Natural Is the Best: Model-Agnostic Code Simplification for Pre-trained Large Language Models","display_name":"Natural Is the Best: Model-Agnostic Code Simplification for Pre-trained Large Language Models","publication_year":2024,"publication_date":"2024-07-12","ids":{"openalex":"https://openalex.org/W4400582826","doi":"https://doi.org/10.1145/3643753"},"language":"en","primary_location":{"id":"doi:10.1145/3643753","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3643753","pdf_url":null,"source":{"id":"https://openalex.org/S4404663975","display_name":"Proceedings of the ACM on software engineering.","issn_l":"2994-970X","issn":["2994-970X"],"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM on Software Engineering","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://doi.org/10.1145/3643753","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100322854","display_name":"Yan Wang","orcid":"https://orcid.org/0000-0002-9876-5823"},"institutions":[{"id":"https://openalex.org/I137867983","display_name":"Central University of Finance and Economics","ror":"https://ror.org/008e3hf02","country_code":"CN","type":"education","lineage":["https://openalex.org/I137867983"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yan Wang","raw_affiliation_strings":["Central University of Finance and Economics, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Central University of Finance and Economics, Beijing, China","institution_ids":["https://openalex.org/I137867983"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5078582241","display_name":"X. Li","orcid":"https://orcid.org/0009-0004-6845-2721"},"institutions":[{"id":"https://openalex.org/I137867983","display_name":"Central University of Finance and Economics","ror":"https://ror.org/008e3hf02","country_code":"CN","type":"education","lineage":["https://openalex.org/I137867983"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaoning Li","raw_affiliation_strings":["Central University of Finance and Economics, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Central University of Finance and Economics, Beijing, China","institution_ids":["https://openalex.org/I137867983"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5089000736","display_name":"Tien N. Nguyen","orcid":"https://orcid.org/0009-0006-7962-6090"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tien N. Nguyen","raw_affiliation_strings":["University of Texas at Dallas, Dallas, USA"],"affiliations":[{"raw_affiliation_string":"University of Texas at Dallas, Dallas, USA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5070990953","display_name":"Shaohua Wang","orcid":"https://orcid.org/0000-0001-5777-7759"},"institutions":[{"id":"https://openalex.org/I137867983","display_name":"Central University of Finance and Economics","ror":"https://ror.org/008e3hf02","country_code":"CN","type":"education","lineage":["https://openalex.org/I137867983"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shaohua Wang","raw_affiliation_strings":["Central University of Finance and Economics, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Central University of Finance and Economics, Beijing, China","institution_ids":["https://openalex.org/I137867983"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102737361","display_name":"Chao Ni","orcid":"https://orcid.org/0000-0002-2906-0598"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chao Ni","raw_affiliation_strings":["Zhejiang University, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Zhejiang University, Hangzhou, China","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101079693","display_name":"Ling Ding","orcid":"https://orcid.org/0009-0005-8189-2040"},"institutions":[{"id":"https://openalex.org/I137867983","display_name":"Central University of Finance and Economics","ror":"https://ror.org/008e3hf02","country_code":"CN","type":"education","lineage":["https://openalex.org/I137867983"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ling Ding","raw_affiliation_strings":["Central University of Finance and Economics, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Central University of Finance and Economics, Beijing, China","institution_ids":["https://openalex.org/I137867983"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5100322854"],"corresponding_institution_ids":["https://openalex.org/I137867983"],"apc_list":null,"apc_paid":null,"fwci":4.071,"has_fulltext":false,"cited_by_count":5,"citation_normalized_percentile":{"value":0.94334405,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":94,"max":97},"biblio":{"volume":"1","issue":"FSE","first_page":"586","last_page":"608"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.9930999875068665,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.9930999875068665,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10743","display_name":"Software Testing and Debugging Techniques","score":0.9922999739646912,"subfield":{"id":"https://openalex.org/subfields/1712","display_name":"Software"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12423","display_name":"Software Reliability and Analysis Research","score":0.9908000230789185,"subfield":{"id":"https://openalex.org/subfields/1712","display_name":"Software"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6955089569091797},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.598877489566803},{"id":"https://openalex.org/keywords/natural","display_name":"Natural (archaeology)","score":0.5388462543487549},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5181628465652466},{"id":"https://openalex.org/keywords/natural-language","display_name":"Natural language","score":0.47693222761154175},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.44951510429382324},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.43145716190338135},{"id":"https://openalex.org/keywords/history","display_name":"History","score":0.10135716199874878}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6955089569091797},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.598877489566803},{"id":"https://openalex.org/C2776608160","wikidata":"https://www.wikidata.org/wiki/Q4785462","display_name":"Natural (archaeology)","level":2,"score":0.5388462543487549},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5181628465652466},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.47693222761154175},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.44951510429382324},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.43145716190338135},{"id":"https://openalex.org/C95457728","wikidata":"https://www.wikidata.org/wiki/Q309","display_name":"History","level":0,"score":0.10135716199874878},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.0},{"id":"https://openalex.org/C166957645","wikidata":"https://www.wikidata.org/wiki/Q23498","display_name":"Archaeology","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3643753","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3643753","pdf_url":null,"source":{"id":"https://openalex.org/S4404663975","display_name":"Proceedings of the ACM on software engineering.","issn_l":"2994-970X","issn":["2994-970X"],"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM on Software Engineering","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1145/3643753","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3643753","pdf_url":null,"source":{"id":"https://openalex.org/S4404663975","display_name":"Proceedings of the ACM on software engineering.","issn_l":"2994-970X","issn":["2994-970X"],"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM on Software Engineering","raw_type":"journal-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions","score":0.5199999809265137}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":25,"referenced_works":["https://openalex.org/W2170224888","https://openalex.org/W2747329762","https://openalex.org/W2953767132","https://openalex.org/W3089621332","https://openalex.org/W3090668753","https://openalex.org/W3105247453","https://openalex.org/W3118485687","https://openalex.org/W3160492491","https://openalex.org/W3161517159","https://openalex.org/W3161997752","https://openalex.org/W3168488662","https://openalex.org/W3170092793","https://openalex.org/W3177116043","https://openalex.org/W3194069451","https://openalex.org/W3194346579","https://openalex.org/W3194682511","https://openalex.org/W4205596491","https://openalex.org/W4210440021","https://openalex.org/W4283751459","https://openalex.org/W4284688961","https://openalex.org/W4284705844","https://openalex.org/W4284710241","https://openalex.org/W4285605356","https://openalex.org/W4308732630","https://openalex.org/W4384302789"],"related_works":["https://openalex.org/W4283262748","https://openalex.org/W2502773048","https://openalex.org/W4252362398","https://openalex.org/W48866389","https://openalex.org/W2117651364","https://openalex.org/W3112533414","https://openalex.org/W2490481324","https://openalex.org/W4226226396","https://openalex.org/W3153750606","https://openalex.org/W4308854837"],"abstract_inverted_index":{"Pre-trained":[0],"Large":[1],"Language":[2],"Models":[3],"(LLM)":[4],"have":[5],"achieved":[6],"remarkable":[7],"successes":[8],"in":[9,18,258],"several":[10],"domains.":[11],"However,":[12],"code-oriented":[13],"LLMs":[14,137,153],"are":[15,80,96],"often":[16],"heavy":[17],"computational":[19],"complexity,":[20],"and":[21,87,94,157,165,212,222,249,262,268],"quadratically":[22],"with":[23,180,318],"the":[24,27,33,39,43,47,53,58,64,71,84,88,92,99,103,107,111,116,141,152,171,181,188,200,214,219,237,282,292,319],"length":[25],"of":[26,36,74,98,143,174,190,202,260,294],"input":[28,34,48,65,108,144],"code":[29,49,133,145,163,175,194,206,266,334],"sequence.":[30],"Toward":[31],"simplifying":[32],"program":[35,66,109],"an":[37,75,148],"LLM,":[38,76],"state-of-the-art":[40,238,283],"approach":[41],"has":[42,176],"strategies":[44],"to":[45,62,299,316,337],"filter":[46],"tokens":[50,192,204],"based":[51],"on":[52,70,120,140,151,184,193,205,265,331],"attention":[54,72],"scores":[55],"given":[56],"by":[57,82,240,297],"LLM.":[59],"The":[60,226],"decision":[61],"simplify":[63],"should":[67],"not":[68,102],"rely":[69],"patterns":[73,79],"as":[77],"these":[78],"influenced":[81],"both":[83],"model":[85,93,117],"architecture":[86],"pre-training":[89],"dataset.":[90,123],"Since":[91],"dataset":[95],"part":[97],"solution":[100,135],"domain,":[101],"problem":[104],"domain":[105],"where":[106],"belongs,":[110],"outcome":[112],"may":[113],"differ":[114],"when":[115],"is":[118,208,277],"pre-trained":[119],"a":[121,131,177,328],"different":[122],"We":[124],"propose":[125],"S":[126,231,273,286],"lim":[127,232,274,287],"C":[128,233,275,288],"ode":[129,234,276,289],",":[130],"model-agnostic":[132,333],"simplification":[134,195,207,335],"for":[136,159,218,327],"that":[138,169,230],"depends":[139],"nature":[142],"tokens.":[146],"As":[147],"empirical":[149,227],"study":[150],"including":[154],"CodeBERT,":[155],"CodeT5,":[156],"GPT-4":[158,296],"two":[160],"main":[161],"tasks:":[162],"search":[164,267],"summarization,":[166,269],"we":[167,325],"reported":[168],"1)":[170],"removal":[172],"ratio":[173,183],"linear-like":[178],"relation":[179],"saving":[182],"training":[185],"time,":[186],"2)":[187],"impact":[189,201],"categorized":[191,203],"can":[196,235,290],"vary":[197],"significantly,":[198],"3)":[199],"task-specific":[209],"but":[210],"model-agnostic,":[211],"4)":[213],"above":[215],"findings":[216],"hold":[217],"paradigm\u2013prompt":[220],"engineering":[221],"interactive":[223],"in-context":[224],"learning.":[225],"results":[228,315],"showed":[229],"improve":[236],"technique":[239],"<mml:math":[241,250,300],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[242,251,301],"display=\"inline\">":[243,252,302],"<mml:mrow>":[244,253,303],"<mml:mn>9.46</mml:mn>":[245],"<mml:mtext>%</mml:mtext>":[246,255,305],"</mml:mrow>":[247,256,306],"</mml:math>":[248,257,307],"<mml:mn>5.15</mml:mn>":[254],"terms":[259],"MRR":[261],"BLEU":[263],"score":[264],"respectively.":[270],"More":[271],"importantly,":[272],"133":[278],"times":[279],"faster":[280],"than":[281],"approach.":[284],"Additionally,":[285],"reduce":[291],"cost":[293],"invoking":[295],"up":[298],"<mml:mn>24</mml:mn>":[304],"per":[308],"API":[309],"query,":[310],"while":[311],"still":[312],"producing":[313],"comparable":[314],"those":[317],"original":[320],"code.":[321],"With":[322],"this":[323],"result,":[324],"call":[326],"new":[329],"direction":[330],"code-based,":[332],"solutions":[336],"further":[338],"empower":[339],"LLMs.":[340]},"counts_by_year":[{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":2}],"updated_date":"2026-02-07T06:11:34.122080","created_date":"2025-10-10T00:00:00"}
