{"id":"https://openalex.org/W4401863706","doi":"https://doi.org/10.1145/3637528.3671609","title":"MFTCoder: Boosting Code LLMs with Multitask Fine-Tuning","display_name":"MFTCoder: Boosting Code LLMs with Multitask Fine-Tuning","publication_year":2024,"publication_date":"2024-08-24","ids":{"openalex":"https://openalex.org/W4401863706","doi":"https://doi.org/10.1145/3637528.3671609"},"language":"en","primary_location":{"id":"doi:10.1145/3637528.3671609","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3637528.3671609","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3637528.3671609","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 30th ACM SIGKDD Conference on Knowledge Discovery and Data Mining","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3637528.3671609","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5012411460","display_name":"Bingchang Liu","orcid":"https://orcid.org/0009-0003-9380-6168"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Bingchang Liu","raw_affiliation_strings":["Ant Group, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Ant Group, Hangzhou, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100771758","display_name":"Chaoyu Chen","orcid":"https://orcid.org/0009-0005-6133-4324"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chaoyu Chen","raw_affiliation_strings":["Ant Group, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Ant Group, Hangzhou, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5020959143","display_name":"Zi Gong","orcid":"https://orcid.org/0009-0007-7142-8433"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zi Gong","raw_affiliation_strings":["Ant Group, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Ant Group, Hangzhou, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5051661772","display_name":"Cong Liao","orcid":"https://orcid.org/0009-0009-6393-9035"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Cong Liao","raw_affiliation_strings":["Ant Group, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Ant Group, Hangzhou, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5041233541","display_name":"Huan Wang","orcid":"https://orcid.org/0009-0003-3333-7195"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Huan Wang","raw_affiliation_strings":["Ant Group, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Ant Group, Hangzhou, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103003074","display_name":"Zhichao Lei","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhichao Lei","raw_affiliation_strings":["Ant Group, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Ant Group, Hangzhou, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5077788322","display_name":"Ming Liang","orcid":"https://orcid.org/0000-0003-3622-1510"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ming Liang","raw_affiliation_strings":["Ant Group, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Ant Group, Hangzhou, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103155183","display_name":"Dajun Chen","orcid":"https://orcid.org/0009-0000-9532-7636"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dajun Chen","raw_affiliation_strings":["Ant Group, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Ant Group, Hangzhou, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103119821","display_name":"Min Shen","orcid":"https://orcid.org/0009-0005-8418-1877"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Min Shen","raw_affiliation_strings":["Ant Group, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Ant Group, Hangzhou, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5039337842","display_name":"Hailian Zhou","orcid":"https://orcid.org/0009-0002-0476-4449"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hailian Zhou","raw_affiliation_strings":["Ant Group, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Ant Group, Hangzhou, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5067065175","display_name":"Wei Jiang","orcid":"https://orcid.org/0009-0003-6605-9793"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wei Jiang","raw_affiliation_strings":["Ant Group, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Ant Group, Hangzhou, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100432180","display_name":"Hang Yu","orcid":"https://orcid.org/0000-0002-5639-0912"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hang Yu","raw_affiliation_strings":["Ant Group, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Ant Group, Hangzhou, China","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100368377","display_name":"Jianguo Li","orcid":"https://orcid.org/0000-0002-8645-0680"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jianguo Li","raw_affiliation_strings":["Ant Group, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Ant Group, Hangzhou, China","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":13,"corresponding_author_ids":["https://openalex.org/A5012411460"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":5.2363,"has_fulltext":false,"cited_by_count":15,"citation_normalized_percentile":{"value":0.96078282,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"5430","last_page":"5441"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.995199978351593,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.995199978351593,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9265000224113464,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/boosting","display_name":"Boosting (machine learning)","score":0.9055114984512329},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.579552173614502},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.444505900144577},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.30027949810028076},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.2515277564525604}],"concepts":[{"id":"https://openalex.org/C46686674","wikidata":"https://www.wikidata.org/wiki/Q466303","display_name":"Boosting (machine learning)","level":2,"score":0.9055114984512329},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.579552173614502},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.444505900144577},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.30027949810028076},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.2515277564525604},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3637528.3671609","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3637528.3671609","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3637528.3671609","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 30th ACM SIGKDD Conference on Knowledge Discovery and Data Mining","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3637528.3671609","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3637528.3671609","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3637528.3671609","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 30th ACM SIGKDD Conference on Knowledge Discovery and Data Mining","raw_type":"proceedings-article"},"sustainable_development_goals":[{"display_name":"Affordable and clean energy","score":0.41999998688697815,"id":"https://metadata.un.org/sdg/7"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4401863706.pdf"},"referenced_works_count":12,"referenced_works":["https://openalex.org/W2251324968","https://openalex.org/W2884886306","https://openalex.org/W2963430933","https://openalex.org/W2963854351","https://openalex.org/W2965024236","https://openalex.org/W2991309414","https://openalex.org/W3036917029","https://openalex.org/W4236965008","https://openalex.org/W4285225959","https://openalex.org/W4285294723","https://openalex.org/W4311887664","https://openalex.org/W4385562549"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2748952813","https://openalex.org/W2125652721","https://openalex.org/W1540371141","https://openalex.org/W4231274751","https://openalex.org/W1549363203","https://openalex.org/W2154063878","https://openalex.org/W2556012038","https://openalex.org/W1489772951","https://openalex.org/W1538046993"],"abstract_inverted_index":{"Code":[0,188],"LLMs":[1],"have":[2,114],"emerged":[3],"as":[4,102,173,194],"a":[5,74,132],"specialized":[6],"research":[7],"field,":[8],"with":[9,167],"remarkable":[10],"studies":[11],"dedicated":[12],"to":[13,29,59,160],"enhancing":[14],"model's":[15],"coding":[16],"capabilities":[17],"through":[18],"fine-tuning":[19,24,38,76,84,120,125,130,151,162],"on":[20,85,126,131,185],"pre-trained":[21],"models.":[22],"Previous":[23],"approaches":[25,57],"were":[26],"typically":[27],"tailored":[28],"specific":[30],"downstream":[31],"tasks":[32,128],"or":[33],"scenarios,":[34],"which":[35],"meant":[36],"separate":[37],"for":[39],"each":[40],"task,":[41],"requiring":[42],"extensive":[43],"training":[44,141],"resources":[45],"and":[46,53,82,108,129,148,175],"posing":[47],"challenges":[48,97],"in":[49,98,155],"terms":[50],"of":[51,135,195],"deployment":[52],"maintenance.":[54],"Furthermore,":[55],"these":[56,70],"failed":[58],"leverage":[60],"the":[61,182,186],"inherent":[62],"interconnectedness":[63],"among":[64],"different":[65],"code-related":[66],"tasks.":[67,87,136],"To":[68],"overcome":[69],"limitations,":[71],"we":[72,93],"present":[73],"multi-task":[75,99,119],"framework,":[77],"MFTCoder,":[78],"that":[79,117],"enables":[80],"simultaneous":[81],"parallel":[83],"multiple":[86],"By":[88],"incorporating":[89],"various":[90],"loss":[91],"functions,":[92],"effectively":[94],"address":[95],"common":[96],"learning,":[100],"such":[101,172],"data":[103,145],"imbalance,":[104],"varying":[105],"difficulty":[106],"levels,":[107],"inconsistent":[109],"convergence":[110],"speeds.":[111],"Extensive":[112],"experiments":[113],"conclusively":[115],"demonstrated":[116],"our":[118],"approach":[121],"outperforms":[122],"both":[123],"individual":[124],"single":[127],"mixed":[133],"ensemble":[134],"Moreover,":[137],"MFTCoder":[138,164,178,199],"offers":[139],"efficient":[140,144,150],"capabilities,":[142],"including":[143],"tokenization":[146],"modes":[147],"parameter":[149],"(PEFT)":[152],"techniques,":[153],"resulting":[154],"significantly":[156],"improved":[157],"speed":[158],"compared":[159],"traditional":[161],"methods.":[163],"seamlessly":[165],"integrates":[166],"several":[168],"mainstream":[169],"open-source":[170],"LLMs,":[171],"CodeLLama":[174],"Qwen.":[176],"Our":[177],"fine-tuned":[179],"CodeFuse-DeepSeek-33B":[180],"claimed":[181],"top":[183],"spot":[184],"Big":[187],"Models":[189],"Leaderboard":[190],"ranked":[191],"by":[192],"WinRate":[193],"January":[196],"30,":[197],"2024.":[198],"is":[200],"open-sourced":[201],"at":[202],"https://github.com/codefuse-ai/MFTCOder":[203]},"counts_by_year":[{"year":2026,"cited_by_count":5},{"year":2025,"cited_by_count":9},{"year":2024,"cited_by_count":1}],"updated_date":"2026-03-27T14:29:43.386196","created_date":"2025-10-10T00:00:00"}
