{"id":"https://openalex.org/W7105599063","doi":"https://doi.org/10.1109/tse.2025.3632074","title":"DockerFill: Automatically Completing Dockerfile Code With Syntax-Aware Multi-Task Learning","display_name":"DockerFill: Automatically Completing Dockerfile Code With Syntax-Aware Multi-Task Learning","publication_year":2025,"publication_date":"2025-11-13","ids":{"openalex":"https://openalex.org/W7105599063","doi":"https://doi.org/10.1109/tse.2025.3632074"},"language":null,"primary_location":{"id":"doi:10.1109/tse.2025.3632074","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tse.2025.3632074","pdf_url":null,"source":{"id":"https://openalex.org/S8351582","display_name":"IEEE Transactions on Software Engineering","issn_l":"0098-5589","issn":["0098-5589","1939-3520","2326-3881"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320439","host_organization_name":"IEEE Computer Society","host_organization_lineage":["https://openalex.org/P4310320439","https://openalex.org/P4310319808"],"host_organization_lineage_names":["IEEE Computer Society","Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Software Engineering","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Yiwen Wu","orcid":"https://orcid.org/0000-0002-8652-116X"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yiwen Wu","raw_affiliation_strings":["National University of Defense Technology, Changsha, China"],"affiliations":[{"raw_affiliation_string":"National University of Defense Technology, Changsha, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Yang Zhang","orcid":"https://orcid.org/0000-0002-3111-1534"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yang Zhang","raw_affiliation_strings":["National University of Defense Technology, Changsha, China"],"affiliations":[{"raw_affiliation_string":"National University of Defense Technology, Changsha, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Tao Wang","orcid":"https://orcid.org/0000-0002-8406-8672"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Tao Wang","raw_affiliation_strings":["National University of Defense Technology, Changsha, China"],"affiliations":[{"raw_affiliation_string":"National University of Defense Technology, Changsha, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Bo Ding","orcid":"https://orcid.org/0000-0002-1236-8318"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Bo Ding","raw_affiliation_strings":["National University of Defense Technology, Changsha, China"],"affiliations":[{"raw_affiliation_string":"National University of Defense Technology, Changsha, China","institution_ids":["https://openalex.org/I170215575"]}]},{"author_position":"last","author":{"id":null,"display_name":"Huaimin Wang","orcid":"https://orcid.org/0000-0002-3245-1901"},"institutions":[{"id":"https://openalex.org/I170215575","display_name":"National University of Defense Technology","ror":"https://ror.org/05d2yfz11","country_code":"CN","type":"education","lineage":["https://openalex.org/I170215575"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Huaimin Wang","raw_affiliation_strings":["National University of Defense Technology, Changsha, China"],"affiliations":[{"raw_affiliation_string":"National University of Defense Technology, Changsha, China","institution_ids":["https://openalex.org/I170215575"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I170215575"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.5836578,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"52","issue":"1","first_page":"137","last_page":"154"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12127","display_name":"Software System Performance and Reliability","score":0.27619999647140503,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12127","display_name":"Software System Performance and Reliability","score":0.27619999647140503,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11424","display_name":"Security and Verification in Computing","score":0.23399999737739563,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10743","display_name":"Software Testing and Debugging Techniques","score":0.11959999799728394,"subfield":{"id":"https://openalex.org/subfields/1712","display_name":"Software"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/identifier","display_name":"Identifier","score":0.5769000053405762},{"id":"https://openalex.org/keywords/coding","display_name":"Coding (social sciences)","score":0.4796000123023987},{"id":"https://openalex.org/keywords/syntax","display_name":"Syntax","score":0.4327999949455261},{"id":"https://openalex.org/keywords/program-comprehension","display_name":"Program comprehension","score":0.4115999937057495},{"id":"https://openalex.org/keywords/software-maintenance","display_name":"Software maintenance","score":0.4097000062465668},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.4023999869823456},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.3822999894618988},{"id":"https://openalex.org/keywords/architecture","display_name":"Architecture","score":0.37119999527931213},{"id":"https://openalex.org/keywords/software-inspection","display_name":"Software inspection","score":0.36959999799728394}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.9093000292778015},{"id":"https://openalex.org/C154504017","wikidata":"https://www.wikidata.org/wiki/Q853614","display_name":"Identifier","level":2,"score":0.5769000053405762},{"id":"https://openalex.org/C115903868","wikidata":"https://www.wikidata.org/wiki/Q80993","display_name":"Software engineering","level":1,"score":0.48330000042915344},{"id":"https://openalex.org/C179518139","wikidata":"https://www.wikidata.org/wiki/Q5140297","display_name":"Coding (social sciences)","level":2,"score":0.4796000123023987},{"id":"https://openalex.org/C60048249","wikidata":"https://www.wikidata.org/wiki/Q37437","display_name":"Syntax","level":2,"score":0.4327999949455261},{"id":"https://openalex.org/C2777561058","wikidata":"https://www.wikidata.org/wiki/Q2652119","display_name":"Program comprehension","level":4,"score":0.4115999937057495},{"id":"https://openalex.org/C101317890","wikidata":"https://www.wikidata.org/wiki/Q940053","display_name":"Software maintenance","level":4,"score":0.4097000062465668},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.4052000045776367},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.4023999869823456},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.3822999894618988},{"id":"https://openalex.org/C123657996","wikidata":"https://www.wikidata.org/wiki/Q12271","display_name":"Architecture","level":2,"score":0.37119999527931213},{"id":"https://openalex.org/C10272871","wikidata":"https://www.wikidata.org/wiki/Q929972","display_name":"Software inspection","level":5,"score":0.36959999799728394},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.3610000014305115},{"id":"https://openalex.org/C114408938","wikidata":"https://www.wikidata.org/wiki/Q333373","display_name":"Abstract syntax","level":3,"score":0.3452000021934509},{"id":"https://openalex.org/C137287247","wikidata":"https://www.wikidata.org/wiki/Q1329550","display_name":"Static program analysis","level":4,"score":0.3357999920845032},{"id":"https://openalex.org/C529173508","wikidata":"https://www.wikidata.org/wiki/Q638608","display_name":"Software development","level":3,"score":0.3255000114440918},{"id":"https://openalex.org/C120936955","wikidata":"https://www.wikidata.org/wiki/Q2155640","display_name":"Empirical research","level":2,"score":0.3077000081539154},{"id":"https://openalex.org/C119839945","wikidata":"https://www.wikidata.org/wiki/Q6545185","display_name":"Unique identifier","level":3,"score":0.30559998750686646},{"id":"https://openalex.org/C135257023","wikidata":"https://www.wikidata.org/wiki/Q691358","display_name":"Domain-specific language","level":2,"score":0.3041999936103821},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.29809999465942383},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.29760000109672546},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.28519999980926514},{"id":"https://openalex.org/C35869016","wikidata":"https://www.wikidata.org/wiki/Q846636","display_name":"Software architecture","level":3,"score":0.28369998931884766},{"id":"https://openalex.org/C1009929","wikidata":"https://www.wikidata.org/wiki/Q179550","display_name":"Software bug","level":3,"score":0.2639000117778778},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.26339998841285706},{"id":"https://openalex.org/C150292731","wikidata":"https://www.wikidata.org/wiki/Q1342704","display_name":"Code review","level":5,"score":0.259799987077713},{"id":"https://openalex.org/C191727507","wikidata":"https://www.wikidata.org/wiki/Q24346","display_name":"Software regression","level":5,"score":0.25839999318122864},{"id":"https://openalex.org/C199519371","wikidata":"https://www.wikidata.org/wiki/Q942695","display_name":"Source lines of code","level":3,"score":0.2549999952316284},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.2531999945640564},{"id":"https://openalex.org/C98183937","wikidata":"https://www.wikidata.org/wiki/Q2112188","display_name":"Program analysis","level":2,"score":0.2529999911785126},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.2506999969482422}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tse.2025.3632074","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tse.2025.3632074","pdf_url":null,"source":{"id":"https://openalex.org/S8351582","display_name":"IEEE Transactions on Software Engineering","issn_l":"0098-5589","issn":["0098-5589","1939-3520","2326-3881"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320439","host_organization_name":"IEEE Computer Society","host_organization_lineage":["https://openalex.org/P4310320439","https://openalex.org/P4310319808"],"host_organization_lineage_names":["IEEE Computer Society","Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Software Engineering","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.5956218242645264,"display_name":"Industry, innovation and infrastructure","id":"https://metadata.un.org/sdg/9"}],"awards":[{"id":"https://openalex.org/G6599337021","display_name":null,"funder_award_id":"62202480","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":59,"referenced_works":["https://openalex.org/W1857789879","https://openalex.org/W1970607969","https://openalex.org/W2023953679","https://openalex.org/W2056198910","https://openalex.org/W2095752327","https://openalex.org/W2101105183","https://openalex.org/W2166597811","https://openalex.org/W2326454698","https://openalex.org/W2344444819","https://openalex.org/W2532824920","https://openalex.org/W2605856244","https://openalex.org/W2612808279","https://openalex.org/W2621143560","https://openalex.org/W2768572539","https://openalex.org/W2895799730","https://openalex.org/W2913340405","https://openalex.org/W2945843801","https://openalex.org/W2953829622","https://openalex.org/W2962784628","https://openalex.org/W2964150020","https://openalex.org/W2964322208","https://openalex.org/W3000045849","https://openalex.org/W3005628256","https://openalex.org/W3006548806","https://openalex.org/W3011564318","https://openalex.org/W3061716355","https://openalex.org/W3093464584","https://openalex.org/W3095937739","https://openalex.org/W3098605233","https://openalex.org/W3108032709","https://openalex.org/W3133662439","https://openalex.org/W3161997752","https://openalex.org/W3162311522","https://openalex.org/W3170092793","https://openalex.org/W3174414731","https://openalex.org/W3174524142","https://openalex.org/W3176740355","https://openalex.org/W3185176031","https://openalex.org/W3198685994","https://openalex.org/W3201286061","https://openalex.org/W4205371973","https://openalex.org/W4205688351","https://openalex.org/W4206054126","https://openalex.org/W4210493608","https://openalex.org/W4220672926","https://openalex.org/W4221166942","https://openalex.org/W4223909856","https://openalex.org/W4229003128","https://openalex.org/W4284709233","https://openalex.org/W4288079617","https://openalex.org/W4312793626","https://openalex.org/W4312942685","https://openalex.org/W4313563756","https://openalex.org/W4383334282","https://openalex.org/W4385245566","https://openalex.org/W4386080933","https://openalex.org/W4391579657","https://openalex.org/W4396214523","https://openalex.org/W4398186393"],"related_works":[],"abstract_inverted_index":{"As":[0],"a":[1,12,115,166],"kind":[2],"of":[3,11,96,105,168],"infrastructure-as-code,":[4],"Dockerfile":[5,62,97,106],"specifies":[6],"the":[7,23,100,195,214],"structure":[8],"and":[9,16,35,50,67,102,143,154,187,199],"functionality":[10],"built":[13],"Docker":[14,93],"image":[15],"thus":[17],"plays":[18],"an":[19,88],"important":[20],"role":[21],"in":[22,40,56,79],"containerized":[24],"software":[25],"development":[26],"process.":[27],"Nowadays":[28],"developers":[29,78],"need":[30],"to":[31,42,53,91,181,213],"spend":[32],"extra":[33],"time":[34],"effort":[36],"configuring":[37,80],"their":[38,43],"Dockerfiles":[39],"addition":[41],"regular":[44],"coding":[45],"work,":[46],"which":[47,138],"requires":[48],"knowledge":[49],"skills":[51],"orthogonal":[52],"those":[54],"entailed":[55],"other":[57],"software-related":[58],"experiences.":[59],"Poorly":[60],"written":[61],"code":[63],"often":[64],"introduces":[65],"errors":[66],"maintenance":[68],"costs.":[69],"However,":[70],"little":[71],"automated":[72],"support":[73],"is":[74],"available":[75],"for":[76,124,184,191],"assisting":[77],"Dockerfiles.":[81,172],"In":[82],"this":[83],"study,":[84],"we":[85,110,164],"first":[86],"conduct":[87],"online":[89],"survey":[90],"investigate":[92],"developers\u2019":[94],"perceptions":[95],"writing,":[98],"highlighting":[99],"needs":[101],"potential":[103],"benefits":[104],"auto-completion":[107],"techniques.":[108],"Then,":[109],"introduce":[111],"<sc":[112,127,160,203],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[113,128,161,204],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">DOCKERFILL</small>,":[114],"pre-trained":[116],"model":[117],"based":[118],"approach":[119],"that":[120,177],"provides":[121,179],"completion":[122,186],"suggestions":[123],"Dockerfile-specific":[125],"code.":[126],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">DOCKERFILL</small>":[129,205],"leverages":[130],"multi-layer":[131],"Transformer":[132],"architecture":[133],"with":[134],"syntax-aware":[135],"multi-task":[136],"learning,":[137],"includes":[139],"contextual":[140],"file":[141],"information":[142],"three":[144],"pre-training":[145],"tasks,":[146],"i.e.,":[147],"masked":[148,155],"language":[149],"modeling,":[150],"syntax":[151],"type":[152],"identification,":[153],"identifier":[156],"prediction.":[157],"To":[158],"evaluate":[159],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">DOCKERFILL</small>\u2019s":[162],"effectiveness,":[163],"collect":[165],"dataset":[167],"6,350":[169],"high-quality":[170],"real-world":[171],"Our":[173],"empirical":[174],"results":[175],"show":[176],"DOCKERFILL":[178],"up":[180],"52.38%":[182],"accuracy":[183],"token-level":[185],"19.69%":[188],"exact":[189],"match":[190],"line-level":[192],"completion,":[193],"outperforming":[194],"baselines":[196],"by":[197],"7.32%-37.67%":[198],"1.97%-19.69%,":[200],"respectively.":[201],"Also,":[202],"obtains":[206],"significantly":[207],"higher":[208],"human":[209],"evaluation":[210],"scores":[211],"compared":[212],"baselines.":[215]},"counts_by_year":[],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-11-13T00:00:00"}
