{"id":"https://openalex.org/W7135172733","doi":"https://doi.org/10.1109/access.2026.3673923","title":"AutoML-Pipeline: A RAG-Enhanced Code Generation Framework With Pre-Validation for Cloud-Native Machine Learning Workflows","display_name":"AutoML-Pipeline: A RAG-Enhanced Code Generation Framework With Pre-Validation for Cloud-Native Machine Learning Workflows","publication_year":2026,"publication_date":"2026-01-01","ids":{"openalex":"https://openalex.org/W7135172733","doi":"https://doi.org/10.1109/access.2026.3673923"},"language":"en","primary_location":{"id":"doi:10.1109/access.2026.3673923","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2026.3673923","pdf_url":null,"source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1109/access.2026.3673923","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101143179","display_name":"Wenyu Zhao","orcid":null},"institutions":[{"id":"https://openalex.org/I4210113369","display_name":"Microsoft Research Asia (China)","ror":"https://ror.org/0300m5276","country_code":"CN","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210113369"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Wenyu Zhao","raw_affiliation_strings":["Microsoft, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Microsoft, Beijing, China","institution_ids":["https://openalex.org/I4210113369"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128931553","display_name":"Tingjie Chen","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tingjie Chen","raw_affiliation_strings":["Intel, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"Intel, Shanghai, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5126177787","display_name":"J. C. Yang","orcid":null},"institutions":[{"id":"https://openalex.org/I223532165","display_name":"University of Utah","ror":"https://ror.org/03r0ha626","country_code":"US","type":"education","lineage":["https://openalex.org/I223532165"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jie Si Yang","raw_affiliation_strings":["The University of Utah, Salt Lake City, UT, USA"],"affiliations":[{"raw_affiliation_string":"The University of Utah, Salt Lake City, UT, USA","institution_ids":["https://openalex.org/I223532165"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5128949271","display_name":"Lei Qiu","orcid":null},"institutions":[{"id":"https://openalex.org/I159389169","display_name":"Ningbo University of Technology","ror":"https://ror.org/037dym702","country_code":"CN","type":"education","lineage":["https://openalex.org/I159389169"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Lei Qiu","raw_affiliation_strings":["Ningbo University of Technology, Ningbo, China"],"affiliations":[{"raw_affiliation_string":"Ningbo University of Technology, Ningbo, China","institution_ids":["https://openalex.org/I159389169"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5101143179"],"corresponding_institution_ids":["https://openalex.org/I4210113369"],"apc_list":{"value":1850,"currency":"USD","value_usd":1850},"apc_paid":{"value":1850,"currency":"USD","value_usd":1850},"fwci":53.5189,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.99742153,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":96,"max":99},"biblio":{"volume":"14","issue":null,"first_page":"41932","last_page":"41945"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.8019000291824341,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.8019000291824341,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11689","display_name":"Adversarial Robustness in Machine Learning","score":0.021199999377131462,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12072","display_name":"Machine Learning and Algorithms","score":0.01600000075995922,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/workflow","display_name":"Workflow","score":0.6111000180244446},{"id":"https://openalex.org/keywords/code-generation","display_name":"Code generation","score":0.5782999992370605},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.49880000948905945},{"id":"https://openalex.org/keywords/computational-learning-theory","display_name":"Computational learning theory","score":0.2840000092983246},{"id":"https://openalex.org/keywords/source-code","display_name":"Source code","score":0.27649998664855957}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8511999845504761},{"id":"https://openalex.org/C177212765","wikidata":"https://www.wikidata.org/wiki/Q627335","display_name":"Workflow","level":2,"score":0.6111000180244446},{"id":"https://openalex.org/C133162039","wikidata":"https://www.wikidata.org/wiki/Q1061077","display_name":"Code generation","level":3,"score":0.5782999992370605},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.49880000948905945},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4555000066757202},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.44589999318122864},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.41819998621940613},{"id":"https://openalex.org/C115903868","wikidata":"https://www.wikidata.org/wiki/Q80993","display_name":"Software engineering","level":1,"score":0.3734000027179718},{"id":"https://openalex.org/C50292564","wikidata":"https://www.wikidata.org/wiki/Q2462783","display_name":"Computational learning theory","level":3,"score":0.2840000092983246},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.27649998664855957},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.27649998664855957},{"id":"https://openalex.org/C137287247","wikidata":"https://www.wikidata.org/wiki/Q1329550","display_name":"Static program analysis","level":4,"score":0.25859999656677246}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/access.2026.3673923","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2026.3673923","pdf_url":null,"source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:935e5e08f7c146a88bd74b57c273f4dc","is_oa":true,"landing_page_url":"https://doaj.org/article/935e5e08f7c146a88bd74b57c273f4dc","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"IEEE Access, Vol 14, Pp 41932-41945 (2026)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.1109/access.2026.3673923","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2026.3673923","pdf_url":null,"source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"The":[0,111],"proliferation":[1],"of":[2,43],"cloud-native":[3],"machine":[4],"learning":[5,84],"platforms":[6],"has":[7],"significantly":[8],"accelerated":[9],"model":[10],"development":[11],"and":[12,17,29,34,46,131,153,178,198],"deployment":[13],"cycles.":[14],"However,":[15],"constructing":[16],"maintaining":[18],"heterogeneous":[19],"pipeline":[20,96,168],"code":[21,38,52,74,105,143],"spanning":[22],"multiple":[23],"languages":[24],"(Python,":[25],"YAML,":[26],"Spark":[27],"SQL)":[28],"cloud-specific":[30],"configurations":[31],"remains":[32],"labor-intensive":[33],"error-prone.":[35],"Existing":[36],"LLM-based":[37],"generation":[39,75,228],"tools":[40],"lack":[41],"awareness":[42],"runtime":[44,219],"constraints":[45],"historical":[47],"execution":[48,97,125,151],"patterns,":[49],"frequently":[50],"producing":[51],"with":[53,82,165,217,221],"resource":[54,129,182],"misconfigurations":[55],"or":[56],"dependency":[57,133,154],"conflicts":[58,134],"that":[59,77,106,122,192],"fail":[60],"upon":[61],"deployment.":[62,137],"To":[63],"address":[64],"these":[65],"challenges,":[66],"we":[67],"propose":[68],"<italic":[69,118],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[70,119],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">AutoML-Pipeline</i>,":[71],"a":[72,90,116,145,171,209],"closed-loop":[73],"framework":[76,159],"integrates":[78],"Retrieval-Augmented":[79],"Generation":[80],"(RAG)":[81],"reinforcement":[83],"feedback":[85,146],"mechanisms.":[86],"Our":[87,206],"approach":[88],"leverages":[89],"knowledge":[91],"base":[92],"constructed":[93],"from":[94],"successful":[95],"logs":[98],"to":[99,108,127,185,203,225],"guide":[100],"GPT-4":[101,187],"in":[102,115,174,181],"generating":[103],"deployment-ready":[104],"adheres":[107],"platform-specific":[109],"constraints.":[110],"key":[112],"innovation":[113],"lies":[114],"novel":[117],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">Pre-validation":[120],"Agent</i>":[121],"employs":[123],"simulated":[124],"environments":[126],"predict":[128],"consumption":[130],"detect":[132],"before":[135],"actual":[136],"This":[138],"agent":[139,200],"iteratively":[140],"refines":[141],"generated":[142],"through":[144],"loop":[147],"informed":[148],"by":[149],"predicted":[150],"profiles":[152],"graphs.":[155],"We":[156],"evaluate":[157],"our":[158],"on":[160],"the":[161,194],"CodeSearchNet":[162],"dataset":[163],"augmented":[164],"Azure":[166],"ML":[167],"specifications,":[169],"demonstrating":[170],"43.7%":[172],"improvement":[173],"first-submission":[175],"success":[176],"rate":[177],"31.2%":[179],"reduction":[180],"over-provisioning":[183],"compared":[184],"vanilla":[186],"baselines.":[188],"Ablation":[189],"studies":[190],"confirm":[191],"both":[193],"RAG":[195],"retrieval":[196],"mechanism":[197],"pre-validation":[199],"contribute":[201],"substantially":[202],"performance":[204],"gains.":[205],"work":[207],"establishes":[208],"practical":[210],"paradigm":[211],"for":[212],"integrating":[213],"large":[214],"language":[215],"models":[216],"domain-specific":[218],"intelligence,":[220],"potential":[222],"applications":[223],"extending":[224],"other":[226],"infrastructure-as-code":[227],"tasks.":[229]},"counts_by_year":[{"year":2026,"cited_by_count":1}],"updated_date":"2026-03-27T14:29:43.386196","created_date":"2026-03-14T00:00:00"}
