{"id":"https://openalex.org/W4226106502","doi":"https://doi.org/10.1145/3510003.3510226","title":"SapientML","display_name":"SapientML","publication_year":2022,"publication_date":"2022-05-21","ids":{"openalex":"https://openalex.org/W4226106502","doi":"https://doi.org/10.1145/3510003.3510226"},"language":"en","primary_location":{"id":"doi:10.1145/3510003.3510226","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3510003.3510226","pdf_url":null,"source":{"id":"https://openalex.org/S4363608872","display_name":"Proceedings of the 44th International Conference on Software Engineering","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 44th International Conference on Software Engineering","raw_type":"proceedings-article"},"type":"article","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2202.10451","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5088836481","display_name":"Ripon K. Saha","orcid":"https://orcid.org/0000-0002-1648-3049"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ripon K. Saha","raw_affiliation_strings":["Fujitsu Research of America, Inc"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Fujitsu Research of America, Inc","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5040513640","display_name":"Akira Ura","orcid":"https://orcid.org/0009-0008-1675-0144"},"institutions":[{"id":"https://openalex.org/I4210159607","display_name":"Fujitsu (China)","ror":"https://ror.org/04w4yzw62","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210159607"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Akira Ura","raw_affiliation_strings":["Fujitsu Ltd"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Fujitsu Ltd","institution_ids":["https://openalex.org/I4210159607"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5026794504","display_name":"Sonal Mahajan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sonal Mahajan","raw_affiliation_strings":["Fujitsu Research of America, Inc"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Fujitsu Research of America, Inc","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5054819831","display_name":"Chenguang Zhu","orcid":"https://orcid.org/0000-0002-7343-8279"},"institutions":[{"id":"https://openalex.org/I2252096349","display_name":"Fujitsu (Japan)","ror":"https://ror.org/038e2g226","country_code":"JP","type":"company","lineage":["https://openalex.org/I2252096349"]},{"id":"https://openalex.org/I4210159607","display_name":"Fujitsu (China)","ror":"https://ror.org/04w4yzw62","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210159607"]}],"countries":["CN","JP"],"is_corresponding":false,"raw_author_name":"Chenguang Zhu","raw_affiliation_strings":["The University of Texas at Austin and Fujitsu Ltd"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"The University of Texas at Austin and Fujitsu Ltd","institution_ids":["https://openalex.org/I2252096349","https://openalex.org/I4210159607"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101915882","display_name":"Linyi Li","orcid":"https://orcid.org/0000-0002-5403-3217"},"institutions":[{"id":"https://openalex.org/I157725225","display_name":"University of Illinois Urbana-Champaign","ror":"https://ror.org/047426m28","country_code":"US","type":"education","lineage":["https://openalex.org/I157725225"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Linyi Li","raw_affiliation_strings":["University of Illinois at Urbana-Champaign"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Illinois at Urbana-Champaign","institution_ids":["https://openalex.org/I157725225"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100338243","display_name":"Hu Yang","orcid":"https://orcid.org/0000-0001-6589-8534"},"institutions":[{"id":"https://openalex.org/I86519309","display_name":"The University of Texas at Austin","ror":"https://ror.org/00hj54h04","country_code":"US","type":"education","lineage":["https://openalex.org/I86519309"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yang Hu","raw_affiliation_strings":["The University of Texas at Austin"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"The University of Texas at Austin","institution_ids":["https://openalex.org/I86519309"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5073098982","display_name":"Hiroaki Yoshida","orcid":"https://orcid.org/0000-0002-5370-7451"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hiroaki Yoshida","raw_affiliation_strings":["Fujitsu Research of America, Inc"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Fujitsu Research of America, Inc","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102146282","display_name":"Sarfraz Khurshid","orcid":null},"institutions":[{"id":"https://openalex.org/I86519309","display_name":"The University of Texas at Austin","ror":"https://ror.org/00hj54h04","country_code":"US","type":"education","lineage":["https://openalex.org/I86519309"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Sarfraz Khurshid","raw_affiliation_strings":["The University of Texas at Austin"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"The University of Texas at Austin","institution_ids":["https://openalex.org/I86519309"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101870966","display_name":"Mukul R. Prasad","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mukul R. Prasad","raw_affiliation_strings":["Fujitsu Research of America, Inc"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Fujitsu Research of America, Inc","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":9,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.0382,"has_fulltext":false,"cited_by_count":11,"citation_normalized_percentile":{"value":0.77874371,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"1932","last_page":"1944"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10743","display_name":"Software Testing and Debugging Techniques","score":0.9944000244140625,"subfield":{"id":"https://openalex.org/subfields/1712","display_name":"Software"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8286031484603882},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.8091177940368652},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.7275272607803345},{"id":"https://openalex.org/keywords/pipeline-transport","display_name":"Pipeline transport","score":0.7193273901939392},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6952521800994873},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.6682255864143372},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.5511540770530701},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.4859141409397125},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.11139383912086487}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8286031484603882},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.8091177940368652},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.7275272607803345},{"id":"https://openalex.org/C175309249","wikidata":"https://www.wikidata.org/wiki/Q725864","display_name":"Pipeline transport","level":2,"score":0.7193273901939392},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6952521800994873},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.6682255864143372},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.5511540770530701},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.4859141409397125},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.11139383912086487},{"id":"https://openalex.org/C87717796","wikidata":"https://www.wikidata.org/wiki/Q146326","display_name":"Environmental engineering","level":1,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/3510003.3510226","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3510003.3510226","pdf_url":null,"source":{"id":"https://openalex.org/S4363608872","display_name":"Proceedings of the 44th International Conference on Software Engineering","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 44th International Conference on Software Engineering","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2202.10451","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2202.10451","pdf_url":"https://arxiv.org/pdf/2202.10451","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2202.10451","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2202.10451","pdf_url":"https://arxiv.org/pdf/2202.10451","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[{"display_name":"Industry, innovation and infrastructure","score":0.44999998807907104,"id":"https://metadata.un.org/sdg/9"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":47,"referenced_works":["https://openalex.org/W60686164","https://openalex.org/W1991680573","https://openalex.org/W2041963100","https://openalex.org/W2101234009","https://openalex.org/W2102539288","https://openalex.org/W2143861926","https://openalex.org/W2151554678","https://openalex.org/W2182361439","https://openalex.org/W2309832917","https://openalex.org/W2550471858","https://openalex.org/W2553303224","https://openalex.org/W2620845889","https://openalex.org/W2747329762","https://openalex.org/W2759903677","https://openalex.org/W2798628553","https://openalex.org/W2884889320","https://openalex.org/W2902618648","https://openalex.org/W2913059114","https://openalex.org/W2936116159","https://openalex.org/W2949271354","https://openalex.org/W2950220059","https://openalex.org/W2954996726","https://openalex.org/W2962900518","https://openalex.org/W2963868406","https://openalex.org/W2964240296","https://openalex.org/W2966284335","https://openalex.org/W2979679630","https://openalex.org/W2979989546","https://openalex.org/W2980180246","https://openalex.org/W2997591727","https://openalex.org/W3040879595","https://openalex.org/W3043761819","https://openalex.org/W3080381655","https://openalex.org/W3082059448","https://openalex.org/W3098294082","https://openalex.org/W3100203766","https://openalex.org/W3104332093","https://openalex.org/W3121414853","https://openalex.org/W3125702975","https://openalex.org/W4211116959","https://openalex.org/W4213308398","https://openalex.org/W4231040899","https://openalex.org/W4241222526","https://openalex.org/W4295185264","https://openalex.org/W4297803580","https://openalex.org/W4392271976","https://openalex.org/W4399647672"],"related_works":["https://openalex.org/W4380433113","https://openalex.org/W4386072068","https://openalex.org/W252339960","https://openalex.org/W2390529043","https://openalex.org/W2378320433","https://openalex.org/W2358343511","https://openalex.org/W2051877971","https://openalex.org/W1970117064","https://openalex.org/W1787170397","https://openalex.org/W4292347844"],"abstract_inverted_index":{"Automatic":[0],"machine":[1,14],"learning,":[2],"or":[3,42,253,303],"AutoML,":[4,94],"holds":[5],"the":[6,11,20,26,89,133,155,163,167,195,251,259,262,275,282,294],"promise":[7],"of":[8,13,22,31,66,93,124,144,175,212,224,258,274],"truly":[9],"democratizing":[10],"use":[12],"learning":[15,185],"(ML),":[16],"by":[17,187],"substantially":[18],"automating":[19],"work":[21,53],"data":[23],"scientists.":[24],"However,":[25],"huge":[27],"combinatorial":[28],"search":[29,90,113],"space":[30,91],"candidate":[32],"pipelines":[33,147,201,214,300],"means":[34],"that":[35,60,108,180,248],"current":[36],"AutoML":[37,57,240],"techniques,":[38],"generate":[39,75],"sub-optimal":[40],"pipelines,":[41,72,161],"none":[43],"at":[44],"all,":[45],"especially":[46],"on":[47,83,110,221,256,272,281,290,301],"large,":[48,231],"complex":[49],"datasets.":[50],"In":[51,132],"this":[52,136],"we":[54],"propose":[55],"an":[56],"technique":[58],"SapientML,":[59],"can":[61],"learn":[62],"from":[63,154,191,234],"a":[64,76,80,84,97,103,122,130,141,149,176,182,209,222,270],"corpus":[65,186,211],"existing":[67],"datasets":[68,233],"and":[69,73,193,218,236,242],"their":[70],"human-written":[71],"efficiently":[74],"high-quality":[77],"pipeline":[78,150,271],"for":[79,202],"predictive":[81,204],"task":[82],"new":[85,203],"dataset.":[86],"To":[87],"combat":[88],"explosion":[92],"SapientML":[95,172,220,249,288],"employs":[96],"novel":[98],"divide-and-conquer":[99],"strategy":[100],"realized":[101],"as":[102,173],"three-stage":[104],"program":[105],"synthesis":[106],"approach,":[107],"reasons":[109],"successively":[111],"smaller":[112],"spaces.":[114],"The":[115],"first":[116],"stage":[117],"uses":[118,194],"meta-learning":[119],"to":[120,128,198,267,298],"predict":[121],"set":[123,223],"plausible":[125],"ML":[126],"components":[127],"constitute":[129],"pipeline.":[131],"second":[134,263],"stage,":[135,165],"is":[137,279],"then":[138,199],"refined":[139],"into":[140],"small":[142],"pool":[143],"viable":[145],"concrete":[146],"using":[148],"dataflow":[151],"model":[152],"derived":[153],"corpus.":[156],"Dynamically":[157],"evaluating":[158],"these":[159],"few":[160],"in":[162],"third":[164],"provides":[166],"best":[168,252,264],"solution.":[169],"We":[170,206],"instantiate":[171],"part":[174],"fully":[177],"automated":[178],"tool-chain":[179],"creates":[181],"cleaned,":[183],"labeled":[184],"mining":[188],"Kaggle,":[189,235],"learns":[190],"it,":[192],"learned":[196],"models":[197],"synthesize":[200],"tasks.":[205],"have":[207],"created":[208],"training":[210],"1,094":[213],"spanning":[215],"170":[216],"datasets,":[217,227],"evaluated":[219],"41":[225],"benchmark":[226],"including":[228],"10":[229,283],"new,":[230],"real-world":[232],"against":[237],"3":[238],"state-of-the-art":[239],"tools":[241,296],"4":[243,302],"baselines.":[244],"Our":[245],"evaluation":[246],"shows":[247],"produces":[250],"comparable":[254],"accuracy":[255],"27":[257],"benchmarks":[260],"while":[261],"tool":[265],"fails":[266],"even":[268],"produce":[269,299],"9":[273,291],"instances.":[276],"This":[277],"difference":[278],"amplified":[280],"most":[284],"challenging":[285],"benchmarks,":[286],"where":[287],"wins":[289],"instances":[292],"with":[293],"other":[295],"failing":[297],"more":[304],"benchmarks.":[305]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":6},{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2022-05-05T00:00:00"}
