{"id":"https://openalex.org/W4401352189","doi":"https://doi.org/10.1145/3643991.3644923","title":"Data Augmentation for Supervised Code Translation Learning","display_name":"Data Augmentation for Supervised Code Translation Learning","publication_year":2024,"publication_date":"2024-04-15","ids":{"openalex":"https://openalex.org/W4401352189","doi":"https://doi.org/10.1145/3643991.3644923"},"language":"en","primary_location":{"id":"doi:10.1145/3643991.3644923","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3643991.3644923","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 21st International Conference on Mining Software Repositories","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5028469663","display_name":"Binger Chen","orcid":"https://orcid.org/0009-0007-7153-6100"},"institutions":[{"id":"https://openalex.org/I4577782","display_name":"Technische Universit\u00e4t Berlin","ror":"https://ror.org/03v4gjf40","country_code":"DE","type":"education","lineage":["https://openalex.org/I4577782"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Binger Chen","raw_affiliation_strings":["Technische Universit\u00e4t Berlin, Berlin, Germany"],"affiliations":[{"raw_affiliation_string":"Technische Universit\u00e4t Berlin, Berlin, Germany","institution_ids":["https://openalex.org/I4577782"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5055649136","display_name":"Jacek Go\u0142\u0119biowski","orcid":"https://orcid.org/0000-0001-8053-8318"},"institutions":[{"id":"https://openalex.org/I4210089985","display_name":"Amazon (Germany)","ror":"https://ror.org/00b9ktm87","country_code":"DE","type":"company","lineage":["https://openalex.org/I1311688040","https://openalex.org/I4210089985"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Jacek Golebiowski","raw_affiliation_strings":["Amazon AWS, Berlin, Germany"],"affiliations":[{"raw_affiliation_string":"Amazon AWS, Berlin, Germany","institution_ids":["https://openalex.org/I4210089985"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5009128577","display_name":"Ziawasch Abedjan","orcid":"https://orcid.org/0000-0002-2846-1373"},"institutions":[{"id":"https://openalex.org/I114112103","display_name":"Leibniz University Hannover","ror":"https://ror.org/0304hq317","country_code":"DE","type":"education","lineage":["https://openalex.org/I114112103"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Ziawasch Abedjan","raw_affiliation_strings":["Leibniz Universit\u00e4t Hannover, Hanover, Germany"],"affiliations":[{"raw_affiliation_string":"Leibniz Universit\u00e4t Hannover, Hanover, Germany","institution_ids":["https://openalex.org/I114112103"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5028469663"],"corresponding_institution_ids":["https://openalex.org/I4577782"],"apc_list":null,"apc_paid":null,"fwci":0.7274,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.7528796,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"444","last_page":"456"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9940000176429749,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10743","display_name":"Software Testing and Debugging Techniques","score":0.9936000108718872,"subfield":{"id":"https://openalex.org/subfields/1712","display_name":"Software"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8066211342811584},{"id":"https://openalex.org/keywords/translation","display_name":"Translation (biology)","score":0.6150546669960022},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.5286909937858582},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5057921409606934},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.4997560977935791},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.43928053975105286},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.32115620374679565}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8066211342811584},{"id":"https://openalex.org/C149364088","wikidata":"https://www.wikidata.org/wiki/Q185917","display_name":"Translation (biology)","level":4,"score":0.6150546669960022},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.5286909937858582},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5057921409606934},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4997560977935791},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.43928053975105286},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.32115620374679565},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C105580179","wikidata":"https://www.wikidata.org/wiki/Q188928","display_name":"Messenger RNA","level":3,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3643991.3644923","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3643991.3644923","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 21st International Conference on Mining Software Repositories","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":27,"referenced_works":["https://openalex.org/W1972141422","https://openalex.org/W2135841285","https://openalex.org/W2164961799","https://openalex.org/W2247864914","https://openalex.org/W2576813563","https://openalex.org/W2610850660","https://openalex.org/W2747329762","https://openalex.org/W2759461255","https://openalex.org/W2795013376","https://openalex.org/W2795866244","https://openalex.org/W2888519496","https://openalex.org/W2891555348","https://openalex.org/W2946068894","https://openalex.org/W2963206679","https://openalex.org/W2963216553","https://openalex.org/W2986712369","https://openalex.org/W3033748891","https://openalex.org/W3034689979","https://openalex.org/W3035577668","https://openalex.org/W3098341425","https://openalex.org/W3101990291","https://openalex.org/W3162521174","https://openalex.org/W3167900421","https://openalex.org/W3174828871","https://openalex.org/W4220864612","https://openalex.org/W4300007223","https://openalex.org/W4384345643"],"related_works":["https://openalex.org/W2961085424","https://openalex.org/W4224009465","https://openalex.org/W4306674287","https://openalex.org/W4286629047","https://openalex.org/W4231937131","https://openalex.org/W323219885","https://openalex.org/W2063928587","https://openalex.org/W4205958290","https://openalex.org/W1487966966","https://openalex.org/W4384212932"],"abstract_inverted_index":{"Data-driven":[0],"program":[1],"translation":[2,63,85,129],"has":[3],"been":[4],"recently":[5],"the":[6,37,49,66,124],"focus":[7],"of":[8,11,27,33,51,69,126],"several":[9],"lines":[10],"research.":[12],"A":[13],"common":[14],"and":[15,39,87],"robust":[16],"strategy":[17],"is":[18,23],"supervised":[19,128],"learning.":[20],"However,":[21],"there":[22],"typically":[24],"a":[25,77,88],"lack":[26],"parallel":[28],"training":[29,104],"data,":[30,86],"i.e.,":[31],"pairs":[32],"code":[34,62,84,94,99],"snippets":[35],"in":[36,48],"source":[38],"target":[40],"language.":[41],"While":[42],"many":[43],"data":[44],"augmentation":[45,80],"techniques":[46,130],"exist":[47],"domain":[50],"natural":[52],"language":[53],"processing,":[54],"they":[55],"cannot":[56],"be":[57],"easily":[58],"adapted":[59],"to":[60,65,101,133],"tackle":[61],"due":[64],"unique":[67],"restrictions":[68],"programming":[70],"languages.":[71],"In":[72],"this":[73],"paper,":[74],"we":[75],"develop":[76],"novel":[78,89],"rule-based":[79],"approach":[81,91],"tailored":[82],"for":[83],"retrieval-based":[90],"that":[92,120],"combines":[93],"samples":[95],"from":[96],"unorganized":[97],"big":[98],"repositories":[100],"obtain":[102],"new":[103],"data.":[105],"Both":[106],"approaches":[107],"are":[108],"language-independent.":[109],"We":[110],"perform":[111],"an":[112],"extensive":[113],"empirical":[114],"evaluation":[115],"on":[116],"existing":[117],"Java-C#-benchmarks":[118],"showing":[119],"our":[121],"method":[122],"improves":[123],"accuracy":[125],"state-of-the-art":[127],"by":[131],"up":[132],"35%.":[134]},"counts_by_year":[{"year":2024,"cited_by_count":2}],"updated_date":"2025-12-21T23:12:01.093139","created_date":"2025-10-10T00:00:00"}
