{"id":"https://openalex.org/W4394769102","doi":"https://doi.org/10.1145/3597503.3639226","title":"Lost in Translation: A Study of Bugs Introduced by Large Language Models while Translating Code","display_name":"Lost in Translation: A Study of Bugs Introduced by Large Language Models while Translating Code","publication_year":2024,"publication_date":"2024-04-12","ids":{"openalex":"https://openalex.org/W4394769102","doi":"https://doi.org/10.1145/3597503.3639226"},"language":"en","primary_location":{"id":"doi:10.1145/3597503.3639226","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3597503.3639226","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3597503.3639226","source":null,"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the IEEE/ACM 46th International Conference on Software Engineering","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3597503.3639226","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5032491991","display_name":"Rangeet Pan","orcid":"https://orcid.org/0000-0002-8875-1225"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Rangeet Pan","raw_affiliation_strings":["IBM Research, Yorktown Heights, New York, USA"],"raw_orcid":"https://orcid.org/0000-0002-8875-1225","affiliations":[{"raw_affiliation_string":"IBM Research, Yorktown Heights, New York, USA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5028705274","display_name":"Ali Reza Ibrahimzada","orcid":"https://orcid.org/0000-0002-3797-818X"},"institutions":[{"id":"https://openalex.org/I157725225","display_name":"University of Illinois Urbana-Champaign","ror":"https://ror.org/047426m28","country_code":"US","type":"education","lineage":["https://openalex.org/I157725225"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ali Reza Ibrahimzada","raw_affiliation_strings":["University of Illinois Urbana-Champaign, Champaign, Illinois, USA"],"raw_orcid":"https://orcid.org/0000-0002-3797-818X","affiliations":[{"raw_affiliation_string":"University of Illinois Urbana-Champaign, Champaign, Illinois, USA","institution_ids":["https://openalex.org/I157725225"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5059912927","display_name":"Rahul Krishna","orcid":"https://orcid.org/0000-0002-5899-6651"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Rahul Krishna","raw_affiliation_strings":["IBM Research, Yorktown Heights, New York, USA"],"raw_orcid":"https://orcid.org/0000-0002-5899-6651","affiliations":[{"raw_affiliation_string":"IBM Research, Yorktown Heights, New York, USA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5061989853","display_name":"Divya Sankar","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Divya Sankar","raw_affiliation_strings":["IBM Research, Yorktown Heights, New York, USA"],"raw_orcid":"https://orcid.org/0009-0006-3209-4154","affiliations":[{"raw_affiliation_string":"IBM Research, Yorktown Heights, New York, USA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5092616535","display_name":"Lambert Pouguem Wassi","orcid":"https://orcid.org/0009-0004-0281-1779"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lambert Pouguem Wassi","raw_affiliation_strings":["IBM Research, Yorktown Heights, New York, USA"],"raw_orcid":"https://orcid.org/0009-0004-0281-1779","affiliations":[{"raw_affiliation_string":"IBM Research, Yorktown Heights, New York, USA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5068061267","display_name":"Michele Merler","orcid":"https://orcid.org/0000-0002-4358-8671"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Michele Merler","raw_affiliation_strings":["IBM Research, Yorktown Heights, New York, USA"],"raw_orcid":"https://orcid.org/0000-0002-4358-8671","affiliations":[{"raw_affiliation_string":"IBM Research, Yorktown Heights, New York, USA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5095381632","display_name":"Boris Sobolev","orcid":"https://orcid.org/0009-0009-7833-0230"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Boris Sobolev","raw_affiliation_strings":["IBM Research, Yorktown Heights, New York, USA"],"raw_orcid":"https://orcid.org/0009-0009-7833-0230","affiliations":[{"raw_affiliation_string":"IBM Research, Yorktown Heights, New York, USA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5087072038","display_name":"Raju Pavuluri","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Raju Pavuluri","raw_affiliation_strings":["IBM Research, Yorktown Heights, New York, USA"],"raw_orcid":"https://orcid.org/0009-0008-8810-2381","affiliations":[{"raw_affiliation_string":"IBM Research, Yorktown Heights, New York, USA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103241577","display_name":"Saurabh Sinha","orcid":"https://orcid.org/0000-0003-4092-2643"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Saurabh Sinha","raw_affiliation_strings":["IBM Research, Yorktown Heights, New York, USA"],"raw_orcid":"https://orcid.org/0000-0003-4092-2643","affiliations":[{"raw_affiliation_string":"IBM Research, Yorktown Heights, New York, USA","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5058824250","display_name":"Reyhaneh Jabbarvand","orcid":"https://orcid.org/0000-0002-0668-8526"},"institutions":[{"id":"https://openalex.org/I157725225","display_name":"University of Illinois Urbana-Champaign","ror":"https://ror.org/047426m28","country_code":"US","type":"education","lineage":["https://openalex.org/I157725225"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Reyhaneh Jabbarvand","raw_affiliation_strings":["University of Illinois, Urbana-Champaign, Champaign, Illinois, USA"],"raw_orcid":"https://orcid.org/0000-0002-0668-8526","affiliations":[{"raw_affiliation_string":"University of Illinois, Urbana-Champaign, Champaign, Illinois, USA","institution_ids":["https://openalex.org/I157725225"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":10,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":66.8845,"has_fulltext":true,"cited_by_count":97,"citation_normalized_percentile":{"value":0.99941816,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":99,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"13"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10743","display_name":"Software Testing and Debugging Techniques","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1712","display_name":"Software"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12423","display_name":"Software Reliability and Analysis Research","score":0.9941999912261963,"subfield":{"id":"https://openalex.org/subfields/1712","display_name":"Software"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7230016589164734},{"id":"https://openalex.org/keywords/python","display_name":"Python (programming language)","score":0.6414752006530762},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.5680098533630371},{"id":"https://openalex.org/keywords/translation","display_name":"Translation (biology)","score":0.5426521897315979},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.5364654064178467},{"id":"https://openalex.org/keywords/java","display_name":"Java","score":0.48567187786102295},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.44303980469703674},{"id":"https://openalex.org/keywords/machine-translation","display_name":"Machine translation","score":0.4129478633403778},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4077221155166626}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7230016589164734},{"id":"https://openalex.org/C519991488","wikidata":"https://www.wikidata.org/wiki/Q28865","display_name":"Python (programming language)","level":2,"score":0.6414752006530762},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.5680098533630371},{"id":"https://openalex.org/C149364088","wikidata":"https://www.wikidata.org/wiki/Q185917","display_name":"Translation (biology)","level":4,"score":0.5426521897315979},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.5364654064178467},{"id":"https://openalex.org/C548217200","wikidata":"https://www.wikidata.org/wiki/Q251","display_name":"Java","level":2,"score":0.48567187786102295},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.44303980469703674},{"id":"https://openalex.org/C203005215","wikidata":"https://www.wikidata.org/wiki/Q79798","display_name":"Machine translation","level":2,"score":0.4129478633403778},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4077221155166626},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0},{"id":"https://openalex.org/C105580179","wikidata":"https://www.wikidata.org/wiki/Q188928","display_name":"Messenger RNA","level":3,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3597503.3639226","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3597503.3639226","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3597503.3639226","source":null,"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the IEEE/ACM 46th International Conference on Software Engineering","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3597503.3639226","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3597503.3639226","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3597503.3639226","source":null,"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the IEEE/ACM 46th International Conference on Software Engineering","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4394769102.pdf","grobid_xml":"https://content.openalex.org/works/W4394769102.grobid-xml"},"referenced_works_count":35,"referenced_works":["https://openalex.org/W62393983","https://openalex.org/W1668361173","https://openalex.org/W1972141422","https://openalex.org/W2074032109","https://openalex.org/W2088705091","https://openalex.org/W2090878800","https://openalex.org/W2247864914","https://openalex.org/W2597586341","https://openalex.org/W2850992922","https://openalex.org/W2913438996","https://openalex.org/W2951222477","https://openalex.org/W2968594320","https://openalex.org/W2980708516","https://openalex.org/W3091633490","https://openalex.org/W3096717112","https://openalex.org/W3098044990","https://openalex.org/W3148399464","https://openalex.org/W3154248444","https://openalex.org/W3161700111","https://openalex.org/W3184007005","https://openalex.org/W3196243846","https://openalex.org/W3208407575","https://openalex.org/W3209994714","https://openalex.org/W3211962263","https://openalex.org/W4206128413","https://openalex.org/W4221015471","https://openalex.org/W4221143046","https://openalex.org/W4225108562","https://openalex.org/W4281763794","https://openalex.org/W4287328196","https://openalex.org/W4308643319","https://openalex.org/W4312971267","https://openalex.org/W4313547646","https://openalex.org/W4382239980","https://openalex.org/W4382463671"],"related_works":["https://openalex.org/W2341492732","https://openalex.org/W3187193180","https://openalex.org/W106542691","https://openalex.org/W1699080303","https://openalex.org/W4297799326","https://openalex.org/W3116064965","https://openalex.org/W4287027380","https://openalex.org/W3193760048","https://openalex.org/W2883671469","https://openalex.org/W2728761353"],"abstract_inverted_index":{"Code":[0],"translation":[1,44,76,94,140,147,180,209,239],"aims":[2],"to":[3,12,31,46,64,111,115,124,177],"convert":[4],"source":[5],"code":[6,24,33,43,72,75,97,117,146,208,238,249],"from":[7,99,122,169],"one":[8],"programming":[9],"language":[10,20],"(PL)":[11],"another.":[13],"Given":[14],"the":[15,39,66,93,127,197,204,217,232],"promising":[16],"abilities":[17],"of":[18,41,68,79,95,133,139,159,199,206,219,224,235,247],"large":[19],"models":[21],"(LLMs)":[22],"in":[23,222,237,251,271],"synthesis,":[25],"researchers":[26],"are":[27,109],"exploring":[28],"their":[29,48,162],"potential":[30],"automate":[32,116],"translation.":[34],"The":[35],"prerequisite":[36],"for":[37,74,126,242],"advancing":[38],"state":[40],"LLM-based":[42,145,207],"is":[45,216],"understand":[47],"promises":[49],"and":[50,71,87,102,165,226,240,264],"limitations":[51,234],"over":[52],"existing":[53],"techniques.":[54],"To":[55,187],"that":[56,107,155,173,188,228],"end,":[57,189],"we":[58,190],"present":[59],"a":[60,192],"large-scale":[61],"empirical":[62],"study":[63,171,215],"investigate":[65],"ability":[67],"general":[69],"LLMs":[70,73,108,178,236],"across":[77],"pairs":[78],"different":[80],"languages,":[81],"including":[82],"C,":[83],"C++,":[84],"Go,":[85],"Java,":[86],"Python.":[88],"Our":[89,152,214,245],"study,":[90],"which":[91],"involves":[92],"1,700":[96,248],"samples":[98,250],"three":[100],"benchmarks":[101],"two":[103,157],"real-world":[104],"projects,":[105],"reveals":[106],"yet":[110],"be":[112],"reliably":[113],"used":[114],"translation---with":[118],"correct":[119],"translations":[120,135],"ranging":[121],"2.1%":[123],"47.3%":[125],"studied":[128],"LLMs.":[129],"Further":[130],"manual":[131],"investigation":[132],"unsuccessful":[134],"identifies":[136],"15":[137],"categories":[138],"bugs.":[141],"We":[142],"also":[143],"compare":[144],"with":[148,254],"traditional":[149],"non-LLM-based":[150],"approaches.":[151],"analysis":[153],"shows":[154],"these":[156],"classes":[158],"techniques":[160],"have":[161],"own":[163],"strengths":[164],"weaknesses.":[166],"Finally,":[167],"insights":[168,230],"our":[170],"suggest":[172],"providing":[174],"more":[175],"context":[176],"during":[179],"can":[181],"help":[182,268],"them":[183],"produce":[184],"better":[185],"results.":[186],"propose":[191],"prompt-crafting":[193],"approach":[194],"based":[195],"on":[196,212],"symptoms":[198],"erroneous":[200],"translations;":[201],"this":[202,272],"improves":[203],"performance":[205],"by":[210],"5.5%":[211],"average.":[213],"first":[218],"its":[220],"kind,":[221],"terms":[223],"scale":[225],"breadth,":[227],"provides":[229],"into":[231],"current":[233],"opportunities":[241],"improving":[243],"them.":[244],"dataset---consisting":[246],"five":[252],"PLs":[253],"10K+":[255],"tests,":[256],"43K+":[257],"translated":[258],"code,":[259],"1,748":[260],"manually":[261],"labeled":[262],"bugs,":[263],"1,365":[265],"bug-fix":[266],"pairs---can":[267],"drive":[269],"research":[270],"area.":[273]},"counts_by_year":[{"year":2026,"cited_by_count":8},{"year":2025,"cited_by_count":72},{"year":2024,"cited_by_count":17}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2024-04-13T00:00:00"}
