{"id":"https://openalex.org/W4385750033","doi":"https://doi.org/10.1145/3583780.3614869","title":"Evaluating and Optimizing the Effectiveness of Neural Machine Translation in Supporting Code Retrieval Models: A Study on the CAT Benchmark","display_name":"Evaluating and Optimizing the Effectiveness of Neural Machine Translation in Supporting Code Retrieval Models: A Study on the CAT Benchmark","publication_year":2023,"publication_date":"2023-10-21","ids":{"openalex":"https://openalex.org/W4385750033","doi":"https://doi.org/10.1145/3583780.3614869"},"language":"en","primary_location":{"id":"doi:10.1145/3583780.3614869","is_oa":true,"landing_page_url":"http://dx.doi.org/10.1145/3583780.3614869","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3583780.3614869","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 32nd ACM International Conference on Information and Knowledge Management","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3583780.3614869","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101722182","display_name":"Hung Phan","orcid":"https://orcid.org/0000-0001-7464-1597"},"institutions":[{"id":"https://openalex.org/I173911158","display_name":"Iowa State University","ror":"https://ror.org/04rswrd78","country_code":"US","type":"education","lineage":["https://openalex.org/I173911158"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Hung Phan","raw_affiliation_strings":["Iowa State University, Ames, IA, USA"],"affiliations":[{"raw_affiliation_string":"Iowa State University, Ames, IA, USA","institution_ids":["https://openalex.org/I173911158"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5079359777","display_name":"Ali Jannesari","orcid":"https://orcid.org/0000-0001-8672-5317"},"institutions":[{"id":"https://openalex.org/I173911158","display_name":"Iowa State University","ror":"https://ror.org/04rswrd78","country_code":"US","type":"education","lineage":["https://openalex.org/I173911158"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Ali Jannesari","raw_affiliation_strings":["Iowa State University, Ames, IA, USA"],"affiliations":[{"raw_affiliation_string":"Iowa State University, Ames, IA, USA","institution_ids":["https://openalex.org/I173911158"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5101722182"],"corresponding_institution_ids":["https://openalex.org/I173911158"],"apc_list":null,"apc_paid":null,"fwci":0.465,"has_fulltext":true,"cited_by_count":1,"citation_normalized_percentile":{"value":0.68196185,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"2055","last_page":"2064"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9945999979972839,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9944000244140625,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8695604801177979},{"id":"https://openalex.org/keywords/machine-translation","display_name":"Machine translation","score":0.7400299310684204},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5836008787155151},{"id":"https://openalex.org/keywords/abstract-syntax-tree","display_name":"Abstract syntax tree","score":0.5692837834358215},{"id":"https://openalex.org/keywords/source-code","display_name":"Source code","score":0.49407437443733215},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.4704655110836029},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.4689091444015503},{"id":"https://openalex.org/keywords/python","display_name":"Python (programming language)","score":0.4172234833240509},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3871331214904785},{"id":"https://openalex.org/keywords/parsing","display_name":"Parsing","score":0.19642969965934753}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8695604801177979},{"id":"https://openalex.org/C203005215","wikidata":"https://www.wikidata.org/wiki/Q79798","display_name":"Machine translation","level":2,"score":0.7400299310684204},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5836008787155151},{"id":"https://openalex.org/C58646249","wikidata":"https://www.wikidata.org/wiki/Q127380","display_name":"Abstract syntax tree","level":3,"score":0.5692837834358215},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.49407437443733215},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4704655110836029},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.4689091444015503},{"id":"https://openalex.org/C519991488","wikidata":"https://www.wikidata.org/wiki/Q28865","display_name":"Python (programming language)","level":2,"score":0.4172234833240509},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3871331214904785},{"id":"https://openalex.org/C186644900","wikidata":"https://www.wikidata.org/wiki/Q194152","display_name":"Parsing","level":2,"score":0.19642969965934753}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/3583780.3614869","is_oa":true,"landing_page_url":"http://dx.doi.org/10.1145/3583780.3614869","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3583780.3614869","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 32nd ACM International Conference on Information and Knowledge Management","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2308.04693","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2308.04693","pdf_url":"https://arxiv.org/pdf/2308.04693","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"doi:10.1145/3583780.3614869","is_oa":true,"landing_page_url":"http://dx.doi.org/10.1145/3583780.3614869","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3583780.3614869","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 32nd ACM International Conference on Information and Knowledge Management","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G801708531","display_name":null,"funder_award_id":"2211982","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4385750033.pdf","grobid_xml":"https://content.openalex.org/works/W4385750033.grobid-xml"},"referenced_works_count":26,"referenced_works":["https://openalex.org/W40327287","https://openalex.org/W2032175749","https://openalex.org/W2090878800","https://openalex.org/W2101746535","https://openalex.org/W2133459682","https://openalex.org/W2493916176","https://openalex.org/W2888557792","https://openalex.org/W2894732341","https://openalex.org/W2911468826","https://openalex.org/W2942870440","https://openalex.org/W2963212250","https://openalex.org/W2964150020","https://openalex.org/W2970575144","https://openalex.org/W3036369531","https://openalex.org/W3098605233","https://openalex.org/W3185835445","https://openalex.org/W3195727321","https://openalex.org/W4221166942","https://openalex.org/W4283363198","https://openalex.org/W4299358159","https://openalex.org/W4308641647","https://openalex.org/W4308643152","https://openalex.org/W4313563421","https://openalex.org/W4313563788","https://openalex.org/W4383745540","https://openalex.org/W4394638297"],"related_works":["https://openalex.org/W2207495067","https://openalex.org/W1906486629","https://openalex.org/W2789551765","https://openalex.org/W4310447811","https://openalex.org/W4389751695","https://openalex.org/W4247241040","https://openalex.org/W238463869","https://openalex.org/W2046704574","https://openalex.org/W1535099794","https://openalex.org/W2165004968"],"abstract_inverted_index":{"Neural":[0],"Machine":[1],"Translation":[2],"(NMT)":[3],"is":[4],"widely":[5],"applied":[6],"in":[7,29,38,46,53,64,81,85,122,131,167],"software":[8],"engineering":[9],"tasks.":[10],"The":[11],"effectiveness":[12],"of":[13,27,36,79,96,129,135,145,154,206,216,229],"NMT":[14,43,80,112,130,163,204],"for":[15],"code":[16,63,68,172,190,196,219],"retrieval":[17],"relies":[18],"on":[19,179],"the":[20,25,30,34,39,77,86,93,127,160,194,212,233],"ability":[21],"to":[22,33,55,61,176,187,224],"learn":[23],"from":[24,57,193],"sequence":[26,35],"tokens":[28,37,173],"source":[31,62,136],"language":[32,59],"target":[40],"language.":[41],"While":[42],"performs":[44,164],"well":[45],"pseudocode-to-code":[47],"translation[17],":[48],"it":[49],"might":[50],"have":[51],"challenges":[52],"learning":[54,132,168,207],"translate":[56],"natural":[58,82],"query":[60],"newly":[65,87],"curated":[66,88],"real-world":[67],"documentation/":[69],"implementation":[70],"datasets.":[71],"In":[72],"this":[73,123],"work,":[74],"we":[75,138,183],"analyze":[76],"performance":[78],"language-to-code":[83],"translation":[84],"CAT":[89,234],"benchmark[31]":[90],"that":[91,111,159],"includes":[92],"optimized":[94],"versions":[95],"three":[97],"Java":[98],"datasets":[99],"TLCodeSum,":[100],"CodeSearchNet,":[101],"Funcom,":[102],"and":[103,119,201,226],"a":[104,142,152],"Python":[105],"dataset":[106],"PCSD.":[107],"Our":[108,203],"evaluation":[109],"shows":[110],"has":[113],"low":[114],"accuracy,":[115],"measured":[116],"by":[117,222],"CrystalBLEU[10]":[118],"Meteor[9]":[120],"metrics":[121],"task.":[124],"To":[125],"alleviate":[126],"duty":[128],"complex":[133],"representation":[134,144],"code,":[137],"propose":[139],"ASTTrans":[140,169,185,208],"Representation,":[141],"tailored":[143],"an":[146],"Abstract":[147],"Syntax":[148],"Tree":[149],"(AST)":[150],"using":[151,199],"subset":[153],"non-terminal":[155],"nodes.":[156],"We":[157],"show":[158],"classical":[161],"approach":[162],"significantly":[165],"better":[166],"Representation":[170,186,209],"over":[171,232],"with":[174],"up":[175,223],"36%":[177],"improvement":[178],"Meteor":[180],"score.":[181],"Moreover,":[182],"leverage":[184],"conduct":[188],"combined":[189],"search":[191,197,220],"processes":[192,198,221],"state-of-the-art":[195,218],"GraphCodeBERT[13],":[200],"UniXcoder[12].":[202],"models":[205],"can":[210],"boost":[211],"Mean":[213],"Reciprocal":[214],"Rank":[215],"these":[217],"3.08%":[225],"improve":[227],"23.08%":[228],"queries'":[230],"results":[231],"benchmark.":[235]},"counts_by_year":[{"year":2024,"cited_by_count":1}],"updated_date":"2026-03-10T16:38:18.471706","created_date":"2025-10-10T00:00:00"}
