{"id":"https://openalex.org/W4414827803","doi":"https://doi.org/10.48550/arxiv.2504.08234","title":"Bringing Structure to Naturalness: On the Naturalness of ASTs","display_name":"Bringing Structure to Naturalness: On the Naturalness of ASTs","publication_year":2025,"publication_date":"2025-04-11","ids":{"openalex":"https://openalex.org/W4414827803","doi":"https://doi.org/10.48550/arxiv.2504.08234"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2504.08234","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2504.08234","pdf_url":"https://arxiv.org/pdf/2504.08234","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2504.08234","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5051026501","display_name":"Profir-Petru P\u00e2r\u0163achi","orcid":"https://orcid.org/0000-0003-4940-6864"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"P\u00e2r\u0163achi, Profir-Petru","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5066053285","display_name":"Mahito Sugiyama","orcid":"https://orcid.org/0000-0001-5907-9831"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sugiyama, Mahito","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5051026501"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12000","display_name":"Systems Engineering Methodologies and Applications","score":0.11240000277757645,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12000","display_name":"Systems Engineering Methodologies and Applications","score":0.11240000277757645,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/naturalness","display_name":"Naturalness","score":0.8043000102043152},{"id":"https://openalex.org/keywords/abstract-syntax","display_name":"Abstract syntax","score":0.6934000253677368},{"id":"https://openalex.org/keywords/syntax","display_name":"Syntax","score":0.5774999856948853},{"id":"https://openalex.org/keywords/source-code","display_name":"Source code","score":0.527899980545044},{"id":"https://openalex.org/keywords/abstract-syntax-tree","display_name":"Abstract syntax tree","score":0.5011000037193298},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.47929999232292175},{"id":"https://openalex.org/keywords/security-token","display_name":"Security token","score":0.47290000319480896},{"id":"https://openalex.org/keywords/natural-language","display_name":"Natural language","score":0.4016999900341034}],"concepts":[{"id":"https://openalex.org/C134537474","wikidata":"https://www.wikidata.org/wiki/Q17144832","display_name":"Naturalness","level":2,"score":0.8043000102043152},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7520999908447266},{"id":"https://openalex.org/C114408938","wikidata":"https://www.wikidata.org/wiki/Q333373","display_name":"Abstract syntax","level":3,"score":0.6934000253677368},{"id":"https://openalex.org/C60048249","wikidata":"https://www.wikidata.org/wiki/Q37437","display_name":"Syntax","level":2,"score":0.5774999856948853},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.527899980545044},{"id":"https://openalex.org/C58646249","wikidata":"https://www.wikidata.org/wiki/Q127380","display_name":"Abstract syntax tree","level":3,"score":0.5011000037193298},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.4846999943256378},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.47929999232292175},{"id":"https://openalex.org/C48145219","wikidata":"https://www.wikidata.org/wiki/Q1335365","display_name":"Security token","level":2,"score":0.47290000319480896},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4235000014305115},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.4016999900341034},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3995000123977661},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.3562999963760376},{"id":"https://openalex.org/C124304363","wikidata":"https://www.wikidata.org/wiki/Q673661","display_name":"Abstraction","level":2,"score":0.35409998893737793},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.3285999894142151},{"id":"https://openalex.org/C548217200","wikidata":"https://www.wikidata.org/wiki/Q251","display_name":"Java","level":2,"score":0.32420000433921814},{"id":"https://openalex.org/C197640229","wikidata":"https://www.wikidata.org/wiki/Q2534066","display_name":"Predictability","level":2,"score":0.3160000145435333},{"id":"https://openalex.org/C113174947","wikidata":"https://www.wikidata.org/wiki/Q2859736","display_name":"Tree (set theory)","level":2,"score":0.31119999289512634},{"id":"https://openalex.org/C11742125","wikidata":"https://www.wikidata.org/wiki/Q1195374","display_name":"Syntax error","level":4,"score":0.3077999949455261},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.2874999940395355},{"id":"https://openalex.org/C133237599","wikidata":"https://www.wikidata.org/wiki/Q2295111","display_name":"Code smell","level":5,"score":0.26080000400543213},{"id":"https://openalex.org/C18552078","wikidata":"https://www.wikidata.org/wiki/Q255615","display_name":"Code-switching","level":2,"score":0.25440001487731934},{"id":"https://openalex.org/C114289077","wikidata":"https://www.wikidata.org/wiki/Q3284399","display_name":"Statistical model","level":2,"score":0.2508000135421753}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2504.08234","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2504.08234","pdf_url":"https://arxiv.org/pdf/2504.08234","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"doi:10.48550/arxiv.2504.08234","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2504.08234","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2504.08234","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2504.08234","pdf_url":"https://arxiv.org/pdf/2504.08234","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Source":[0],"code":[1,13,33,42,64,66,81,92],"comes":[2],"in":[3,129],"different":[4],"shapes":[5],"and":[6,105,122],"forms.":[7],"Previous":[8],"research":[9,117,158],"has":[10,53],"already":[11],"shown":[12],"to":[14,57,173,183],"be":[15,35,102,196],"more":[16],"predictable":[17],"than":[18],"natural":[19],"language":[20,185],"as":[21,23,142,165],"well":[22],"highlighted":[24,155],"its":[25],"statistical":[26],"predictability":[27],"at":[28],"the":[29,39,59,109,130,151,184],"token":[30,153],"level:":[31],"source":[32],"can":[34,195],"natural.":[36,95],"More":[37],"recently,":[38],"structure":[40],"of":[41,73,80,91,132],"--":[43,52],"control":[44],"flow,":[45],"syntax":[46,49,152],"graphs,":[47],"abstract":[48],"trees":[50],"etc.":[51,70],"been":[54],"successfully":[55],"used":[56],"improve":[58],"state-of-the-art":[60,200],"on":[61,202],"numerous":[62],"tasks:":[63],"suggestion,":[65],"summarisation,":[67],"method":[68],"naming":[69,115],"This":[71],"body":[72],"work":[74],"implicitly":[75],"assumes":[76,119],"that":[77,87,98,118,177],"structured":[78,89],"representations":[79],"are":[82,144],"similarly":[83],"statistically":[84],"predictable,":[85],"i.e.":[86],"a":[88],"view":[90,100],"is":[93,181],"also":[94,126],"We":[96],"consider":[97],"this":[99,120],"should":[101],"made":[103],"explicit":[104],"propose":[106],"directly":[107],"studying":[108],"Structured":[110],"Naturalness":[111],"Hypothesis.":[112],"Beyond":[113],"just":[114],"existing":[116],"hypothesis":[121],"formulating":[123],"it,":[124],"we":[125,169,189],"provide":[127],"evidence":[128],"case":[131],"trees:":[133],"TreeLSTM":[134],"models":[135,148,172],"over":[136],"ASTs":[137],"for":[138,198],"some":[139],"languages,":[140,163],"such":[141,164,192],"Ruby,":[143],"competitive":[145],"with":[146],"$n$-gram":[147],"while":[149,206],"handling":[150],"issue":[154],"by":[156],"previous":[157],"'for":[159],"free'.":[160],"For":[161],"other":[162],"Java":[166],"or":[167],"Python,":[168],"find":[170],"tree":[171],"perform":[174],"worse,":[175],"suggesting":[176],"downstream":[178],"task":[179],"improvement":[180],"uncorrelated":[182],"modelling":[186],"task.":[187],"Further,":[188],"show":[190],"how":[191],"naturalness":[193],"signals":[194],"employed":[197],"near":[199],"results":[201],"just-in-time":[203],"defect":[204],"prediction":[205],"forgoing":[207],"manual":[208],"feature":[209],"engineering":[210],"work.":[211]},"counts_by_year":[],"updated_date":"2026-03-07T16:01:11.037858","created_date":"2025-10-10T00:00:00"}
