{"id":"https://openalex.org/W2915514522","doi":"https://doi.org/10.1145/3306204","title":"Predicting program properties from 'big code'","display_name":"Predicting program properties from 'big code'","publication_year":2019,"publication_date":"2019-02-21","ids":{"openalex":"https://openalex.org/W2915514522","doi":"https://doi.org/10.1145/3306204","mag":"2915514522"},"language":"en","primary_location":{"id":"doi:10.1145/3306204","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3306204","pdf_url":null,"source":{"id":"https://openalex.org/S103482838","display_name":"Communications of the ACM","issn_l":"0001-0782","issn":["0001-0782","1557-7317"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Communications of the ACM","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5051227985","display_name":"Veselin Raychev","orcid":null},"institutions":[{"id":"https://openalex.org/I35440088","display_name":"ETH Zurich","ror":"https://ror.org/05a28rw58","country_code":"CH","type":"education","lineage":["https://openalex.org/I2799323385","https://openalex.org/I35440088"]}],"countries":["CH"],"is_corresponding":true,"raw_author_name":"Veselin Raychev","raw_affiliation_strings":["ETH Zurich, Zurich, Switzerland"],"affiliations":[{"raw_affiliation_string":"ETH Zurich, Zurich, Switzerland","institution_ids":["https://openalex.org/I35440088"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5069901599","display_name":"Martin Vechev","orcid":"https://orcid.org/0000-0002-0054-9568"},"institutions":[{"id":"https://openalex.org/I35440088","display_name":"ETH Zurich","ror":"https://ror.org/05a28rw58","country_code":"CH","type":"education","lineage":["https://openalex.org/I2799323385","https://openalex.org/I35440088"]}],"countries":["CH"],"is_corresponding":false,"raw_author_name":"Martin Vechev","raw_affiliation_strings":["ETH Zurich, Zurich, Switzerland"],"affiliations":[{"raw_affiliation_string":"ETH Zurich, Zurich, Switzerland","institution_ids":["https://openalex.org/I35440088"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5003040843","display_name":"Andreas Krause","orcid":"https://orcid.org/0000-0001-7260-9673"},"institutions":[{"id":"https://openalex.org/I35440088","display_name":"ETH Zurich","ror":"https://ror.org/05a28rw58","country_code":"CH","type":"education","lineage":["https://openalex.org/I2799323385","https://openalex.org/I35440088"]}],"countries":["CH"],"is_corresponding":false,"raw_author_name":"Andreas Krause","raw_affiliation_strings":["ETH Zurich, Zurich, Switzerland"],"affiliations":[{"raw_affiliation_string":"ETH Zurich, Zurich, Switzerland","institution_ids":["https://openalex.org/I35440088"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5051227985"],"corresponding_institution_ids":["https://openalex.org/I35440088"],"apc_list":null,"apc_paid":null,"fwci":2.1561,"has_fulltext":false,"cited_by_count":21,"citation_normalized_percentile":{"value":0.87946483,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":98},"biblio":{"volume":"62","issue":"3","first_page":"99","last_page":"107"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11241","display_name":"Advanced Malware Detection Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11241","display_name":"Advanced Malware Detection Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10743","display_name":"Software Testing and Debugging Techniques","score":0.9919000267982483,"subfield":{"id":"https://openalex.org/subfields/1712","display_name":"Software"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8929060697555542},{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.6118554472923279},{"id":"https://openalex.org/keywords/probabilistic-logic","display_name":"Probabilistic logic","score":0.5880793929100037},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.5336547493934631},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.522089421749115},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.48670056462287903},{"id":"https://openalex.org/keywords/identifier","display_name":"Identifier","score":0.4844091534614563},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.4731115996837616},{"id":"https://openalex.org/keywords/conditional-random-field","display_name":"Conditional random field","score":0.46499505639076233},{"id":"https://openalex.org/keywords/source-code","display_name":"Source code","score":0.4359758496284485},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.3348625600337982},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.33049505949020386},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.20745939016342163},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.14217883348464966}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8929060697555542},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.6118554472923279},{"id":"https://openalex.org/C49937458","wikidata":"https://www.wikidata.org/wiki/Q2599292","display_name":"Probabilistic logic","level":2,"score":0.5880793929100037},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5336547493934631},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.522089421749115},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.48670056462287903},{"id":"https://openalex.org/C154504017","wikidata":"https://www.wikidata.org/wiki/Q853614","display_name":"Identifier","level":2,"score":0.4844091534614563},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.4731115996837616},{"id":"https://openalex.org/C152565575","wikidata":"https://www.wikidata.org/wiki/Q1124538","display_name":"Conditional random field","level":2,"score":0.46499505639076233},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.4359758496284485},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.3348625600337982},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.33049505949020386},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.20745939016342163},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.14217883348464966},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3306204","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3306204","pdf_url":null,"source":{"id":"https://openalex.org/S103482838","display_name":"Communications of the ACM","issn_l":"0001-0782","issn":["0001-0782","1557-7317"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Communications of the ACM","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions","score":0.41999998688697815}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":10,"referenced_works":["https://openalex.org/W202191487","https://openalex.org/W1488217336","https://openalex.org/W2095844239","https://openalex.org/W2147880316","https://openalex.org/W2151322733","https://openalex.org/W2444132761","https://openalex.org/W2533383364","https://openalex.org/W2536055191","https://openalex.org/W4247950230","https://openalex.org/W4254504791"],"related_works":["https://openalex.org/W2356597680","https://openalex.org/W2093471820","https://openalex.org/W50079190","https://openalex.org/W4378651134","https://openalex.org/W2114846443","https://openalex.org/W3102147106","https://openalex.org/W2347460059","https://openalex.org/W2111726165","https://openalex.org/W3120493416","https://openalex.org/W2098192829"],"abstract_inverted_index":{"We":[0],"present":[1],"a":[2,18,47,95,154,181],"new":[3,184],"approach":[4,16],"for":[5,101,129,180],"predicting":[6,112,118],"program":[7,45,58,85,169],"properties":[8,30,59,170],"from":[9,21],"large":[10],"codebases":[11],"(aka":[12],"\"Big":[13,22,185],"Code\").":[14],"Our":[15],"learns":[17],"probabilistic":[19,72],"model":[20,27],"Code\"":[23,186],"and":[24,80,117,134,194],"uses":[25],"this":[26],"to":[28,42,52,69],"predict":[29],"of":[31,38,56,84,90,105,110,115,122,131,143,159,161,167,183],"new,":[32],"unseen":[33],"programs.":[34],"The":[35],"key":[36],"idea":[37],"our":[39,91,174],"work":[40,175],"is":[41],"transform":[43],"the":[44,54,108,165,178],"into":[46],"representation":[48],"that":[49],"allows":[50],"us":[51,68],"formulate":[53],"problem":[55,166],"inferring":[57,168],"as":[60,75,171,189],"structured":[61,172],"prediction":[62,83,97],"in":[63,107,141],"machine":[64],"learning.":[65],"This":[66],"enables":[67],"leverage":[70],"powerful":[71],"models":[73],"such":[74,188],"Conditional":[76],"Random":[77],"Fields":[78],"(CRFs)":[79],"perform":[81],"joint":[82],"properties.":[86],"As":[87],"an":[88],"example":[89],"approach,":[92],"we":[93],"built":[94],"scalable":[96],"engine":[98],"called":[99],"JSNICE":[100,125],"solving":[102],"two":[103],"kinds":[104],"tasks":[106],"context":[109],"JavaScript:":[111],"(syntactic)":[113],"names":[114,128],"identifiers":[116,133],"(semantic)":[119],"type":[120,136],"annotations":[121],"variables.":[123],"Experimentally,":[124],"predicts":[126],"correct":[127,140],"63%":[130],"name":[132],"its":[135,146],"annotation":[137],"predictions":[138],"are":[139],"81%":[142],"cases.":[144],"Since":[145],"public":[147],"release":[148],"at":[149],"http://jsnice.org,":[150],"JSNice":[151],"has":[152],"become":[153],"popular":[155],"system":[156],"with":[157],"hundreds":[158],"thousands":[160],"uses.":[162],"By":[163],"formulating":[164],"prediction,":[173],"opens":[176],"up":[177],"possibility":[179],"range":[182],"applications":[187],"de-obfuscators,":[190],"decompilers,":[191],"invariant":[192],"generators,":[193],"others.":[195]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":4},{"year":2022,"cited_by_count":3},{"year":2021,"cited_by_count":5},{"year":2020,"cited_by_count":3},{"year":2019,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
