{"id":"https://openalex.org/W2134001185","doi":"https://doi.org/10.1145/2723742.2723758","title":"Naturalness of Natural Language Artifacts in Software","display_name":"Naturalness of Natural Language Artifacts in Software","publication_year":2015,"publication_date":"2015-02-18","ids":{"openalex":"https://openalex.org/W2134001185","doi":"https://doi.org/10.1145/2723742.2723758","mag":"2134001185"},"language":"en","primary_location":{"id":"doi:10.1145/2723742.2723758","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2723742.2723758","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 8th India Software Engineering Conference","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5072230453","display_name":"Giriprasad Sridhara","orcid":null},"institutions":[{"id":"https://openalex.org/I1341412227","display_name":"IBM (United States)","ror":"https://ror.org/05hh8d621","country_code":"US","type":"company","lineage":["https://openalex.org/I1341412227"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Giriprasad Sridhara","raw_affiliation_strings":["IBM Research","IBM Research, -"],"affiliations":[{"raw_affiliation_string":"IBM Research","institution_ids":[]},{"raw_affiliation_string":"IBM Research, -","institution_ids":["https://openalex.org/I1341412227"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5031880575","display_name":"Vibha Singhal Sinha","orcid":null},"institutions":[{"id":"https://openalex.org/I1341412227","display_name":"IBM (United States)","ror":"https://ror.org/05hh8d621","country_code":"US","type":"company","lineage":["https://openalex.org/I1341412227"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Vibha Singhal Sinha","raw_affiliation_strings":["IBM Research","IBM Research, -"],"affiliations":[{"raw_affiliation_string":"IBM Research","institution_ids":[]},{"raw_affiliation_string":"IBM Research, -","institution_ids":["https://openalex.org/I1341412227"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5065047775","display_name":"Senthil Mani","orcid":"https://orcid.org/0000-0002-9624-2623"},"institutions":[{"id":"https://openalex.org/I1341412227","display_name":"IBM (United States)","ror":"https://ror.org/05hh8d621","country_code":"US","type":"company","lineage":["https://openalex.org/I1341412227"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Senthil Mani","raw_affiliation_strings":["IBM Research","IBM Research, -"],"affiliations":[{"raw_affiliation_string":"IBM Research","institution_ids":[]},{"raw_affiliation_string":"IBM Research, -","institution_ids":["https://openalex.org/I1341412227"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5072230453"],"corresponding_institution_ids":["https://openalex.org/I1341412227"],"apc_list":null,"apc_paid":null,"fwci":0.7946,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.8112384,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"156","last_page":"165"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10430","display_name":"Software Engineering Techniques and Practices","score":0.9969000220298767,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12423","display_name":"Software Reliability and Analysis Research","score":0.9937999844551086,"subfield":{"id":"https://openalex.org/subfields/1712","display_name":"Software"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/naturalness","display_name":"Naturalness","score":0.9654936194419861},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.755169153213501},{"id":"https://openalex.org/keywords/perplexity","display_name":"Perplexity","score":0.5769968628883362},{"id":"https://openalex.org/keywords/commit","display_name":"Commit","score":0.5538288354873657},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5456576347351074},{"id":"https://openalex.org/keywords/source-code","display_name":"Source code","score":0.5420966148376465},{"id":"https://openalex.org/keywords/natural-language","display_name":"Natural language","score":0.5315176248550415},{"id":"https://openalex.org/keywords/artifact","display_name":"Artifact (error)","score":0.4747743308544159},{"id":"https://openalex.org/keywords/string","display_name":"String (physics)","score":0.44216859340667725},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.4301256239414215},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.40982869267463684},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.3237988352775574},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.2472078800201416},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.1918538212776184},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.09094458818435669}],"concepts":[{"id":"https://openalex.org/C134537474","wikidata":"https://www.wikidata.org/wiki/Q17144832","display_name":"Naturalness","level":2,"score":0.9654936194419861},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.755169153213501},{"id":"https://openalex.org/C100279451","wikidata":"https://www.wikidata.org/wiki/Q372193","display_name":"Perplexity","level":3,"score":0.5769968628883362},{"id":"https://openalex.org/C153180980","wikidata":"https://www.wikidata.org/wiki/Q19776675","display_name":"Commit","level":2,"score":0.5538288354873657},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5456576347351074},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.5420966148376465},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.5315176248550415},{"id":"https://openalex.org/C2779010991","wikidata":"https://www.wikidata.org/wiki/Q2720909","display_name":"Artifact (error)","level":2,"score":0.4747743308544159},{"id":"https://openalex.org/C157486923","wikidata":"https://www.wikidata.org/wiki/Q1376436","display_name":"String (physics)","level":2,"score":0.44216859340667725},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.4301256239414215},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.40982869267463684},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.3237988352775574},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.2472078800201416},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.1918538212776184},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.09094458818435669},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C37914503","wikidata":"https://www.wikidata.org/wiki/Q156495","display_name":"Mathematical physics","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/2723742.2723758","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2723742.2723758","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 8th India Software Engineering Conference","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":17,"referenced_works":["https://openalex.org/W1608271177","https://openalex.org/W1970018430","https://openalex.org/W1970607969","https://openalex.org/W2012313588","https://openalex.org/W2050219469","https://openalex.org/W2059556545","https://openalex.org/W2082160726","https://openalex.org/W2117228548","https://openalex.org/W2118519983","https://openalex.org/W2125480321","https://openalex.org/W2128737833","https://openalex.org/W2128990852","https://openalex.org/W2142403498","https://openalex.org/W2160517961","https://openalex.org/W2166879716","https://openalex.org/W2167527590","https://openalex.org/W2284840260"],"related_works":["https://openalex.org/W2169518243","https://openalex.org/W2252095989","https://openalex.org/W4322096525","https://openalex.org/W2551914602","https://openalex.org/W4287323699","https://openalex.org/W4281893144","https://openalex.org/W2105076537","https://openalex.org/W2084531783","https://openalex.org/W2902731467","https://openalex.org/W2787311093"],"abstract_inverted_index":{"We":[0,37,94,222,265],"present":[1,95,110,267],"a":[2,41,127,171,224,268],"study":[3],"on":[4,98,246,262],"the":[5,8,48,86,112,121,141,150,232],"naturalness":[6,87,198],"of":[7,88,120,144,201,211,270,274],"natural":[9,21,128],"language":[10,22,129,234],"artifacts":[11,90],"in":[12,111,249],"software.":[13,93,135],"Naturalness":[14],"is":[15,85,126,132],"essentially":[16],"repetitiveness":[17],"or":[18,45,66],"predictability.":[19],"By":[20],"artifacts,":[23],"we":[24,124,229],"mean":[25],"source":[26,60,99,159,195,263],"code":[27,61,100,160],"comments,":[28,101],"revision":[29],"history":[30],"messages,":[31],"bug":[32,104,191,218],"reports":[33,183,192],"and":[34,108,115,158,168,217,281],"so":[35],"on.":[36],"measure":[38],"\"naturalness\"":[39],"using":[40,231],"standard":[42],"measure,":[43],"cross-entropy":[44],"perplexity":[46],"from":[47,82,163,184,193,242,258],"widely":[49],"used":[50],"N-Gram":[51,233],"models.":[52],"Previously,":[53],"Hindle":[54],"et":[55],"al.":[56],"demonstrated":[57],"empirically":[58],"that":[59,79,123,131,200,228,276],"was":[62],"comparatively":[63,179],"more":[64,69,188],"repetitive":[65],"regular":[67],"(i.e.,":[68],"natural)":[70],"when":[71],"compared":[72],"with":[73,92,134],"traditional":[74,175],"English":[75,176,203],"text.":[76,204],"A":[77],"question":[78,114,167],"logically":[80],"follows":[81],"their":[83],"work":[84],"other":[89],"associated":[91,133],"our":[96],"findings":[97,206],"commit":[102,247],"logs,":[103],"reports,":[105],"string":[106,155],"messages":[107,157,248],"content":[109],"popular":[113],"answer":[116],"forum,":[117],"StackOverflow.":[118],"Each":[119],"artifact":[122,130],"examine":[125],"However,":[136],"they":[137],"do":[138],"not":[139],"exhibit":[140],"same":[142],"amount":[143],"regularity":[145,189],"(naturalness).":[146],"Commit":[147],"logs":[148],"were":[149],"most":[151],"regular,":[152],"followed":[153],"by":[154],"literal":[156],"comments.":[161,264],"Content":[162],"StackOverflow":[164],"(viz.,":[165],"title,":[166],"answers)":[169],"showed":[170],"behavior":[172],"similar":[173],"to":[174,244,260,279,283],"text":[177],"i.e.,":[178],"lesser":[180],"regularity.":[181],"Bug":[182],"industrial":[185],"projects":[186],"exhibited":[187],"than":[190],"open":[194],"projects,":[196],"whose":[197],"resembled":[199],"typical":[202],"Our":[205],"have":[207],"implications":[208],"for":[209],"feasibility":[210],"building":[212],"tools":[213],"such":[214],"as":[215],"comment":[216],"report":[219],"completion":[220],"engines.":[221],"describe":[223],"next-word":[225],"prediction":[226],"tool":[227,237],"built":[230],"model.":[235],"This":[236],"achieved":[238,254],"an":[239,255],"accuracy":[240,256],"ranging":[241,257],"70":[243],"90%":[245],"different":[250],"projects.":[251],"It":[252],"also":[253,266],"56":[259],"78%":[261],"part":[269],"speech":[271],"based":[272],"analysis":[273],"words":[275],"are":[277],"easy":[278],"predict":[280],"difficult":[282],"predict.":[284]},"counts_by_year":[{"year":2016,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
