{"id":"https://openalex.org/W4393431152","doi":"https://doi.org/10.5281/zenodo.7865748","title":"SE Stopwords","display_name":"SE Stopwords","publication_year":2023,"publication_date":"2023-04-26","ids":{"openalex":"https://openalex.org/W4393431152","doi":"https://doi.org/10.5281/zenodo.7865748"},"language":"en","primary_location":{"id":"pmh:oai:zenodo.org:7865748","is_oa":true,"landing_page_url":"https://zenodo.org/record/7865748","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"info:eu-repo/semantics/other"},"type":"dataset","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://zenodo.org/record/7865748","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5050710691","display_name":"Yaohou Fan","orcid":"https://orcid.org/0009-0006-0558-9733"},"institutions":[{"id":"https://openalex.org/I165779595","display_name":"University of Melbourne","ror":"https://ror.org/01ej9dk98","country_code":"AU","type":"education","lineage":["https://openalex.org/I165779595"]}],"countries":["AU"],"is_corresponding":true,"raw_author_name":"Yaohou Fan","raw_affiliation_strings":["The University of Melbourne"],"affiliations":[{"raw_affiliation_string":"The University of Melbourne","institution_ids":["https://openalex.org/I165779595"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5019739552","display_name":"Chetan Arora","orcid":"https://orcid.org/0000-0003-1466-7386"},"institutions":[{"id":"https://openalex.org/I2801239119","display_name":"Australian Regenerative Medicine Institute","ror":"https://ror.org/02qa5kg76","country_code":"AU","type":"facility","lineage":["https://openalex.org/I2801037857","https://openalex.org/I2801239119","https://openalex.org/I56590836"]},{"id":"https://openalex.org/I56590836","display_name":"Monash University","ror":"https://ror.org/02bfwt286","country_code":"AU","type":"education","lineage":["https://openalex.org/I56590836"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Arora, Chetan","raw_affiliation_strings":["Monash University"],"affiliations":[{"raw_affiliation_string":"Monash University","institution_ids":["https://openalex.org/I2801239119","https://openalex.org/I56590836"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5077658936","display_name":"Christoph Treude","orcid":"https://orcid.org/0000-0002-6919-2149"},"institutions":[{"id":"https://openalex.org/I165779595","display_name":"University of Melbourne","ror":"https://ror.org/01ej9dk98","country_code":"AU","type":"education","lineage":["https://openalex.org/I165779595"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Treude, Christoph","raw_affiliation_strings":["The University of Melbourne"],"affiliations":[{"raw_affiliation_string":"The University of Melbourne","institution_ids":["https://openalex.org/I165779595"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5050710691"],"corresponding_institution_ids":["https://openalex.org/I165779595"],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.8501999974250793,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.8501999974250793,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10430","display_name":"Software Engineering Techniques and Practices","score":0.06830000132322311,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10743","display_name":"Software Testing and Debugging Techniques","score":0.016100000590085983,"subfield":{"id":"https://openalex.org/subfields/1712","display_name":"Software"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.36711496114730835}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.36711496114730835}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:zenodo.org:7865748","is_oa":true,"landing_page_url":"https://zenodo.org/record/7865748","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"info:eu-repo/semantics/other"},{"id":"doi:10.5281/zenodo.7865748","is_oa":true,"landing_page_url":"https://doi.org/10.5281/zenodo.7865748","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"dataset"}],"best_oa_location":{"id":"pmh:oai:zenodo.org:7865748","is_oa":true,"landing_page_url":"https://zenodo.org/record/7865748","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"info:eu-repo/semantics/other"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W2358668433","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W2382290278","https://openalex.org/W2478288626","https://openalex.org/W4391913857","https://openalex.org/W2350741829","https://openalex.org/W2530322880"],"abstract_inverted_index":{"<strong>Overview</strong>":[0],"This":[1],"repository":[2],"contains":[3],"stopword":[4,193,258],"lists":[5,194,259,289],"specifically":[6],"tailored":[7],"for":[8,319,329,338,360,368,511],"natural":[9],"language":[10],"processing":[11],"(NLP)":[12],"tasks":[13],"applied":[14],"to":[15,21,68,162,178,196,263],"software":[16,34,102,114,124,269,283,321],"development":[17,270,284],"documents.":[18],"It":[19],"aims":[20],"enhance":[22],"the":[23,58,94,156,187,197,274,282,306,316,353,362,369,383,390,393],"efficiency":[24],"and":[25,41,45,110,135,276,300,521,524],"accuracy":[26],"of":[27,33,60,76,96,123,158,170,278,503],"NLP":[28,55,292],"applications":[29],"on":[30,348,382,532],"various":[31],"types":[32],"documentation,":[35],"including":[36],"bug":[37],"reports,":[38],"commit":[39],"messages,":[40],"API":[42],"documentation.":[43],"<strong>Background":[44],"Motivation</strong>":[46],"Stop":[47,202,250],"words,":[48],"deemed":[49],"non-predictive,":[50],"are":[51],"often":[52],"eliminated":[53],"in":[54,81,100,281,290],"tasks.":[56],"However,":[57],"definition":[59],"uninformative":[61,266],"vocabulary":[62],"remains":[63],"vague,":[64],"leading":[65],"most":[66],"algorithms":[67],"use":[69,287,502],"general":[70,164],"knowledge-based":[71],"stop":[72,77,97,138,152,165],"lists.":[73],"The":[74,146,183,373],"effectiveness":[75],"word":[78,98,203],"elimination,":[79],"particularly":[80],"domain-specific":[82,137,151,363],"settings,":[83],"is":[84],"debated":[85],"among":[86],"academics.":[87],"In":[88],"a":[89,101,163,378],"recent":[90],"paper,":[91],"we":[92,108],"investigated":[93],"usefulness":[95],"removal":[99],"engineering":[103,115,125,322],"context.":[104],"To":[105,286],"achieve":[106],"this,":[107],"replicated":[109],"experimented":[111],"with":[112],"three":[113],"research":[116,159],"tools":[117,160],"from":[118,130,268],"related":[119],"work.":[120],"A":[121],"corpus":[122],"domain-related":[126],"text":[127],"was":[128],"constructed":[129],"10,000":[131],"Stack":[132],"Overflow":[133],"questions,":[134],"200":[136],"words":[139,153,267],"were":[140],"identified":[141],"using":[142,150,191],"traditional":[143],"information-theoretic":[144],"methods.":[145],"results":[147,374],"demonstrated":[148],"that":[149],"significantly":[154],"improved":[155],"performance":[157,188],"compared":[161,195],"list.":[166],"Moreover,":[167],"17":[168,212],"out":[169,265],"19":[171,181,200],"evaluation":[172],"measures":[173],"showed":[174],"better":[175],"performance.":[176],"<strong>Comparison":[177],"Baseline":[179],"across":[180,199],"Metrics</strong>":[182],"table":[184],"below":[185],"summarizes":[186],"improvements":[189],"when":[190],"different":[192],"baseline":[198],"metrics.":[201],"list":[204],"Better":[205],"Worse":[206],"Same":[207],"SE":[208,215],"Domain":[209,216,223],"(TF-IDF)":[210],"(link)":[211,218,224,229,234,239,245],"1":[213,214,227,237,242,397],"(Poisson)":[217],"12":[219,253],"5":[220,241],"2":[221,248,454],"Technology":[222],"9":[225,226],"Large":[228],"11":[230,235],"8":[231],"0":[232],"Medium":[233],"7":[236,247],"Small":[238,244],"13":[240],"Very":[243],"10":[246],"No":[249],"Words":[251,510],"4":[252],"3":[254,475],"<strong>Usage":[255],"Instructions</strong>":[256],"These":[257],"can":[260],"be":[261,388],"used":[262,337],"filter":[264],"documents,":[271],"thereby":[272],"improving":[273],"understanding":[275],"analysis":[277],"textual":[279],"data":[280,318,328],"domain.":[285],"these":[288],"your":[291,298],"tasks,":[293],"simply":[294],"import":[295],"them":[296,302],"into":[297],"project":[299],"apply":[301],"as":[303,392],"filters":[304],"during":[305],"pre-processing":[307],"stage.":[308],"<strong>Folder":[309],"Structure</strong>":[310],"<pre><code>SE-stopwords":[311],"|--":[312,325,340,350,355],"data_for_replications":[313],"(contains":[314],"all":[315],"required":[317],"replicating":[320],"tools)":[323],"|":[324,333],"Maalej_Dataset":[326],"(original":[327],"app":[330],"review":[331],"tool)":[332],"`--":[334,357],"queries":[335],"(queries":[336],"RACKTool)":[339],"stackoverflow_questions":[341],"(more":[342],"than":[343],"10k":[344],"top":[345],"reviewed":[346],"questions":[347],"stackoverflow)":[349],"stopwords_lists":[351],"(all":[352],"stoplists)":[354],"replications":[356],"stackoverflow":[358],"(code":[359],"creating":[361],"corpus)":[364],"</code></pre>":[365,541],"<strong>Detailed":[366],"Results":[367],"Three":[370],"Replicated":[371],"Tools</strong>":[372],"may":[375],"vary":[376],"by":[377],"small":[379],"fraction":[380],"depending":[381],"trial,":[384],"but":[385],"they":[386,517],"should":[387],"approximately":[389],"same":[391],"tables":[394],"below.":[395],"<em><strong>Tool":[396,453,474],"(App":[398],"Review)</strong></em>":[399],"<strong>PD":[400],"(bug":[401],"report)</strong>":[402],"<strong>RT":[403],"(rating)</strong>":[404],"<strong>FR":[405],"(feature":[406],"request)</strong>":[407],"<strong>UE":[408],"(user":[409],"experience)</strong>":[410],"Pre":[411,414,417,420],"Rec":[412,415,418,421],"F1":[413,416,419,422],"<strong>SE":[423,438,460,467,480,483],"domain":[424,439,461,468,481,484],"(Poisson)</strong>":[425,462,482],"10.0%":[426],"37.5%":[427],"15.8%":[428],"72.1%":[429],"78.0%":[430],"74.9%":[431],"7.1%":[432],"29.8%":[433],"11.5%":[434],"11.6%":[435],"32.0%":[436],"17.0%":[437],"(TF-IDF)</strong>":[440,469,485],"10.7%":[441],"40.2%":[442],"16.9%":[443],"72.2%":[444],"78.2%":[445],"75.1%":[446],"7.9%":[447],"33.3%":[448],"12.8%":[449],"11.7%":[450],"32.5%":[451],"17.2%":[452],"(RACK)</strong></em>":[455],"<strong>Top-10</strong>":[456],"<strong>MRR@10</strong>":[457],"<strong>MAP@10</strong>":[458],"<strong>MR@K</strong>":[459],"83.85%":[463],"52.29%":[464],"43.27%":[465],"54.47%":[466],"84.17%":[470],"53.20%":[471],"45.82%":[472],"56.8%":[473],"(Requirements":[476],"Change":[477],"Impact":[478],"Analysis)</strong></em>":[479],"<strong>Query":[486,490,494],"2</strong>":[487],"0.588":[488,489],"4</strong>":[491],"0.981":[492,493],"5</strong>":[495],"0.602":[496,497],"<strong>Citation</strong>":[498],"If":[499],"you":[500],"make":[501],"this":[504],"work,":[505],"please":[506],"cite:":[507],"<pre><code>@inproceedings{fan2023stop,":[508],"title={Stop":[509],"Processing":[512],"Software":[513,535],"Engineering":[514,536],"Documents:":[515],"Do":[516],"Matter?},":[518],"author={Yaohou":[519],"Fan":[520],"Chetan":[522],"Arora":[523],"Christoph":[525],"Treude},":[526],"booktitle={2023":[527],"IEEE/ACM":[528],"2nd":[529],"International":[530],"Workshop":[531],"Natural":[533],"Language-Based":[534],"(NLBSE)},":[537],"year={2023},":[538],"organization={IEEE}":[539],"}":[540]},"counts_by_year":[],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2024-04-03T00:00:00"}
