{"id":"https://openalex.org/W2945843801","doi":"https://doi.org/10.1109/tse.2019.2918536","title":"A Large Scale Study of Long-Time Contributor Prediction for GitHub Projects","display_name":"A Large Scale Study of Long-Time Contributor Prediction for GitHub Projects","publication_year":2019,"publication_date":"2019-05-23","ids":{"openalex":"https://openalex.org/W2945843801","doi":"https://doi.org/10.1109/tse.2019.2918536","mag":"2945843801"},"language":"en","primary_location":{"id":"doi:10.1109/tse.2019.2918536","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tse.2019.2918536","pdf_url":null,"source":{"id":"https://openalex.org/S8351582","display_name":"IEEE Transactions on Software Engineering","issn_l":"0098-5589","issn":["0098-5589","1939-3520","2326-3881"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320439","host_organization_name":"IEEE Computer Society","host_organization_lineage":["https://openalex.org/P4310320439","https://openalex.org/P4310319808"],"host_organization_lineage_names":["IEEE Computer Society","Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Software Engineering","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5007075465","display_name":"Lingfeng Bao","orcid":"https://orcid.org/0000-0003-1846-0921"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Lingfeng Bao","raw_affiliation_strings":["School of Computer & Computing Science, Zhejiang University City College, Hangzhou Shi, Zhejiang Sheng, China"],"affiliations":[{"raw_affiliation_string":"School of Computer & Computing Science, Zhejiang University City College, Hangzhou Shi, Zhejiang Sheng, China","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5006669765","display_name":"Xin Xia","orcid":"https://orcid.org/0000-0002-6302-3256"},"institutions":[{"id":"https://openalex.org/I56590836","display_name":"Monash University","ror":"https://ror.org/02bfwt286","country_code":"AU","type":"education","lineage":["https://openalex.org/I56590836"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Xin Xia","raw_affiliation_strings":["Faculty of Information Technology, Monash University, Clayton, VIC, Australia"],"affiliations":[{"raw_affiliation_string":"Faculty of Information Technology, Monash University, Clayton, VIC, Australia","institution_ids":["https://openalex.org/I56590836"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5081036622","display_name":"David Lo","orcid":"https://orcid.org/0000-0002-4367-7201"},"institutions":[{"id":"https://openalex.org/I79891267","display_name":"Singapore Management University","ror":"https://ror.org/050qmg959","country_code":"SG","type":"education","lineage":["https://openalex.org/I79891267"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"David Lo","raw_affiliation_strings":["School of Information Systems, Singapore Management University, Singapore, Singapore"],"affiliations":[{"raw_affiliation_string":"School of Information Systems, Singapore Management University, Singapore, Singapore","institution_ids":["https://openalex.org/I79891267"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5084524791","display_name":"Gail C. Murphy","orcid":"https://orcid.org/0000-0001-6768-2649"},"institutions":[{"id":"https://openalex.org/I141945490","display_name":"University of British Columbia","ror":"https://ror.org/03rmrcq20","country_code":"CA","type":"education","lineage":["https://openalex.org/I141945490"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Gail C. Murphy","raw_affiliation_strings":["Department of Computer Science, University of British Columbia, Vancouver, BC, Canada"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, University of British Columbia, Vancouver, BC, Canada","institution_ids":["https://openalex.org/I141945490"]}]}],"institutions":[],"countries_distinct_count":4,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5007075465"],"corresponding_institution_ids":["https://openalex.org/I76130692"],"apc_list":null,"apc_paid":null,"fwci":11.6246,"has_fulltext":false,"cited_by_count":78,"citation_normalized_percentile":{"value":0.98416474,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":100},"biblio":{"volume":"47","issue":"6","first_page":"1277","last_page":"1298"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11675","display_name":"Open Source Software Innovations","score":0.9976999759674072,"subfield":{"id":"https://openalex.org/subfields/1706","display_name":"Computer Science Applications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12127","display_name":"Software System Performance and Reliability","score":0.9794999957084656,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7985042929649353},{"id":"https://openalex.org/keywords/commit","display_name":"Commit","score":0.7937257289886475},{"id":"https://openalex.org/keywords/naive-bayes-classifier","display_name":"Naive Bayes classifier","score":0.6719685792922974},{"id":"https://openalex.org/keywords/random-forest","display_name":"Random forest","score":0.6236968040466309},{"id":"https://openalex.org/keywords/open-source","display_name":"Open source","score":0.5249633193016052},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.4585130214691162},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.44680726528167725},{"id":"https://openalex.org/keywords/classifier","display_name":"Classifier (UML)","score":0.4346431493759155},{"id":"https://openalex.org/keywords/decision-tree","display_name":"Decision tree","score":0.4323250353336334},{"id":"https://openalex.org/keywords/support-vector-machine","display_name":"Support vector machine","score":0.41860663890838623},{"id":"https://openalex.org/keywords/interval","display_name":"Interval (graph theory)","score":0.41799840331077576},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.4002707898616791},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.39932429790496826},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.36860188841819763},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.2636473774909973},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.16595381498336792},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.10763496160507202}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7985042929649353},{"id":"https://openalex.org/C153180980","wikidata":"https://www.wikidata.org/wiki/Q19776675","display_name":"Commit","level":2,"score":0.7937257289886475},{"id":"https://openalex.org/C52001869","wikidata":"https://www.wikidata.org/wiki/Q812530","display_name":"Naive Bayes classifier","level":3,"score":0.6719685792922974},{"id":"https://openalex.org/C169258074","wikidata":"https://www.wikidata.org/wiki/Q245748","display_name":"Random forest","level":2,"score":0.6236968040466309},{"id":"https://openalex.org/C3018397939","wikidata":"https://www.wikidata.org/wiki/Q3644502","display_name":"Open source","level":3,"score":0.5249633193016052},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.4585130214691162},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.44680726528167725},{"id":"https://openalex.org/C95623464","wikidata":"https://www.wikidata.org/wiki/Q1096149","display_name":"Classifier (UML)","level":2,"score":0.4346431493759155},{"id":"https://openalex.org/C84525736","wikidata":"https://www.wikidata.org/wiki/Q831366","display_name":"Decision tree","level":2,"score":0.4323250353336334},{"id":"https://openalex.org/C12267149","wikidata":"https://www.wikidata.org/wiki/Q282453","display_name":"Support vector machine","level":2,"score":0.41860663890838623},{"id":"https://openalex.org/C2778067643","wikidata":"https://www.wikidata.org/wiki/Q166507","display_name":"Interval (graph theory)","level":2,"score":0.41799840331077576},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.4002707898616791},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.39932429790496826},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.36860188841819763},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2636473774909973},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.16595381498336792},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.10763496160507202},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C114614502","wikidata":"https://www.wikidata.org/wiki/Q76592","display_name":"Combinatorics","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/tse.2019.2918536","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tse.2019.2918536","pdf_url":null,"source":{"id":"https://openalex.org/S8351582","display_name":"IEEE Transactions on Software Engineering","issn_l":"0098-5589","issn":["0098-5589","1939-3520","2326-3881"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320439","host_organization_name":"IEEE Computer Society","host_organization_lineage":["https://openalex.org/P4310320439","https://openalex.org/P4310319808"],"host_organization_lineage_names":["IEEE Computer Society","Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Software Engineering","raw_type":"journal-article"},{"id":"pmh:oai:ink.library.smu.edu.sg:sis_research-5362","is_oa":false,"landing_page_url":"https://ink.library.smu.edu.sg/sis_research/4359","pdf_url":null,"source":{"id":"https://openalex.org/S4377196871","display_name":"Institutional Knowledge (InK) - Institutional Knowledge at Singapore Management University (Singapore Management University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I79891267","host_organization_name":"Singapore Management University","host_organization_lineage":["https://openalex.org/I79891267"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"https://doi.org/10.1109/TSE.2019.2918536","raw_type":"Journal Article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G7128210980","display_name":null,"funder_award_id":"2018YFB1003904","funder_id":"https://openalex.org/F4320335777","funder_display_name":"National Key Research and Development Program of China"}],"funders":[{"id":"https://openalex.org/F4320335777","display_name":"National Key Research and Development Program of China","ror":null},{"id":"https://openalex.org/F4320335787","display_name":"Fundamental Research Funds for the Central Universities","ror":null}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":79,"referenced_works":["https://openalex.org/W38467096","https://openalex.org/W156578896","https://openalex.org/W1507255258","https://openalex.org/W1701984984","https://openalex.org/W1873057782","https://openalex.org/W1902482618","https://openalex.org/W1909497710","https://openalex.org/W1972386298","https://openalex.org/W1978859404","https://openalex.org/W1989862714","https://openalex.org/W1991726357","https://openalex.org/W1996014455","https://openalex.org/W1999830936","https://openalex.org/W2004824191","https://openalex.org/W2008506283","https://openalex.org/W2008946935","https://openalex.org/W2025576383","https://openalex.org/W2026801283","https://openalex.org/W2038917214","https://openalex.org/W2040900804","https://openalex.org/W2048456683","https://openalex.org/W2056944867","https://openalex.org/W2063876764","https://openalex.org/W2074218040","https://openalex.org/W2075780741","https://openalex.org/W2087194317","https://openalex.org/W2093249572","https://openalex.org/W2098950049","https://openalex.org/W2102193394","https://openalex.org/W2105776892","https://openalex.org/W2106013472","https://openalex.org/W2107226556","https://openalex.org/W2107294940","https://openalex.org/W2109553965","https://openalex.org/W2116484544","https://openalex.org/W2118283821","https://openalex.org/W2125121913","https://openalex.org/W2126957654","https://openalex.org/W2130743551","https://openalex.org/W2133990480","https://openalex.org/W2140190241","https://openalex.org/W2142827986","https://openalex.org/W2150874999","https://openalex.org/W2152228029","https://openalex.org/W2158698691","https://openalex.org/W2158760041","https://openalex.org/W2162563790","https://openalex.org/W2167784654","https://openalex.org/W2192128789","https://openalex.org/W2242800359","https://openalex.org/W2333603634","https://openalex.org/W2344072768","https://openalex.org/W2395955025","https://openalex.org/W2505101302","https://openalex.org/W2559885217","https://openalex.org/W2599119317","https://openalex.org/W2620704513","https://openalex.org/W2620760558","https://openalex.org/W2727347990","https://openalex.org/W2745617632","https://openalex.org/W2799610300","https://openalex.org/W2808113972","https://openalex.org/W2887004133","https://openalex.org/W2911964244","https://openalex.org/W2952482610","https://openalex.org/W2963520355","https://openalex.org/W2998216295","https://openalex.org/W3098783205","https://openalex.org/W3102627425","https://openalex.org/W3124560715","https://openalex.org/W3158650448","https://openalex.org/W4238859253","https://openalex.org/W4246954663","https://openalex.org/W4252684946","https://openalex.org/W4285719527","https://openalex.org/W6637437460","https://openalex.org/W6748471295","https://openalex.org/W6750709755","https://openalex.org/W7066667914"],"related_works":["https://openalex.org/W4367336074","https://openalex.org/W3154045278","https://openalex.org/W4379620016","https://openalex.org/W4393666307","https://openalex.org/W3210764983","https://openalex.org/W4393443811","https://openalex.org/W4367335949","https://openalex.org/W3089416646","https://openalex.org/W4396816114","https://openalex.org/W4380048833"],"abstract_inverted_index":{"The":[0],"continuous":[1],"contributions":[2],"made":[3],"by":[4,302],"long":[5],"time":[6,112,128,143,167,239,285,325],"contributors":[7,156],"(LTCs)":[8],"are":[9,151],"a":[10,30,84,102,105,108,126,161,172,264,306,309],"key":[11],"factor":[12],"enabling":[13],"open":[14],"source":[15],"software":[16],"(OSS)":[17],"projects":[18,35,64,262,344],"to":[19,49,65,313,341],"be":[20,66],"successful":[21],"and":[22,36,117,147,154,201,215,295],"survival.":[23],"We":[24,78,89,100,204,218,243,267,288],"study":[25,43],"Github":[26,80,87],"as":[27,104],"it":[28],"has":[29],"large":[31],"number":[32,271,298],"of":[33,38,44,86,107,160,166,182,230,238,272,283,299,324],"OSS":[34,63,343],"millions":[37],"contributors,":[39],"which":[40,96,186],"enables":[41],"the":[42,45,91,111,121,142,180,225,246,261,270,275,284,292,296,314],"transition":[46],"from":[47,76,82,179,256],"newcomers":[48,61,252,257],"LTCs.":[50,242,328],"In":[51,133],"this":[52],"paper,":[53],"we":[54,57,136,175,330],"investigate":[55,245],"whether":[56],"can":[58],"effectively":[59],"predict":[60],"in":[62,120,163,234,260,279,320],"LTCs":[67,159,255],"based":[68,336],"on":[69,141,184,337],"their":[70],"activity":[71],"data":[72,81],"that":[73,220,250,269,291],"is":[74,123,274],"collected":[75],"Github.":[77],"collect":[79],"GHTorrent,":[83],"mirror":[85],"data.":[88],"select":[90],"most":[92,247,276,317],"popular":[93],"917":[94],"projects,":[95],"contain":[97],"75,046":[98],"contributors.":[99],"determine":[101],"developer":[103,191,195],"LTC":[106],"project":[109,122,162,310],"if":[110],"interval":[113,240,286,326],"between":[114],"his/her":[115],"first":[116],"last":[118],"commit":[119],"larger":[124],"than":[125,232],"certain":[127],"<i":[129],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[130],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">T</i>":[131],".":[132],"our":[134,338],"experiment,":[135],"use":[137],"three":[138,164,236,281,322],"different":[139],"settings":[140,165,237,282,323],"interval:":[144],"1,":[145],"2,":[146],"3":[148],"years.":[149],"There":[150],"9,238,":[152],"3,968,":[153],"1,577":[155],"who":[157,253,258],"become":[158,254],"interval,":[168],"respectively.":[169],"To":[170],"build":[171],"prediction":[173],"model,":[174],"extract":[176],"many":[177],"features":[178,249,319],"activities":[181],"developers":[183,304],"Github,":[185],"group":[187],"into":[188],"five":[189],"dimensions:":[190],"profile,":[192,194],"repository":[193,198],"monthly":[196,199],"activity,":[197,200],"collaboration":[202],"network.":[203],"apply":[205],"several":[206,332],"classifiers":[207],"including":[208],"naive":[209],"Bayes,":[210],"SVM,":[211],"decision":[212],"tree,":[213],"kNN":[214],"random":[216,221],"forest.":[217],"find":[219,268,290],"forest":[222],"classifier":[223],"achieves":[224],"best":[226],"performance":[227],"with":[228],"AUCs":[229],"more":[231],"0.75":[233],"all":[235,280,321],"for":[241,263,327,334],"also":[244,289,311],"important":[248,277,318],"differentiate":[251],"stay":[259],"short":[265],"time.":[266],"followers":[273],"feature":[278],"studied.":[287],"programming":[293],"language":[294],"average":[297],"commits":[300],"contributed":[301],"other":[303],"when":[305],"newcomer":[307],"joins":[308],"belong":[312],"top":[315],"10":[316],"Finally,":[329],"provide":[331],"implications":[333],"action":[335],"analysis":[339],"results":[340],"help":[342],"retain":[345],"newcomers.":[346]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":11},{"year":2024,"cited_by_count":17},{"year":2023,"cited_by_count":15},{"year":2022,"cited_by_count":13},{"year":2021,"cited_by_count":11},{"year":2020,"cited_by_count":8},{"year":2019,"cited_by_count":1}],"updated_date":"2026-04-05T17:49:38.594831","created_date":"2025-10-10T00:00:00"}
