{"id":"https://openalex.org/W4210314036","doi":"https://doi.org/10.3390/info13020073","title":"Automatic Identification of Similar Pull-Requests in GitHub\u2019s Repositories Using Machine Learning","display_name":"Automatic Identification of Similar Pull-Requests in GitHub\u2019s Repositories Using Machine Learning","publication_year":2022,"publication_date":"2022-02-03","ids":{"openalex":"https://openalex.org/W4210314036","doi":"https://doi.org/10.3390/info13020073"},"language":"en","primary_location":{"id":"doi:10.3390/info13020073","is_oa":true,"landing_page_url":"https://doi.org/10.3390/info13020073","pdf_url":"https://www.mdpi.com/2078-2489/13/2/73/pdf?version=1643900470","source":{"id":"https://openalex.org/S4210219776","display_name":"Information","issn_l":"2078-2489","issn":["2078-2489"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Information","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.mdpi.com/2078-2489/13/2/73/pdf?version=1643900470","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5000812814","display_name":"Hamzeh Eyal Salman","orcid":"https://orcid.org/0000-0003-3258-7304"},"institutions":[{"id":"https://openalex.org/I21173400","display_name":"Mutah University","ror":"https://ror.org/008g9ns82","country_code":"JO","type":"education","lineage":["https://openalex.org/I21173400"]}],"countries":["JO"],"is_corresponding":true,"raw_author_name":"Hamzeh Eyal Salman","raw_affiliation_strings":["Software Engineering Department, IT Faculty, Mutah University, Al-Karak 61710, Jordan"],"raw_orcid":"https://orcid.org/0000-0003-3258-7304","affiliations":[{"raw_affiliation_string":"Software Engineering Department, IT Faculty, Mutah University, Al-Karak 61710, Jordan","institution_ids":["https://openalex.org/I21173400"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5074245718","display_name":"Zakarea Alshara","orcid":"https://orcid.org/0000-0002-2727-6985"},"institutions":[{"id":"https://openalex.org/I156983542","display_name":"Jordan University of Science and Technology","ror":"https://ror.org/03y8mtb59","country_code":"JO","type":"education","lineage":["https://openalex.org/I156983542"]}],"countries":["JO"],"is_corresponding":false,"raw_author_name":"Zakarea Alshara","raw_affiliation_strings":["Software Engineering Department, IT Faculty, Jordan University of Science and Technology, Irbid 22110, Jordan"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Software Engineering Department, IT Faculty, Jordan University of Science and Technology, Irbid 22110, Jordan","institution_ids":["https://openalex.org/I156983542"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5066555930","display_name":"Abdelhak-Djamel Seriai","orcid":"https://orcid.org/0000-0003-1961-1410"},"institutions":[{"id":"https://openalex.org/I19894307","display_name":"Universit\u00e9 de Montpellier","ror":"https://ror.org/051escj72","country_code":"FR","type":"education","lineage":["https://openalex.org/I19894307"]},{"id":"https://openalex.org/I4210101743","display_name":"Laboratoire d'Informatique, de Robotique et de Micro\u00e9lectronique de Montpellier","ror":"https://ror.org/013yean28","country_code":"FR","type":"facility","lineage":["https://openalex.org/I1294671590","https://openalex.org/I1294671590","https://openalex.org/I1326498283","https://openalex.org/I151295451","https://openalex.org/I19894307","https://openalex.org/I4210101743","https://openalex.org/I4210159245","https://openalex.org/I4405261681"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Abdelhak-Djamel Seriai","raw_affiliation_strings":["LIRMM Lab, University of Montpellier, 34000 Montpellier, France"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"LIRMM Lab, University of Montpellier, 34000 Montpellier, France","institution_ids":["https://openalex.org/I4210101743","https://openalex.org/I19894307"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5000812814"],"corresponding_institution_ids":["https://openalex.org/I21173400"],"apc_list":{"value":1400,"currency":"CHF","value_usd":1515},"apc_paid":{"value":1262,"currency":"EUR","value_usd":1361},"fwci":2.5519,"has_fulltext":true,"cited_by_count":8,"citation_normalized_percentile":{"value":0.9097157,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":97},"biblio":{"volume":"13","issue":"2","first_page":"73","last_page":"73"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11241","display_name":"Advanced Malware Detection Techniques","score":0.982699990272522,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11986","display_name":"Scientific Computing and Data Management","score":0.9750999808311462,"subfield":{"id":"https://openalex.org/subfields/1802","display_name":"Information Systems and Management"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8318743705749512},{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.6649882793426514},{"id":"https://openalex.org/keywords/identification","display_name":"Identification (biology)","score":0.5403234958648682},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.5319764018058777},{"id":"https://openalex.org/keywords/coding","display_name":"Coding (social sciences)","score":0.5094335675239563},{"id":"https://openalex.org/keywords/source-code","display_name":"Source code","score":0.45767897367477417},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.4309538006782532},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.4304954707622528},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.4281395375728607},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.407521516084671},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.35065001249313354},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.33334529399871826},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.33117109537124634}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8318743705749512},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.6649882793426514},{"id":"https://openalex.org/C116834253","wikidata":"https://www.wikidata.org/wiki/Q2039217","display_name":"Identification (biology)","level":2,"score":0.5403234958648682},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.5319764018058777},{"id":"https://openalex.org/C179518139","wikidata":"https://www.wikidata.org/wiki/Q5140297","display_name":"Coding (social sciences)","level":2,"score":0.5094335675239563},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.45767897367477417},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.4309538006782532},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.4304954707622528},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.4281395375728607},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.407521516084671},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.35065001249313354},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.33334529399871826},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.33117109537124634},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C59822182","wikidata":"https://www.wikidata.org/wiki/Q441","display_name":"Botany","level":1,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.3390/info13020073","is_oa":true,"landing_page_url":"https://doi.org/10.3390/info13020073","pdf_url":"https://www.mdpi.com/2078-2489/13/2/73/pdf?version=1643900470","source":{"id":"https://openalex.org/S4210219776","display_name":"Information","issn_l":"2078-2489","issn":["2078-2489"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Information","raw_type":"journal-article"},{"id":"pmh:oai:HAL:lirmm-03586823v1","is_oa":true,"landing_page_url":"https://hal-lirmm.ccsd.cnrs.fr/lirmm-03586823","pdf_url":null,"source":{"id":"https://openalex.org/S4306402512","display_name":"HAL (Le Centre pour la Communication Scientifique Directe)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1294671590","host_organization_name":"Centre National de la Recherche Scientifique","host_organization_lineage":["https://openalex.org/I1294671590"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Information, 2022, 13 (2), pp.73-97. &#x27E8;10.3390/info13020073&#x27E9;","raw_type":"Journal articles"},{"id":"pmh:oai:doaj.org/article:57a2022e5f254102b28ca158694ad660","is_oa":true,"landing_page_url":"https://doaj.org/article/57a2022e5f254102b28ca158694ad660","pdf_url":null,"source":{"id":"https://openalex.org/S4306401280","display_name":"DOAJ (DOAJ: Directory of Open Access Journals)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Information, Vol 13, Iss 2, p 73 (2022)","raw_type":"article"},{"id":"pmh:oai:mdpi.com:/2078-2489/13/2/73/","is_oa":true,"landing_page_url":"https://dx.doi.org/10.3390/info13020073","pdf_url":null,"source":{"id":"https://openalex.org/S4306400947","display_name":"MDPI (MDPI AG)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4210097602","host_organization_name":"Multidisciplinary Digital Publishing Institute (Switzerland)","host_organization_lineage":["https://openalex.org/I4210097602"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Information; Volume 13; Issue 2; Pages: 73","raw_type":"Text"}],"best_oa_location":{"id":"doi:10.3390/info13020073","is_oa":true,"landing_page_url":"https://doi.org/10.3390/info13020073","pdf_url":"https://www.mdpi.com/2078-2489/13/2/73/pdf?version=1643900470","source":{"id":"https://openalex.org/S4210219776","display_name":"Information","issn_l":"2078-2489","issn":["2078-2489"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Information","raw_type":"journal-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/17","score":0.4099999964237213,"display_name":"Partnerships for the goals"}],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4210314036.pdf","grobid_xml":"https://content.openalex.org/works/W4210314036.grobid-xml"},"referenced_works_count":51,"referenced_works":["https://openalex.org/W1260576311","https://openalex.org/W1856460888","https://openalex.org/W1978394996","https://openalex.org/W1988790447","https://openalex.org/W1994598608","https://openalex.org/W1998900885","https://openalex.org/W2023746780","https://openalex.org/W2034251629","https://openalex.org/W2046216252","https://openalex.org/W2075367182","https://openalex.org/W2101503248","https://openalex.org/W2124100711","https://openalex.org/W2156833313","https://openalex.org/W2165022036","https://openalex.org/W2167630669","https://openalex.org/W2168612058","https://openalex.org/W2229220273","https://openalex.org/W2288177242","https://openalex.org/W2344367508","https://openalex.org/W2395791174","https://openalex.org/W2468008428","https://openalex.org/W2510265546","https://openalex.org/W2567878593","https://openalex.org/W2614718149","https://openalex.org/W2758152813","https://openalex.org/W2767894015","https://openalex.org/W2801096891","https://openalex.org/W2803419445","https://openalex.org/W2809618723","https://openalex.org/W2884292363","https://openalex.org/W2887940766","https://openalex.org/W2889313257","https://openalex.org/W2903449363","https://openalex.org/W2920924032","https://openalex.org/W2946108305","https://openalex.org/W2956851957","https://openalex.org/W2981381779","https://openalex.org/W3013276295","https://openalex.org/W3080820911","https://openalex.org/W3082955277","https://openalex.org/W3085062472","https://openalex.org/W3091115553","https://openalex.org/W3098996042","https://openalex.org/W3127698408","https://openalex.org/W4213009331","https://openalex.org/W4242648443","https://openalex.org/W4246032972","https://openalex.org/W4251460506","https://openalex.org/W6674809819","https://openalex.org/W6719788525","https://openalex.org/W6824220577"],"related_works":["https://openalex.org/W4298130764","https://openalex.org/W2804364458","https://openalex.org/W2132641928","https://openalex.org/W4310225030","https://openalex.org/W2090259340","https://openalex.org/W1926736923","https://openalex.org/W2158836806","https://openalex.org/W2393816671","https://openalex.org/W2083665254","https://openalex.org/W3081644756"],"abstract_inverted_index":{"Context:":[0],"In":[1,28,120],"a":[2,9,25,37],"social":[3],"coding":[4],"platform":[5],"such":[6],"as":[7],"GitHub,":[8],"pull-request":[10,112],"mechanism":[11,48],"is":[12,49,96,136],"frequently":[13],"used":[14,196],"by":[15,75,306,314],"contributors":[16,54],"to":[17,22,35,41,55,70,98,102,106,109,113,125,138,168,179,197,211,230,243,252],"submit":[18,56],"their":[19],"code":[20,31],"changes":[21,32],"reviewers":[23],"of":[24],"given":[26],"repository.":[27],"general,":[29],"these":[30],"are":[33,195],"either":[34],"add":[36],"new":[38],"feature":[39],"or":[40,142],"fix":[42],"an":[43],"existing":[44],"bug.":[45],"However,":[46],"this":[47,121,235],"distributed":[50],"and":[51,84,118,153,190,238,248,261,265,279,283,298,304,309,312],"allows":[52,149],"different":[53,76],"unintentionally":[57],"similar":[58,62,100,127,170,199],"pull-requests":[59,66,101,128,166,171,200],"that":[60,133,222],"perform":[61],"development":[63],"activities.":[64],"Similar":[65],"may":[67],"be":[68,107],"submitted":[69],"review":[71],"in":[72,115,234],"parallel":[73],"time":[74,83,117,247,297],"reviewers.":[77],"This":[78,147],"will":[79,88],"cause":[80],"redundant":[81],"reviewing":[82,145,151,296],"efforts.":[85],"Moreover,":[86,291],"it":[87,95,210,241],"complicate":[89],"the":[90,103,139,143,176,223,231,245,253,256,292],"collaboration":[91],"process.":[92],"Objective:":[93],"Therefore,":[94],"useful":[97],"assign":[99],"same":[104,140,144],"reviewer":[105,141,246],"able":[108],"decide":[110],"which":[111],"choose":[114],"effective":[116],"effort.":[119,249],"article,":[122],"we":[123,159,174,207],"propose":[124],"group":[126,198],"together":[129],"into":[130],"clusters":[131],"so":[132],"each":[134],"cluster":[135],"assigned":[137],"team.":[146],"proposal":[148],"saving":[150],"efforts":[152],"time.":[154],"Method:":[155],"To":[156,203],"do":[157],"so,":[158],"first":[160],"extract":[161],"descriptive":[162],"textual":[163],"information":[164,178],"from":[165,215],"content":[167],"link":[169],"together.":[172,201],"Then,":[173],"employ":[175],"extracted":[177],"find":[180],"similarities":[181],"among":[182],"pull-requests.":[183],"Finally,":[184],"machine":[185],"learning":[186],"algorithms":[187],"(K-Means":[188],"clustering":[189,193,276,317],"agglomeration":[191,274,315],"hierarchical":[192,275,316],"algorithms)":[194],"Results:":[202],"validate":[204],"our":[205],"proposal,":[206],"have":[208],"applied":[209],"twenty":[212],"popular":[213],"repositories":[214],"public":[216],"dataset.":[217],"The":[218],"experimental":[219],"results":[220,228],"show":[221],"proposed":[224,293],"approach":[225,294],"achieved":[226],"promising":[227],"according":[229],"well-known":[232],"metrics":[233],"subject:":[236],"precision":[237,264,282],"recall.":[239],"Furthermore,":[240],"helps":[242],"save":[244],"Conclusion:":[250],"According":[251],"obtained":[254],"results,":[255],"K-Means":[257,307],"algorithm":[258,308],"achieves":[259],"94%":[260],"91%":[262],"average":[263,281,301],"recall":[266,284],"values":[267,285],"over":[268,286],"all":[269,287],"considered":[270,288],"repositories,":[271,289],"respectively,":[272],"while":[273],"performs":[277],"93%":[278],"98%":[280],"respectively.":[290],"saves":[295],"effort":[299],"on":[300],"between":[302,310],"(67%":[303,311],"91%)":[305],"83%)":[313],"algorithm.":[318]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":3},{"year":2022,"cited_by_count":1}],"updated_date":"2026-05-07T13:39:58.223016","created_date":"2022-02-08T00:00:00"}
