{"id":"https://openalex.org/W2617919282","doi":"https://doi.org/10.1142/s0218001418500039","title":"A Novel Clustering-Based Sampling Approach for Minimum Sample Set in Big Data Environment","display_name":"A Novel Clustering-Based Sampling Approach for Minimum Sample Set in Big Data Environment","publication_year":2017,"publication_date":"2017-05-24","ids":{"openalex":"https://openalex.org/W2617919282","doi":"https://doi.org/10.1142/s0218001418500039","mag":"2617919282"},"language":"en","primary_location":{"id":"doi:10.1142/s0218001418500039","is_oa":false,"landing_page_url":"https://doi.org/10.1142/s0218001418500039","pdf_url":null,"source":{"id":"https://openalex.org/S41486457","display_name":"International Journal of Pattern Recognition and Artificial Intelligence","issn_l":"0218-0014","issn":["0218-0014","1793-6381"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319815","host_organization_name":"World Scientific","host_organization_lineage":["https://openalex.org/P4310319815"],"host_organization_lineage_names":["World Scientific"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"International Journal of Pattern Recognition and Artificial Intelligence","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5044953356","display_name":"Jia Zhao","orcid":"https://orcid.org/0000-0002-3652-1903"},"institutions":[{"id":"https://openalex.org/I99682543","display_name":"University of Minho","ror":"https://ror.org/037wpkx04","country_code":"PT","type":"education","lineage":["https://openalex.org/I99682543"]},{"id":"https://openalex.org/I4210166615","display_name":"INESC TEC","ror":"https://ror.org/05fa8ka61","country_code":"PT","type":"nonprofit","lineage":["https://openalex.org/I4210125590","https://openalex.org/I4210166615"]},{"id":"https://openalex.org/I4385474403","display_name":"Changchun University of Technology","ror":"https://ror.org/052pakb34","country_code":null,"type":"education","lineage":["https://openalex.org/I4385474403"]}],"countries":["PT"],"is_corresponding":false,"raw_author_name":"Jia Zhao","raw_affiliation_strings":["High-Assurance Software Laboratory, INESC TEC &amp; University of Minho, Braga, Portugal","School of Computer Science and Engineering, Changchun University of Technology, Changchun 130012, P. R. China"],"affiliations":[{"raw_affiliation_string":"High-Assurance Software Laboratory, INESC TEC &amp; University of Minho, Braga, Portugal","institution_ids":["https://openalex.org/I99682543","https://openalex.org/I4210166615"]},{"raw_affiliation_string":"School of Computer Science and Engineering, Changchun University of Technology, Changchun 130012, P. R. China","institution_ids":["https://openalex.org/I4385474403","https://openalex.org/I4385474403"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5020047350","display_name":"Jia Sun","orcid":"https://orcid.org/0000-0002-0190-7101"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jia Sun","raw_affiliation_strings":["Jilin Communications Polytechnic, No. 63 Diantai Street, Changchun 130012, P. R. China"],"affiliations":[{"raw_affiliation_string":"Jilin Communications Polytechnic, No. 63 Diantai Street, Changchun 130012, P. R. China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5091839687","display_name":"Yunan Zhai","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yunan Zhai","raw_affiliation_strings":["Jilin Communications Polytechnic, No. 63 Diantai Street, Changchun 130012, P. R. China"],"affiliations":[{"raw_affiliation_string":"Jilin Communications Polytechnic, No. 63 Diantai Street, Changchun 130012, P. R. China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5068477244","display_name":"Yan Ding","orcid":"https://orcid.org/0000-0002-8636-9831"},"institutions":[{"id":"https://openalex.org/I194450716","display_name":"Jilin University","ror":"https://ror.org/00js3aw79","country_code":"CN","type":"education","lineage":["https://openalex.org/I194450716"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yan Ding","raw_affiliation_strings":["College of Computer Science and Technology, Jilin University, No. 2699 Qianjin Street, Changchun 130012, P. R. China","Zhuhai College of Jilin University, Zhuhai 519041, P. R. China"],"affiliations":[{"raw_affiliation_string":"College of Computer Science and Technology, Jilin University, No. 2699 Qianjin Street, Changchun 130012, P. R. China","institution_ids":["https://openalex.org/I194450716"]},{"raw_affiliation_string":"Zhuhai College of Jilin University, Zhuhai 519041, P. R. China","institution_ids":["https://openalex.org/I194450716"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103050318","display_name":"Chunyi Wu","orcid":"https://orcid.org/0000-0002-2186-3433"},"institutions":[{"id":"https://openalex.org/I194450716","display_name":"Jilin University","ror":"https://ror.org/00js3aw79","country_code":"CN","type":"education","lineage":["https://openalex.org/I194450716"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chunyi Wu","raw_affiliation_strings":["College of Computer Science and Technology, Jilin University, No. 2699 Qianjin Street, Changchun 130012, P. R. China","Zhuhai College of Jilin University, Zhuhai 519041, P. R. China"],"affiliations":[{"raw_affiliation_string":"College of Computer Science and Technology, Jilin University, No. 2699 Qianjin Street, Changchun 130012, P. R. China","institution_ids":["https://openalex.org/I194450716"]},{"raw_affiliation_string":"Zhuhai College of Jilin University, Zhuhai 519041, P. R. China","institution_ids":["https://openalex.org/I194450716"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5108570384","display_name":"Ming Hu","orcid":null},"institutions":[{"id":"https://openalex.org/I4385474403","display_name":"Changchun University of Technology","ror":"https://ror.org/052pakb34","country_code":null,"type":"education","lineage":["https://openalex.org/I4385474403"]},{"id":"https://openalex.org/I4210143016","display_name":"Changchun Institute of Technology","ror":"https://ror.org/03r6wam78","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210143016"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Ming Hu","raw_affiliation_strings":["Changchun Institute of Technology, No. 359 Kuanping Road, Changchun 130012, P. R. China","School of Computer Science and Engineering, Changchun University of Technology, Changchun 130012, P. R. China"],"affiliations":[{"raw_affiliation_string":"Changchun Institute of Technology, No. 359 Kuanping Road, Changchun 130012, P. R. China","institution_ids":["https://openalex.org/I4210143016"]},{"raw_affiliation_string":"School of Computer Science and Engineering, Changchun University of Technology, Changchun 130012, P. R. China","institution_ids":["https://openalex.org/I4385474403","https://openalex.org/I4385474403"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5108570384"],"corresponding_institution_ids":["https://openalex.org/I4210143016","https://openalex.org/I4385474403"],"apc_list":null,"apc_paid":null,"fwci":2.064,"has_fulltext":false,"cited_by_count":22,"citation_normalized_percentile":{"value":0.89568905,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":98},"biblio":{"volume":"32","issue":"02","first_page":"1850003","last_page":"1850003"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.9968000054359436,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.9968000054359436,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10538","display_name":"Data Mining Algorithms and Applications","score":0.996399998664856,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11652","display_name":"Imbalanced Data Classification Techniques","score":0.994700014591217,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.7642452716827393},{"id":"https://openalex.org/keywords/big-data","display_name":"Big data","score":0.7062582969665527},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6874386072158813},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.612079381942749},{"id":"https://openalex.org/keywords/sample","display_name":"Sample (material)","score":0.6096760034561157},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.5538882613182068},{"id":"https://openalex.org/keywords/sampling","display_name":"Sampling (signal processing)","score":0.5359632968902588},{"id":"https://openalex.org/keywords/data-set","display_name":"Data set","score":0.5230332612991333},{"id":"https://openalex.org/keywords/heuristic","display_name":"Heuristic","score":0.4418726861476898},{"id":"https://openalex.org/keywords/sample-space","display_name":"Sample space","score":0.4178478419780731},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.2917941212654114}],"concepts":[{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.7642452716827393},{"id":"https://openalex.org/C75684735","wikidata":"https://www.wikidata.org/wiki/Q858810","display_name":"Big data","level":2,"score":0.7062582969665527},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6874386072158813},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.612079381942749},{"id":"https://openalex.org/C198531522","wikidata":"https://www.wikidata.org/wiki/Q485146","display_name":"Sample (material)","level":2,"score":0.6096760034561157},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.5538882613182068},{"id":"https://openalex.org/C140779682","wikidata":"https://www.wikidata.org/wiki/Q210868","display_name":"Sampling (signal processing)","level":3,"score":0.5359632968902588},{"id":"https://openalex.org/C58489278","wikidata":"https://www.wikidata.org/wiki/Q1172284","display_name":"Data set","level":2,"score":0.5230332612991333},{"id":"https://openalex.org/C173801870","wikidata":"https://www.wikidata.org/wiki/Q201413","display_name":"Heuristic","level":2,"score":0.4418726861476898},{"id":"https://openalex.org/C100279318","wikidata":"https://www.wikidata.org/wiki/Q467440","display_name":"Sample space","level":2,"score":0.4178478419780731},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2917941212654114},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C43617362","wikidata":"https://www.wikidata.org/wiki/Q170050","display_name":"Chromatography","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C106131492","wikidata":"https://www.wikidata.org/wiki/Q3072260","display_name":"Filter (signal processing)","level":2,"score":0.0},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1142/s0218001418500039","is_oa":false,"landing_page_url":"https://doi.org/10.1142/s0218001418500039","pdf_url":null,"source":{"id":"https://openalex.org/S41486457","display_name":"International Journal of Pattern Recognition and Artificial Intelligence","issn_l":"0218-0014","issn":["0218-0014","1793-6381"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319815","host_organization_name":"World Scientific","host_organization_lineage":["https://openalex.org/P4310319815"],"host_organization_lineage_names":["World Scientific"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"International Journal of Pattern Recognition and Artificial Intelligence","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":24,"referenced_works":["https://openalex.org/W1597561788","https://openalex.org/W1966879885","https://openalex.org/W1975930001","https://openalex.org/W1978954664","https://openalex.org/W1979462644","https://openalex.org/W2013373704","https://openalex.org/W2038885675","https://openalex.org/W2040263621","https://openalex.org/W2053744138","https://openalex.org/W2061451531","https://openalex.org/W2061620108","https://openalex.org/W2062277303","https://openalex.org/W2082986577","https://openalex.org/W2087395828","https://openalex.org/W2124701120","https://openalex.org/W2127164716","https://openalex.org/W2129501055","https://openalex.org/W2132271709","https://openalex.org/W2136824894","https://openalex.org/W2149731317","https://openalex.org/W2165093166","https://openalex.org/W2212819718","https://openalex.org/W4241941664","https://openalex.org/W4300601563"],"related_works":["https://openalex.org/W4390608645","https://openalex.org/W4247566972","https://openalex.org/W2960264696","https://openalex.org/W3090563135","https://openalex.org/W2497432351","https://openalex.org/W4206777497","https://openalex.org/W2910064364","https://openalex.org/W4255224757","https://openalex.org/W2499527417","https://openalex.org/W2218513093"],"abstract_inverted_index":{"The":[0,221,249,278],"data":[1,22,28,45,73,104,113,135,179,214,270,276],"are":[2],"rapidly":[3],"expanding":[4],"nowadays,":[5],"which":[6,94,159],"makes":[7,173],"it":[8],"very":[9],"difficult":[10],"to":[11,65,87,151,163,238],"analyze":[12],"valuable":[13],"information":[14],"from":[15,70,246],"big":[16,27,44,103,112,157],"data.":[17],"Most":[18],"of":[19,43,58,79,82,100,111,132,149,156,176,185,191,208,267,275],"the":[20,40,59,71,83,89,101,107,119,124,133,147,164,169,177,183,192,201,206,209,213,218,226,233,240,257,265,268,273,289],"existing":[21,258],"mining":[23],"algorithms":[24],"deal":[25],"with":[26,205,256],"problems":[29],"at":[30,123,198],"large":[31],"time":[32,274],"and":[33,46,74,130,137,181,216,243,271],"space":[34],"costs.":[35],"This":[36],"paper":[37],"focuses":[38],"on":[39,142],"sampling":[41,121,166],"problem":[42],"puts":[47],"forward":[48],"an":[49],"efficient":[50],"heuristic":[51],"Cluster":[52],"Sampling":[53],"Arithmetic,":[54],"called":[55],"CSA.":[56],"Many":[57],"former":[60],"researchers":[61],"adopted":[62],"random":[63,120,165],"method":[64,167],"extract":[66],"early":[67],"sample":[68,84,92,154,194,202,281],"set":[69,155,180,282],"original":[72,102,178,210,269],"then":[75],"made":[76],"a":[77,98],"variety":[78],"different":[80],"processing":[81,109,139,219],"in":[85,127,161,168,236,288],"order":[86,237],"obtain":[88,152],"corresponding":[90],"minimum":[91,153,193,280],"set,":[93],"is":[95,160],"regarded":[96],"as":[97,188],"representation":[99],"set.":[105,195],"However,":[106],"final":[108,134,250],"results":[110,136,252],"will":[114],"be":[115],"severely":[116],"affected":[117],"by":[118],"process":[122,235],"beginning,":[125],"resulting":[126],"lower":[128],"comprehensiveness":[129],"quality":[131],"longer":[138],"time.":[140,220],"Based":[141],"this":[143],"view,":[144],"CSA":[145,172,260],"introduces":[146],"idea":[148],"clustering":[150,234,241],"data,":[158,211],"contrast":[162],"current":[170],"literature.":[171],"cluster":[174],"analysis":[175],"selects":[182],"center":[184,242],"each":[186],"class":[187],"centralized":[189],"members":[190],"It":[196],"aims":[197],"ensuring":[199],"that":[200,225],"distribution":[203],"accords":[204],"characteristics":[207,266],"guarantees":[212],"integrity":[215],"reduces":[217],"max\u2013min":[222],"distance":[223],"means":[224],"pattern":[227],"recognition":[228],"has":[229,283],"been":[230],"integrated":[231],"into":[232],"get":[239],"prevent":[244],"algorithm":[245,261],"local":[247],"optimum.":[248],"experimental":[251],"show":[253],"that,":[254],"compared":[255],"work,":[259],"can":[262],"efficiently":[263],"reflect":[264],"reduce":[272],"processing.":[277],"obtained":[279],"also":[284],"achieved":[285],"good":[286],"effects":[287],"classification":[290],"algorithm.":[291]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":2},{"year":2022,"cited_by_count":4},{"year":2021,"cited_by_count":4},{"year":2020,"cited_by_count":5},{"year":2018,"cited_by_count":5}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
