{"id":"https://openalex.org/W7083711822","doi":"https://doi.org/10.48550/arxiv.2509.21380","title":"Coreset selection based on Intra-class diversity","display_name":"Coreset selection based on Intra-class diversity","publication_year":2025,"publication_date":"2025-09-23","ids":{"openalex":"https://openalex.org/W7083711822","doi":"https://doi.org/10.48550/arxiv.2509.21380"},"language":"en","primary_location":{"id":"doi:10.48550/arxiv.2509.21380","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2509.21380","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2509.21380","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Ashraf, Imran","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Ashraf, Imran","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Ullah, Mukhtar","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ullah, Mukhtar","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"Nadeem, Muhammad Faisal","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Nadeem, Muhammad Faisal","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":null,"display_name":"Noor, Muhammad Nouman","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Noor, Muhammad Nouman","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":true,"primary_topic":{"id":"https://openalex.org/T11652","display_name":"Imbalanced Data Classification Techniques","score":0.6532999873161316,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11652","display_name":"Imbalanced Data Classification Techniques","score":0.6532999873161316,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11775","display_name":"COVID-19 diagnosis using AI","score":0.0763000026345253,"subfield":{"id":"https://openalex.org/subfields/2741","display_name":"Radiology, Nuclear Medicine and Imaging"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.05209999904036522,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/representativeness-heuristic","display_name":"Representativeness heuristic","score":0.5719000101089478},{"id":"https://openalex.org/keywords/hyperparameter","display_name":"Hyperparameter","score":0.510200023651123},{"id":"https://openalex.org/keywords/sampling","display_name":"Sampling (signal processing)","score":0.5020999908447266},{"id":"https://openalex.org/keywords/selection","display_name":"Selection (genetic algorithm)","score":0.49900001287460327},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.4681999981403351},{"id":"https://openalex.org/keywords/training-set","display_name":"Training set","score":0.39579999446868896},{"id":"https://openalex.org/keywords/transfer-of-learning","display_name":"Transfer of learning","score":0.3862000107765198},{"id":"https://openalex.org/keywords/preprocessor","display_name":"Preprocessor","score":0.3564000129699707},{"id":"https://openalex.org/keywords/model-selection","display_name":"Model selection","score":0.33489999175071716}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7222999930381775},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.6915000081062317},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6445000171661377},{"id":"https://openalex.org/C37381756","wikidata":"https://www.wikidata.org/wiki/Q20203288","display_name":"Representativeness heuristic","level":2,"score":0.5719000101089478},{"id":"https://openalex.org/C8642999","wikidata":"https://www.wikidata.org/wiki/Q4171168","display_name":"Hyperparameter","level":2,"score":0.510200023651123},{"id":"https://openalex.org/C140779682","wikidata":"https://www.wikidata.org/wiki/Q210868","display_name":"Sampling (signal processing)","level":3,"score":0.5020999908447266},{"id":"https://openalex.org/C81917197","wikidata":"https://www.wikidata.org/wiki/Q628760","display_name":"Selection (genetic algorithm)","level":2,"score":0.49900001287460327},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.4681999981403351},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.39579999446868896},{"id":"https://openalex.org/C150899416","wikidata":"https://www.wikidata.org/wiki/Q1820378","display_name":"Transfer of learning","level":2,"score":0.3862000107765198},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3736000061035156},{"id":"https://openalex.org/C34736171","wikidata":"https://www.wikidata.org/wiki/Q918333","display_name":"Preprocessor","level":2,"score":0.3564000129699707},{"id":"https://openalex.org/C93959086","wikidata":"https://www.wikidata.org/wiki/Q6888345","display_name":"Model selection","level":2,"score":0.33489999175071716},{"id":"https://openalex.org/C73602740","wikidata":"https://www.wikidata.org/wiki/Q7795822","display_name":"Thompson sampling","level":3,"score":0.3179999887943268},{"id":"https://openalex.org/C2777868144","wikidata":"https://www.wikidata.org/wiki/Q7239817","display_name":"Preference elicitation","level":3,"score":0.3149999976158142},{"id":"https://openalex.org/C89611455","wikidata":"https://www.wikidata.org/wiki/Q6804646","display_name":"Mechanism (biology)","level":2,"score":0.30720001459121704},{"id":"https://openalex.org/C2778334786","wikidata":"https://www.wikidata.org/wiki/Q1586270","display_name":"Variation (astronomy)","level":2,"score":0.2976999878883362},{"id":"https://openalex.org/C126661757","wikidata":"https://www.wikidata.org/wiki/Q4925641","display_name":"Random search","level":2,"score":0.2971000075340271},{"id":"https://openalex.org/C45942800","wikidata":"https://www.wikidata.org/wiki/Q245652","display_name":"Ensemble learning","level":2,"score":0.2777999937534332},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.2718000113964081},{"id":"https://openalex.org/C75684735","wikidata":"https://www.wikidata.org/wiki/Q858810","display_name":"Big data","level":2,"score":0.27129998803138733},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.2676999866962433},{"id":"https://openalex.org/C127705205","wikidata":"https://www.wikidata.org/wiki/Q5748245","display_name":"Heuristics","level":2,"score":0.2630999982357025},{"id":"https://openalex.org/C169258074","wikidata":"https://www.wikidata.org/wiki/Q245748","display_name":"Random forest","level":2,"score":0.2547999918460846},{"id":"https://openalex.org/C58489278","wikidata":"https://www.wikidata.org/wiki/Q1172284","display_name":"Data set","level":2,"score":0.2542000114917755},{"id":"https://openalex.org/C77618280","wikidata":"https://www.wikidata.org/wiki/Q1155772","display_name":"Scheme (mathematics)","level":2,"score":0.2535000145435333},{"id":"https://openalex.org/C75294576","wikidata":"https://www.wikidata.org/wiki/Q5165192","display_name":"Contextual image classification","level":3,"score":0.2513999938964844}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2509.21380","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2509.21380","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2509.21380","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2509.21380","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Deep":[0],"Learning":[1],"models":[2],"have":[3,29],"transformed":[4],"various":[5],"domains,":[6],"including":[7],"the":[8,54,70,85,98,111,123,131,139,148,152,155,165,168,177,220,225,228,245,249],"healthcare":[9],"sector,":[10],"particularly":[11],"biomedical":[12,239],"image":[13],"classification":[14,234],"by":[15,195,231],"learning":[16],"intricate":[17],"features":[18],"and":[19,41,50,115],"enabling":[20],"accurate":[21],"diagnostics":[22],"pertaining":[23],"to":[24,34,53,69,92,106,121,137,209],"complex":[25],"diseases.":[26],"Recent":[27],"studies":[28],"adopted":[30],"two":[31],"different":[32],"approaches":[33,45],"train":[35],"DL":[36],"models:":[37],"training":[38,82,114],"from":[39],"scratch":[40],"transfer":[42],"learning.":[43],"Both":[44],"demand":[46],"substantial":[47],"computational":[48,63],"time":[49],"resources":[51],"due":[52,68],"involvement":[55],"of":[56,88,110,130,150,154,161,227],"massive":[57],"datasets":[58],"in":[59,171],"model":[60],"training.":[61],"These":[62],"demands":[64],"are":[65,217],"further":[66],"increased":[67],"design-space":[71],"exploration":[72],"required":[73],"for":[74,113,201,219,257],"selecting":[75,138],"optimal":[76],"hyperparameters,":[77],"which":[78],"typically":[79],"necessitates":[80],"several":[81,254],"rounds.":[83],"With":[84],"growing":[86],"sizes":[87],"datasets,":[89],"exploring":[90],"solutions":[91],"this":[93,182,193],"problem":[94],"has":[95,179],"recently":[96],"gained":[97],"research":[99],"community's":[100],"attention.":[101],"A":[102,134,158],"plausible":[103],"solution":[104],"is":[105,164],"select":[107],"a":[108,127,207,237],"subset":[109],"dataset":[112,178],"hyperparameter":[116],"search.":[117],"This":[118,190],"subset,":[119],"referred":[120],"as":[122],"corset,":[124],"must":[125],"be":[126,142],"representative":[128],"set":[129],"original":[132,156],"dataset.":[133,157,174,241],"straightforward":[135],"approach":[136,252],"coreset":[140,202],"could":[141],"employing":[143],"random":[144,162,183,250],"sampling,":[145],"albeit":[146],"at":[147],"cost":[149],"compromising":[151],"representativeness":[153],"critical":[159],"limitation":[160],"sampling":[163,184,251],"bias":[166],"towards":[167],"dominant":[169],"classes":[170],"an":[172,197],"imbalanced":[173],"Even":[175],"if":[176],"inter-class":[180],"balance,":[181],"will":[185],"not":[186],"capture":[187],"intra-class":[188,211],"diversity.":[189],"study":[191],"addresses":[192],"issue":[194],"introducing":[196],"intelligent,":[198],"lightweight":[199],"mechanism":[200],"selection.":[203],"Specifically,":[204],"it":[205],"proposes":[206],"method":[208],"extract":[210],"diversity,":[212],"forming":[213],"per-class":[214],"clusters":[215],"that":[216,244],"utilized":[218],"final":[221],"sampling.":[222],"We":[223],"demonstrate":[224,243],"efficacy":[226],"proposed":[229,246],"methodology":[230],"conducting":[232],"extensive":[233],"experiments":[235],"on":[236,253],"well-known":[238],"imaging":[240],"Results":[242],"scheme":[247],"outperforms":[248],"performance":[255],"metrics":[256],"uniform":[258],"conditions.":[259]},"counts_by_year":[],"updated_date":"2025-11-06T06:51:31.235846","created_date":"2025-10-10T00:00:00"}
