{"id":"https://openalex.org/W4415036308","doi":"https://doi.org/10.48550/arxiv.2505.20807","title":"Simple yet Effective Graph Distillation via Clustering","display_name":"Simple yet Effective Graph Distillation via Clustering","publication_year":2025,"publication_date":"2025-05-27","ids":{"openalex":"https://openalex.org/W4415036308","doi":"https://doi.org/10.48550/arxiv.2505.20807"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2505.20807","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2505.20807","pdf_url":"https://arxiv.org/pdf/2505.20807","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2505.20807","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5091763061","display_name":"Yurui Lai","orcid":"https://orcid.org/0009-0000-4402-3798"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Lai, Yurui","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5114054182","display_name":"Taiyan Zhang","orcid":"https://orcid.org/0009-0004-6757-9237"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Taiyan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5040420455","display_name":"Renchi Yang","orcid":"https://orcid.org/0000-0002-7284-3096"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Renchi","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5091763061"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.31619998812675476,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.31619998812675476,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10320","display_name":"Neural Networks and Applications","score":0.2897999882698059,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10538","display_name":"Data Mining Algorithms and Applications","score":0.28220000863075256,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.5932000279426575},{"id":"https://openalex.org/keywords/graph","display_name":"Graph","score":0.4814999997615814},{"id":"https://openalex.org/keywords/heuristics","display_name":"Heuristics","score":0.451200008392334},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.3334999978542328},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.33070001006126404},{"id":"https://openalex.org/keywords/overhead","display_name":"Overhead (engineering)","score":0.329800009727478},{"id":"https://openalex.org/keywords/overfitting","display_name":"Overfitting","score":0.319599986076355}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.667900025844574},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.5932000279426575},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.4884999990463257},{"id":"https://openalex.org/C132525143","wikidata":"https://www.wikidata.org/wiki/Q141488","display_name":"Graph","level":2,"score":0.4814999997615814},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.46309998631477356},{"id":"https://openalex.org/C127705205","wikidata":"https://www.wikidata.org/wiki/Q5748245","display_name":"Heuristics","level":2,"score":0.451200008392334},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.42660000920295715},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.40880000591278076},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.3334999978542328},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.33070001006126404},{"id":"https://openalex.org/C2779960059","wikidata":"https://www.wikidata.org/wiki/Q7113681","display_name":"Overhead (engineering)","level":2,"score":0.329800009727478},{"id":"https://openalex.org/C22019652","wikidata":"https://www.wikidata.org/wiki/Q331309","display_name":"Overfitting","level":3,"score":0.319599986076355},{"id":"https://openalex.org/C22047676","wikidata":"https://www.wikidata.org/wiki/Q898680","display_name":"Clustering coefficient","level":3,"score":0.3167000114917755},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.31619998812675476},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.3046000003814697},{"id":"https://openalex.org/C62611344","wikidata":"https://www.wikidata.org/wiki/Q1062658","display_name":"Node (physics)","level":2,"score":0.3000999987125397},{"id":"https://openalex.org/C105611402","wikidata":"https://www.wikidata.org/wiki/Q2976589","display_name":"Spectral clustering","level":3,"score":0.29010000824928284},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.2806999981403351},{"id":"https://openalex.org/C160920958","wikidata":"https://www.wikidata.org/wiki/Q7662746","display_name":"Synthetic data","level":2,"score":0.25519999861717224},{"id":"https://openalex.org/C120936955","wikidata":"https://www.wikidata.org/wiki/Q2155640","display_name":"Empirical research","level":2,"score":0.2542000114917755},{"id":"https://openalex.org/C2780586882","wikidata":"https://www.wikidata.org/wiki/Q7520643","display_name":"Simple (philosophy)","level":2,"score":0.2526000142097473}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2505.20807","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2505.20807","pdf_url":"https://arxiv.org/pdf/2505.20807","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"doi:10.48550/arxiv.2505.20807","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2505.20807","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2505.20807","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2505.20807","pdf_url":"https://arxiv.org/pdf/2505.20807","source":{"id":"https://openalex.org/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G3317480652","display_name":null,"funder_award_id":"Science","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5994120800","display_name":null,"funder_award_id":"Natural","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G8618575630","display_name":null,"funder_award_id":"62302414","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4415036308.pdf","grobid_xml":"https://content.openalex.org/works/W4415036308.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Despite":[0],"plentiful":[1],"successes":[2],"achieved":[3],"by":[4,95,145,178,214],"graph":[5,14,35,116,190,198],"representation":[6,74],"learning":[7],"in":[8,32,226],"various":[9],"domains,":[10],"the":[11,24,108,114,127,134,137,152,174,179,184,188],"training":[12,87],"of":[13,130,187,228,238],"neural":[15],"networks":[16],"(GNNs)":[17],"still":[18],"remains":[19],"tenaciously":[20],"challenging":[21],"due":[22],"to":[23,41,56,82,112,172,222],"tremendous":[25],"computational":[26],"overhead":[27],"needed":[28],"for":[29,88,168],"sizable":[30],"graphs":[31,44,212],"practice.":[33],"Recently,":[34],"data":[36],"distillation":[37],"(GDD),":[38],"which":[39],"seeks":[40],"distill":[42],"large":[43,90],"into":[45],"compact":[46],"and":[47,78,102,117,122,132,148,156,200],"informative":[48],"ones,":[49],"has":[50],"emerged":[51],"as":[52],"a":[53,164,192],"promising":[54],"technique":[55],"enable":[57],"efficient":[58,101],"GNN":[59],"training.":[60],"However,":[61],"most":[62],"existing":[63],"GDD":[64,104,224],"works":[65],"rely":[66],"on":[67,76,136,231],"heuristics":[68],"that":[69,125,207],"align":[70],"model":[71],"gradients":[72],"or":[73,92,219],"distributions":[75],"condensed":[77,115,189,211],"original":[79,138],"graphs,":[80,91],"leading":[81],"compromised":[83],"result":[84],"quality,":[85],"expensive":[86],"distilling":[89],"both.":[93],"Motivated":[94],"this,":[96],"this":[97],"paper":[98],"presents":[99],"an":[100],"effective":[103],"approach,":[105],"ClustGDD.":[106],"Under":[107],"hood,":[109],"ClustGDD":[110,182,215],"resorts":[111],"synthesizing":[113],"node":[118,229],"attributes":[119,186],"through":[120],"fast":[121],"theoretically-grounded":[123],"clustering":[124,155],"minimizes":[126],"within-cluster":[128],"sum":[129],"squares":[131],"maximizes":[133],"homophily":[135],"graph.":[139],"The":[140],"fundamental":[141],"idea":[142],"is":[143],"inspired":[144],"our":[146],"empirical":[147,157],"theoretical":[149],"findings":[150],"unveiling":[151],"connection":[153],"between":[154],"condensation":[158],"quality":[159,166],"using":[160],"Fr\u00e9chet":[161],"Inception":[162],"Distance,":[163],"well-known":[165],"metric":[167],"synthetic":[169],"images.":[170],"Furthermore,":[171],"mitigate":[173],"adverse":[175],"effects":[176],"caused":[177],"homophily-based":[180],"clustering,":[181],"refines":[183],"nodal":[185],"with":[191],"small":[193],"augmentation":[194],"learned":[195],"via":[196],"class-aware":[197],"sampling":[199],"consistency":[201],"loss.":[202],"Our":[203],"extensive":[204],"experiments":[205],"exhibit":[206],"GNNs":[208],"trained":[209],"over":[210],"output":[213],"consistently":[216],"achieve":[217],"superior":[218],"comparable":[220],"performance":[221],"state-of-the-art":[223],"methods":[225],"terms":[227],"classification":[230],"five":[232],"benchmark":[233],"datasets,":[234],"while":[235],"being":[236],"orders":[237],"magnitude":[239],"faster.":[240]},"counts_by_year":[],"updated_date":"2026-03-15T09:29:46.208133","created_date":"2025-10-10T00:00:00"}
