{"id":"https://openalex.org/W4409049457","doi":"https://doi.org/10.1007/s00357-025-09501-w","title":"Natural Language-Based Synthetic Data Generation for Cluster Analysis","display_name":"Natural Language-Based Synthetic Data Generation for Cluster Analysis","publication_year":2025,"publication_date":"2025-03-31","ids":{"openalex":"https://openalex.org/W4409049457","doi":"https://doi.org/10.1007/s00357-025-09501-w"},"language":"en","primary_location":{"id":"doi:10.1007/s00357-025-09501-w","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s00357-025-09501-w","pdf_url":"https://link.springer.com/content/pdf/10.1007/s00357-025-09501-w.pdf","source":{"id":"https://openalex.org/S73028643","display_name":"Journal of Classification","issn_l":"0176-4268","issn":["0176-4268","1432-1343"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Classification","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://link.springer.com/content/pdf/10.1007/s00357-025-09501-w.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5064754104","display_name":"Michael J. Zellinger","orcid":"https://orcid.org/0009-0001-7499-148X"},"institutions":[{"id":"https://openalex.org/I122411786","display_name":"California Institute of Technology","ror":"https://ror.org/05dxps055","country_code":"US","type":"education","lineage":["https://openalex.org/I122411786"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Michael J. Zellinger","raw_affiliation_strings":["Department of Computing & Mathematical Sciences, California Institute of Technology, 91125, Pasadena, CA, USA"],"raw_orcid":"https://orcid.org/0009-0001-7499-148X","affiliations":[{"raw_affiliation_string":"Department of Computing & Mathematical Sciences, California Institute of Technology, 91125, Pasadena, CA, USA","institution_ids":["https://openalex.org/I122411786"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5033240072","display_name":"Peter B\u00fchlmann","orcid":"https://orcid.org/0000-0002-1782-6015"},"institutions":[{"id":"https://openalex.org/I35440088","display_name":"ETH Zurich","ror":"https://ror.org/05a28rw58","country_code":"CH","type":"education","lineage":["https://openalex.org/I2799323385","https://openalex.org/I35440088"]}],"countries":["CH"],"is_corresponding":true,"raw_author_name":"Peter B\u00fchlmann","raw_affiliation_strings":["Seminar for Statistics, ETH Z\u00fcrich, Z\u00fcrich, Switzerland"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Seminar for Statistics, ETH Z\u00fcrich, Z\u00fcrich, Switzerland","institution_ids":["https://openalex.org/I35440088"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5033240072"],"corresponding_institution_ids":["https://openalex.org/I35440088"],"apc_list":{"value":2390,"currency":"EUR","value_usd":2990},"apc_paid":{"value":2390,"currency":"EUR","value_usd":2990},"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.02646261,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"42","issue":"3","first_page":"517","last_page":"543"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10637","display_name":"Advanced Clustering Algorithms Research","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10637","display_name":"Advanced Clustering Algorithms Research","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11106","display_name":"Data Management and Algorithms","score":0.9962000250816345,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10064","display_name":"Complex Network Analysis Techniques","score":0.9943000078201294,"subfield":{"id":"https://openalex.org/subfields/3109","display_name":"Statistical and Nonlinear Physics"},"field":{"id":"https://openalex.org/fields/31","display_name":"Physics and Astronomy"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5812503695487976},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5615719556808472},{"id":"https://openalex.org/keywords/cluster","display_name":"Cluster (spacecraft)","score":0.5519018173217773},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5428575873374939},{"id":"https://openalex.org/keywords/natural-language-generation","display_name":"Natural language generation","score":0.41825711727142334},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.3878379464149475},{"id":"https://openalex.org/keywords/natural-language","display_name":"Natural language","score":0.3589787185192108},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.350596159696579},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.12313458323478699}],"concepts":[{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5812503695487976},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5615719556808472},{"id":"https://openalex.org/C164866538","wikidata":"https://www.wikidata.org/wiki/Q367351","display_name":"Cluster (spacecraft)","level":2,"score":0.5519018173217773},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5428575873374939},{"id":"https://openalex.org/C2776187449","wikidata":"https://www.wikidata.org/wiki/Q1513879","display_name":"Natural language generation","level":3,"score":0.41825711727142334},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.3878379464149475},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.3589787185192108},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.350596159696579},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.12313458323478699},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1007/s00357-025-09501-w","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s00357-025-09501-w","pdf_url":"https://link.springer.com/content/pdf/10.1007/s00357-025-09501-w.pdf","source":{"id":"https://openalex.org/S73028643","display_name":"Journal of Classification","issn_l":"0176-4268","issn":["0176-4268","1432-1343"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Classification","raw_type":"journal-article"},{"id":"pmh:doi:10.3929/ethz-b-000730274","is_oa":true,"landing_page_url":"http://hdl.handle.net/20.500.11850/730274","pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Journal Article"}],"best_oa_location":{"id":"doi:10.1007/s00357-025-09501-w","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s00357-025-09501-w","pdf_url":"https://link.springer.com/content/pdf/10.1007/s00357-025-09501-w.pdf","source":{"id":"https://openalex.org/S73028643","display_name":"Journal of Classification","issn_l":"0176-4268","issn":["0176-4268","1432-1343"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Classification","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W4409049457.pdf"},"referenced_works_count":48,"referenced_works":["https://openalex.org/W143994917","https://openalex.org/W156509570","https://openalex.org/W1536492814","https://openalex.org/W1549152550","https://openalex.org/W1673310716","https://openalex.org/W1965751196","https://openalex.org/W1967597907","https://openalex.org/W1975152892","https://openalex.org/W1983398668","https://openalex.org/W1987971958","https://openalex.org/W2011301426","https://openalex.org/W2049633694","https://openalex.org/W2071949631","https://openalex.org/W2076089275","https://openalex.org/W2077702199","https://openalex.org/W2088879945","https://openalex.org/W2092799168","https://openalex.org/W2126751256","https://openalex.org/W2127218421","https://openalex.org/W2145001205","https://openalex.org/W2154532634","https://openalex.org/W2162833336","https://openalex.org/W2165533158","https://openalex.org/W2278925444","https://openalex.org/W2304387544","https://openalex.org/W2549416390","https://openalex.org/W2601243251","https://openalex.org/W2740924709","https://openalex.org/W2787894218","https://openalex.org/W2800909518","https://openalex.org/W2941167638","https://openalex.org/W2954848721","https://openalex.org/W2963026768","https://openalex.org/W2999905431","https://openalex.org/W3008003211","https://openalex.org/W3035965352","https://openalex.org/W3096333621","https://openalex.org/W3099878876","https://openalex.org/W4235169531","https://openalex.org/W4255949318","https://openalex.org/W4300980582","https://openalex.org/W4385265386","https://openalex.org/W4389519449","https://openalex.org/W6675354045","https://openalex.org/W6677945368","https://openalex.org/W6684578312","https://openalex.org/W6768496051","https://openalex.org/W6778883912"],"related_works":["https://openalex.org/W2955859849","https://openalex.org/W2122804826","https://openalex.org/W2152921782","https://openalex.org/W382594479","https://openalex.org/W2470045054","https://openalex.org/W2575772232","https://openalex.org/W2151245229","https://openalex.org/W2140902089","https://openalex.org/W2988746243","https://openalex.org/W1510553545"],"abstract_inverted_index":{"Abstract":[0],"Cluster":[1],"analysis":[2],"relies":[3],"on":[4,15,87],"effective":[5],"benchmarks":[6,76,118],"for":[7,119],"evaluating":[8],"and":[9,79,116],"comparing":[10],"different":[11,61],"algorithms.":[12],"Simulation":[13],"studies":[14],"synthetic":[16,83],"data":[17,25,84,125],"are":[18],"popular":[19],"because":[20],"important":[21],"features":[22],"of":[23,90,124],"the":[24,29,34],"sets,":[26],"such":[27,67],"as":[28,50,68],"overlap":[30],"between":[31],"clusters,":[32],"or":[33,97],"variation":[35],"in":[36],"cluster":[37,69,120],"shapes,":[38],"can":[39],"be":[40],"effectively":[41],"varied.":[42],"Unfortunately,":[43],"creating":[44],"evaluation":[45],"scenarios":[46],"is":[47,130],"often":[48],"laborious,":[49],"practitioners":[51],"must":[52],"translate":[53],"higher-level":[54],"scenario":[55],"descriptions":[56,96],"like":[57],"\u201cclusters":[58],"with":[59],"very":[60],"shapes\u201d":[62],"into":[63],"lower-level":[64],"geometric":[65,99],"parameters":[66],"centers,":[70],"covariance":[71],"matrices,":[72],"etc.":[73],"To":[74],"make":[75],"more":[77],"convenient":[78],"informative,":[80],"we":[81],"propose":[82],"generation":[85,126],"based":[86],"direct":[88],"specification":[89],"high-level":[91,98],"scenarios,":[92],"either":[93],"through":[94],"verbal":[95,128],"parameters.":[100],"Our":[101],"open-source":[102],"Python":[103],"package":[104],"https://repliclust.org":[105],"implements":[106],"this":[107],"workflow,":[108],"making":[109],"it":[110],"easy":[111],"to":[112],"set":[113],"up":[114],"interpretable":[115],"reproducible":[117],"analysis.":[121],"A":[122],"demo":[123],"from":[127],"inputs":[129],"available":[131],"at":[132],"https://demo.repliclust.org":[133],".":[134]},"counts_by_year":[],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
