{"id":"https://openalex.org/W2583170832","doi":"https://doi.org/10.2352/issn.2470-1173.2017.1.vda-388","title":"Visual Interactive Creation and Validation of Text Clustering Workflows to Explore Document Collections","display_name":"Visual Interactive Creation and Validation of Text Clustering Workflows to Explore Document Collections","publication_year":2017,"publication_date":"2017-01-29","ids":{"openalex":"https://openalex.org/W2583170832","doi":"https://doi.org/10.2352/issn.2470-1173.2017.1.vda-388","mag":"2583170832"},"language":"en","primary_location":{"id":"doi:10.2352/issn.2470-1173.2017.1.vda-388","is_oa":false,"landing_page_url":"https://doi.org/10.2352/issn.2470-1173.2017.1.vda-388","pdf_url":null,"source":{"id":"https://openalex.org/S4210227276","display_name":"Electronic Imaging","issn_l":"2470-1173","issn":["2470-1173"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Electronic Imaging","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5063602485","display_name":"Tobias Ruppert","orcid":"https://orcid.org/0000-0002-5658-9112"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Tobias Ruppert","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5018652518","display_name":"Michael Staab","orcid":"https://orcid.org/0000-0003-0894-7576"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Michael Staab","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5090713275","display_name":"Andreas Bannach","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Andreas Bannach","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5018698322","display_name":"Hendrik L\u00fccke\u2010Tieke","orcid":"https://orcid.org/0000-0002-0934-6820"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hendrik L\u00fccke-Tieke","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5078086722","display_name":"J\u00fcrgen Bernard","orcid":"https://orcid.org/0000-0001-8741-9709"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"J\u00fcrgen Bernard","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5046597105","display_name":"Arjan Kuijper","orcid":"https://orcid.org/0000-0002-6413-0061"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Arjan Kuijper","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5032433963","display_name":"J\u00f6rn Kohlhammer","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"J\u00f6rn Kohlhammer","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":7,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.3695,"has_fulltext":false,"cited_by_count":7,"citation_normalized_percentile":{"value":0.67577057,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":"29","issue":"1","first_page":"46","last_page":"57"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10799","display_name":"Data Visualization and Analytics","score":0.9976999759674072,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10799","display_name":"Data Visualization and Analytics","score":0.9976999759674072,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9793000221252441,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13083","display_name":"Advanced Text Analysis Techniques","score":0.9781000018119812,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.8670629262924194},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.82538902759552},{"id":"https://openalex.org/keywords/document-clustering","display_name":"Document clustering","score":0.6829672455787659},{"id":"https://openalex.org/keywords/workflow","display_name":"Workflow","score":0.6265714764595032},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.5367442965507507},{"id":"https://openalex.org/keywords/preprocessor","display_name":"Preprocessor","score":0.5237224698066711},{"id":"https://openalex.org/keywords/selection","display_name":"Selection (genetic algorithm)","score":0.518015444278717},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.5119353532791138},{"id":"https://openalex.org/keywords/feature-selection","display_name":"Feature selection","score":0.4733541011810303},{"id":"https://openalex.org/keywords/fuzzy-clustering","display_name":"Fuzzy clustering","score":0.47130975127220154},{"id":"https://openalex.org/keywords/brown-clustering","display_name":"Brown clustering","score":0.46832969784736633},{"id":"https://openalex.org/keywords/ranking","display_name":"Ranking (information retrieval)","score":0.467998206615448},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.4548807740211487},{"id":"https://openalex.org/keywords/clustering-high-dimensional-data","display_name":"Clustering high-dimensional data","score":0.41033995151519775},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3448307514190674},{"id":"https://openalex.org/keywords/cure-data-clustering-algorithm","display_name":"CURE data clustering algorithm","score":0.2622053921222687},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.18546780943870544}],"concepts":[{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.8670629262924194},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.82538902759552},{"id":"https://openalex.org/C177937566","wikidata":"https://www.wikidata.org/wiki/Q4223102","display_name":"Document clustering","level":3,"score":0.6829672455787659},{"id":"https://openalex.org/C177212765","wikidata":"https://www.wikidata.org/wiki/Q627335","display_name":"Workflow","level":2,"score":0.6265714764595032},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.5367442965507507},{"id":"https://openalex.org/C34736171","wikidata":"https://www.wikidata.org/wiki/Q918333","display_name":"Preprocessor","level":2,"score":0.5237224698066711},{"id":"https://openalex.org/C81917197","wikidata":"https://www.wikidata.org/wiki/Q628760","display_name":"Selection (genetic algorithm)","level":2,"score":0.518015444278717},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.5119353532791138},{"id":"https://openalex.org/C148483581","wikidata":"https://www.wikidata.org/wiki/Q446488","display_name":"Feature selection","level":2,"score":0.4733541011810303},{"id":"https://openalex.org/C17212007","wikidata":"https://www.wikidata.org/wiki/Q5511111","display_name":"Fuzzy clustering","level":3,"score":0.47130975127220154},{"id":"https://openalex.org/C167984511","wikidata":"https://www.wikidata.org/wiki/Q17003931","display_name":"Brown clustering","level":5,"score":0.46832969784736633},{"id":"https://openalex.org/C189430467","wikidata":"https://www.wikidata.org/wiki/Q7293293","display_name":"Ranking (information retrieval)","level":2,"score":0.467998206615448},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.4548807740211487},{"id":"https://openalex.org/C184509293","wikidata":"https://www.wikidata.org/wiki/Q5136711","display_name":"Clustering high-dimensional data","level":3,"score":0.41033995151519775},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3448307514190674},{"id":"https://openalex.org/C33704608","wikidata":"https://www.wikidata.org/wiki/Q5014717","display_name":"CURE data clustering algorithm","level":4,"score":0.2622053921222687},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.18546780943870544},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.2352/issn.2470-1173.2017.1.vda-388","is_oa":false,"landing_page_url":"https://doi.org/10.2352/issn.2470-1173.2017.1.vda-388","pdf_url":null,"source":{"id":"https://openalex.org/S4210227276","display_name":"Electronic Imaging","issn_l":"2470-1173","issn":["2470-1173"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Electronic Imaging","raw_type":"journal-article"},{"id":"pmh:oai:publica.fraunhofer.de:publica/395205","is_oa":false,"landing_page_url":"https://publica.fraunhofer.de/handle/publica/395205","pdf_url":null,"source":{"id":"https://openalex.org/S4306400318","display_name":"Fraunhofer-Publica (Fraunhofer-Gesellschaft)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4923324","host_organization_name":"Fraunhofer-Gesellschaft","host_organization_lineage":["https://openalex.org/I4923324"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"conference paper"},{"id":"pmh:oai:tubiblio.ulb.tu-darmstadt.de:120541","is_oa":false,"landing_page_url":"http://tubiblio.ulb.tu-darmstadt.de/120541/","pdf_url":null,"source":{"id":"https://openalex.org/S4377196390","display_name":"TUbilio (Technical University of Darmstadt)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I31512782","host_organization_name":"Technische Universit\u00e4t Darmstadt","host_organization_lineage":["https://openalex.org/I31512782"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Konferenzver\u00f6ffentlichung"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W2954628966","https://openalex.org/W1997571094","https://openalex.org/W2891223594","https://openalex.org/W2049508213","https://openalex.org/W58095113","https://openalex.org/W2042494732","https://openalex.org/W2387287989","https://openalex.org/W2019737068","https://openalex.org/W2378900857","https://openalex.org/W2591691005"],"abstract_inverted_index":{"The":[0,92],"exploration":[1],"of":[2,26,45,81,100,161,180],"text":[3,82,102],"document":[4,90,169,192],"collections":[5,193],"is":[6],"a":[7,184,195],"complex":[8],"and":[9,40,63,79,120,134,150,154,167,197],"cumbersome":[10],"task.":[11],"Clustering":[12],"techniques":[13],"can":[14,128,141,164,173,190],"help":[15],"to":[16,55,88,96],"group":[17],"documents":[18],"based":[19,59,144],"on":[20,60,145],"their":[21,135],"content":[22,148],"for":[23,76],"the":[24,29,61,77,86,101,110,138,146,159,178,181],"generation":[25],"overviews.":[27],"However,":[28],"underlying":[30],"clustering":[31,37,50,57,83,103,132,139],"workflows":[32,84],"comprising":[33],"preprocessing,":[34],"feature":[35,111,115,118,152],"selection,":[36],"algorithm":[38],"selection":[39,112,116],"parameterization":[41],"offer":[42],"several":[43],"degrees":[44],"freedom.":[46],"Since":[47],"no":[48],"\"best\"":[49],"workflow":[51],"exists,":[52],"users":[53,95,106,127,189],"have":[54],"evaluate":[56],"results":[58,140,160],"data":[62],"analysis":[64],"tasks":[65],"at":[66],"hand.":[67],"In":[68],"our":[69],"approach,":[70],"we":[71],"present":[72],"an":[73],"interactive":[74,198],"system":[75,93,182],"creation":[78],"validation":[80],"with":[85,183],"goal":[87],"explore":[89,191],"collections.":[91],"allows":[94],"control":[97],"every":[98],"step":[99],"workflow.":[104],"First,":[105],"are":[107],"supported":[108],"in":[109,171,194],"process":[113],"via":[114],"metrics-based":[117],"ranking":[119],"linguistic":[121],"filtering":[122],"(e.g.,":[123],"part-of-speech":[124],"filtering).":[125],"Second,":[126],"choose":[129],"between":[130],"different":[131,162],"methods":[133],"parameterizations.":[136],"Third,":[137],"be":[142,165,174],"explored":[143],"cluster":[147,155],"(documents":[149],"relevant":[151],"terms),":[153],"quality":[156],"measures.":[157],"Fourth,":[158],"clusterings":[163],"compared,":[166],"frequent":[168],"subsets":[170],"clusters":[172],"identified.":[175],"We":[176],"validate":[177],"usefulness":[179],"usage":[185],"scenario":[186],"describing":[187],"how":[188],"visual":[196],"way.":[199]},"counts_by_year":[{"year":2023,"cited_by_count":2},{"year":2021,"cited_by_count":1},{"year":2019,"cited_by_count":2},{"year":2018,"cited_by_count":1},{"year":2017,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
