{"id":"https://openalex.org/W4396832027","doi":"https://doi.org/10.1145/3613905.3650798","title":"Automatic Histograms: Leveraging Language Models for Text Dataset Exploration","display_name":"Automatic Histograms: Leveraging Language Models for Text Dataset Exploration","publication_year":2024,"publication_date":"2024-05-11","ids":{"openalex":"https://openalex.org/W4396832027","doi":"https://doi.org/10.1145/3613905.3650798"},"language":"en","primary_location":{"id":"doi:10.1145/3613905.3650798","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3613905.3650798","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3613905.3650798","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Extended Abstracts of the CHI Conference on Human Factors in Computing Systems","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3613905.3650798","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5019880413","display_name":"Emily Reif","orcid":"https://orcid.org/0000-0003-3572-6234"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Emily Reif","raw_affiliation_strings":["Google Research, United States"],"affiliations":[{"raw_affiliation_string":"Google Research, United States","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5043246682","display_name":"Crystal Qian","orcid":"https://orcid.org/0000-0001-7716-7245"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Crystal Qian","raw_affiliation_strings":["Google Research, United States"],"affiliations":[{"raw_affiliation_string":"Google Research, United States","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5081809692","display_name":"James Wexler","orcid":"https://orcid.org/0009-0006-8105-6998"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"James Wexler","raw_affiliation_strings":["Google Research, United States"],"affiliations":[{"raw_affiliation_string":"Google Research, United States","institution_ids":["https://openalex.org/I1291425158"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5042350842","display_name":"Minsuk Kahng","orcid":"https://orcid.org/0000-0002-0291-6026"},"institutions":[{"id":"https://openalex.org/I1291425158","display_name":"Google (United States)","ror":"https://ror.org/00njsd438","country_code":"US","type":"company","lineage":["https://openalex.org/I1291425158","https://openalex.org/I4210128969"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Minsuk Kahng","raw_affiliation_strings":["People + AI Research (PAIR), Google, United States"],"affiliations":[{"raw_affiliation_string":"People + AI Research (PAIR), Google, United States","institution_ids":["https://openalex.org/I1291425158"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5019880413"],"corresponding_institution_ids":["https://openalex.org/I1291425158"],"apc_list":null,"apc_paid":null,"fwci":3.2635,"has_fulltext":false,"cited_by_count":9,"citation_normalized_percentile":{"value":0.92692796,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"9"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9940000176429749,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.9887999892234802,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8236651420593262},{"id":"https://openalex.org/keywords/sensemaking","display_name":"Sensemaking","score":0.7445233464241028},{"id":"https://openalex.org/keywords/field","display_name":"Field (mathematics)","score":0.6071962714195251},{"id":"https://openalex.org/keywords/visualization","display_name":"Visualization","score":0.5812650322914124},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.5734507441520691},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.5285006165504456},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.525833249092102},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.27438801527023315},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.17956271767616272}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8236651420593262},{"id":"https://openalex.org/C2780554381","wikidata":"https://www.wikidata.org/wiki/Q2063340","display_name":"Sensemaking","level":2,"score":0.7445233464241028},{"id":"https://openalex.org/C9652623","wikidata":"https://www.wikidata.org/wiki/Q190109","display_name":"Field (mathematics)","level":2,"score":0.6071962714195251},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.5812650322914124},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.5734507441520691},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.5285006165504456},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.525833249092102},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.27438801527023315},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.17956271767616272},{"id":"https://openalex.org/C202444582","wikidata":"https://www.wikidata.org/wiki/Q837863","display_name":"Pure mathematics","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3613905.3650798","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3613905.3650798","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3613905.3650798","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Extended Abstracts of the CHI Conference on Human Factors in Computing Systems","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3613905.3650798","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3613905.3650798","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3613905.3650798","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Extended Abstracts of the CHI Conference on Human Factors in Computing Systems","raw_type":"proceedings-article"},"sustainable_development_goals":[{"score":0.7900000214576721,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W4396832027.pdf"},"referenced_works_count":13,"referenced_works":["https://openalex.org/W1544268587","https://openalex.org/W1880262756","https://openalex.org/W2087382273","https://openalex.org/W2465309725","https://openalex.org/W2576683119","https://openalex.org/W2997591727","https://openalex.org/W3118813946","https://openalex.org/W3212368439","https://openalex.org/W3213241618","https://openalex.org/W4237375617","https://openalex.org/W4283155630","https://openalex.org/W4378908626","https://openalex.org/W4389519061"],"related_works":["https://openalex.org/W3002559787","https://openalex.org/W2100609754","https://openalex.org/W2050640900","https://openalex.org/W2049050102","https://openalex.org/W2596767525","https://openalex.org/W1886987011","https://openalex.org/W2795557596","https://openalex.org/W3045759591","https://openalex.org/W2331546953","https://openalex.org/W2023412717"],"abstract_inverted_index":{"Making":[0],"sense":[1],"of":[2,25,109,142,157],"unstructured":[3],"text":[4],"datasets":[5],"is":[6,73],"perennially":[7],"difficult,":[8,76],"yet":[9],"increasingly":[10],"relevant":[11,36,92],"with":[12,115],"Large":[13],"Language":[14],"Models.":[15],"Data":[16],"practitioners":[17,64],"often":[18,65],"rely":[19],"on":[20],"dataset":[21,105],"summaries,":[22],"especially":[23],"distributions":[24],"various":[26],"derived":[27],"features.":[28],"Some":[29],"features,":[30,94],"like":[31],"toxicity":[32],"or":[33,54,77],"topics,":[34],"are":[35,44],"to":[37,101,125,128,133,153],"many":[38,41],"datasets,":[39],"but":[40],"interesting":[42],"features":[43],"domain":[45],"specific:":[46],"instruments":[47],"and":[48,56,75,97,137,149],"genres":[49],"for":[50,58,69,106],"a":[51,59,84,112,139],"music":[52],"dataset,":[53,71],"diseases":[55],"symptoms":[57],"medical":[60],"dataset.":[61],"Accordingly,":[62],"data":[63,117],"run":[66],"custom":[67],"analyses":[68],"each":[70],"which":[72],"cumbersome":[74],"use":[78,130,144],"unsupervised":[79],"methods.":[80],"We":[81],"present":[82],"AutoHistograms,":[83,129],"visualization":[85],"tool":[86,132,148],"leveraging":[87],"LLMs.":[88],"AutoHistograms":[89],"automatically":[90],"identifies":[91],"entity-based":[93],"visualizes":[95],"them,":[96],"allows":[98],"the":[99,104,131,154],"user":[100,113,150],"interactively":[102],"query":[103],"new":[107],"categories":[108],"entities.":[110],"In":[111],"study":[114,151],"(n=10)":[116],"practitioners,":[118],"we":[119],"observe":[120],"that":[121],"participants":[122],"were":[123],"able":[124],"quickly":[126],"onboard":[127],"identify":[134],"actionable":[135],"insights,":[136],"conceptualize":[138],"broad":[140],"range":[141],"applicable":[143],"cases.":[145],"Together,":[146],"this":[147],"contribute":[152],"growing":[155],"field":[156],"LLM-assisted":[158],"sensemaking":[159],"tools.":[160]},"counts_by_year":[{"year":2025,"cited_by_count":8},{"year":2024,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
