{"id":"https://openalex.org/W4318187169","doi":"https://doi.org/10.1109/bigdata55660.2022.10020645","title":"Graggle: A Graph-based Approach to Document Clustering","display_name":"Graggle: A Graph-based Approach to Document Clustering","publication_year":2022,"publication_date":"2022-12-17","ids":{"openalex":"https://openalex.org/W4318187169","doi":"https://doi.org/10.1109/bigdata55660.2022.10020645"},"language":"en","primary_location":{"id":"doi:10.1109/bigdata55660.2022.10020645","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/bigdata55660.2022.10020645","pdf_url":null,"source":{"id":"https://openalex.org/S4363607709","display_name":"2022 IEEE International Conference on Big Data (Big Data)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 IEEE International Conference on Big Data (Big Data)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5091390385","display_name":"Isaiah J. King","orcid":"https://orcid.org/0000-0003-2866-4135"},"institutions":[{"id":"https://openalex.org/I193531525","display_name":"George Washington University","ror":"https://ror.org/00y4zzh67","country_code":"US","type":"education","lineage":["https://openalex.org/I193531525"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Isaiah J. King","raw_affiliation_strings":["The George Washington University,GraphLab,USA","GraphLab, The George Washington University, USA"],"affiliations":[{"raw_affiliation_string":"The George Washington University,GraphLab,USA","institution_ids":["https://openalex.org/I193531525"]},{"raw_affiliation_string":"GraphLab, The George Washington University, USA","institution_ids":["https://openalex.org/I193531525"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5002254350","display_name":"H. Howie Huang","orcid":"https://orcid.org/0000-0001-8588-7680"},"institutions":[{"id":"https://openalex.org/I193531525","display_name":"George Washington University","ror":"https://ror.org/00y4zzh67","country_code":"US","type":"education","lineage":["https://openalex.org/I193531525"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"H. Howie Huang","raw_affiliation_strings":["The George Washington University,GraphLab,USA","GraphLab, The George Washington University, USA"],"affiliations":[{"raw_affiliation_string":"The George Washington University,GraphLab,USA","institution_ids":["https://openalex.org/I193531525"]},{"raw_affiliation_string":"GraphLab, The George Washington University, USA","institution_ids":["https://openalex.org/I193531525"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5091390385"],"corresponding_institution_ids":["https://openalex.org/I193531525"],"apc_list":null,"apc_paid":null,"fwci":0.1039,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.35197156,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":"11","issue":null,"first_page":"748","last_page":"755"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11273","display_name":"Advanced Graph Neural Networks","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11273","display_name":"Advanced Graph Neural Networks","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9976000189781189,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8071777820587158},{"id":"https://openalex.org/keywords/metadata","display_name":"Metadata","score":0.6469049453735352},{"id":"https://openalex.org/keywords/graph","display_name":"Graph","score":0.5635721683502197},{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.5458952188491821},{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.5226994752883911},{"id":"https://openalex.org/keywords/autoencoder","display_name":"Autoencoder","score":0.5197107791900635},{"id":"https://openalex.org/keywords/dimensionality-reduction","display_name":"Dimensionality reduction","score":0.48877808451652527},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.47756439447402954},{"id":"https://openalex.org/keywords/document-clustering","display_name":"Document clustering","score":0.443486750125885},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3700139820575714},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.35714811086654663},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.33573591709136963},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.2093176543712616},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.15013697743415833}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8071777820587158},{"id":"https://openalex.org/C93518851","wikidata":"https://www.wikidata.org/wiki/Q180160","display_name":"Metadata","level":2,"score":0.6469049453735352},{"id":"https://openalex.org/C132525143","wikidata":"https://www.wikidata.org/wiki/Q141488","display_name":"Graph","level":2,"score":0.5635721683502197},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.5458952188491821},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.5226994752883911},{"id":"https://openalex.org/C101738243","wikidata":"https://www.wikidata.org/wiki/Q786435","display_name":"Autoencoder","level":3,"score":0.5197107791900635},{"id":"https://openalex.org/C70518039","wikidata":"https://www.wikidata.org/wiki/Q16000077","display_name":"Dimensionality reduction","level":2,"score":0.48877808451652527},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.47756439447402954},{"id":"https://openalex.org/C177937566","wikidata":"https://www.wikidata.org/wiki/Q4223102","display_name":"Document clustering","level":3,"score":0.443486750125885},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3700139820575714},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.35714811086654663},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.33573591709136963},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.2093176543712616},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.15013697743415833}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/bigdata55660.2022.10020645","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/bigdata55660.2022.10020645","pdf_url":null,"source":{"id":"https://openalex.org/S4363607709","display_name":"2022 IEEE International Conference on Big Data (Big Data)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 IEEE International Conference on Big Data (Big Data)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.6700000166893005}],"awards":[],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":39,"referenced_works":["https://openalex.org/W151024599","https://openalex.org/W1996764654","https://openalex.org/W2029810944","https://openalex.org/W2070412788","https://openalex.org/W2131971232","https://openalex.org/W2140321362","https://openalex.org/W2150874198","https://openalex.org/W2163264097","https://openalex.org/W2187089797","https://openalex.org/W2558405088","https://openalex.org/W2559655401","https://openalex.org/W2753434909","https://openalex.org/W2803263920","https://openalex.org/W2808117254","https://openalex.org/W2896457183","https://openalex.org/W2904779692","https://openalex.org/W2912231714","https://openalex.org/W2962756421","https://openalex.org/W2963223306","https://openalex.org/W2963600562","https://openalex.org/W2997770686","https://openalex.org/W3000172589","https://openalex.org/W3020786614","https://openalex.org/W3030030185","https://openalex.org/W4221014796","https://openalex.org/W4294170691","https://openalex.org/W4297733535","https://openalex.org/W4322614756","https://openalex.org/W4385245566","https://openalex.org/W6679775712","https://openalex.org/W6681103018","https://openalex.org/W6682691769","https://openalex.org/W6685777803","https://openalex.org/W6719270105","https://openalex.org/W6726873649","https://openalex.org/W6730084236","https://openalex.org/W6734716764","https://openalex.org/W6755207826","https://openalex.org/W6776225533"],"related_works":["https://openalex.org/W1965294778","https://openalex.org/W2381351160","https://openalex.org/W2361349944","https://openalex.org/W2076264610","https://openalex.org/W2469462725","https://openalex.org/W4310502949","https://openalex.org/W4254379378","https://openalex.org/W2084367595","https://openalex.org/W3123208428","https://openalex.org/W1237351826"],"abstract_inverted_index":{"Document":[0],"recommendation":[1],"systems":[2],"have":[3,39,118],"traditionally":[4],"relied":[5],"upon":[6],"high-dimensional":[7],"vector":[8,92],"representations":[9],"that":[10,102],"scale":[11],"poorly":[12],"in":[13],"corpora":[14],"with":[15,95],"diverse":[16],"vocabularies.":[17],"Existing":[18],"graph-based":[19],"approaches":[20,112],"focus":[21],"on":[22,113],"the":[23,30,33,130],"metadata":[24],"of":[25,32,125],"documents":[26,128],"and,":[27],"unfortunately,":[28],"ignore":[29],"content":[31],"papers.":[34],"In":[35],"this":[36,77,103,120],"work,":[37],"we":[38,46,117],"designed":[40],"and":[41,60,109],"implemented":[42],"a":[43,51,55,80,96,123,138],"new":[44],"system":[45],"call":[47],"Graggle,":[48],"which":[49],"builds":[50],"graph":[52,72,78,97],"to":[53,75,122,133],"model":[54],"corpus.":[56],"Nodes":[57],"are":[58,88],"papers,":[59],"edges":[61],"represent":[62],"significant":[63],"words":[64],"shared":[65],"between":[66],"them.":[67],"We":[68],"then":[69],"leverage":[70],"modern":[71],"learning":[73],"techniques":[74],"turn":[76],"into":[79],"highly":[81],"efficient":[82],"tool":[83],"for":[84],"dimensionality":[85],"reduction.":[86],"Documents":[87],"represented":[89],"as":[90,137],"low-dimensional":[91],"embeddings":[93],"generated":[94],"autoencoder.":[98],"Our":[99],"experiments":[100],"show":[101],"approach":[104],"outperforms":[105],"traditional":[106],"document":[107],"vector-based":[108],"text":[110],"autoencoding":[111],"labeled":[114],"data.":[115],"Additionally,":[116],"applied":[119],"technique":[121],"repository":[124],"unlabeled":[126],"research":[127],"about":[129],"novel":[131],"coronavirus":[132],"demonstrate":[134],"its":[135],"effectiveness":[136],"real-world":[139],"tool.":[140]},"counts_by_year":[{"year":2023,"cited_by_count":1}],"updated_date":"2025-12-24T23:09:58.560324","created_date":"2025-10-10T00:00:00"}
