{"id":"https://openalex.org/W2619129630","doi":"https://doi.org/10.18653/v1/w17-2628","title":"Does the Geometry of Word Embeddings Help Document Classification? A Case Study on Persistent Homology-Based Representations","display_name":"Does the Geometry of Word Embeddings Help Document Classification? A Case Study on Persistent Homology-Based Representations","publication_year":2017,"publication_date":"2017-01-01","ids":{"openalex":"https://openalex.org/W2619129630","doi":"https://doi.org/10.18653/v1/w17-2628","mag":"2619129630"},"language":"en","primary_location":{"id":"doi:10.18653/v1/w17-2628","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/w17-2628","pdf_url":"https://www.aclweb.org/anthology/W17-2628.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2nd Workshop on Representation Learning for NLP","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.aclweb.org/anthology/W17-2628.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5109949937","display_name":"Paul Michel","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Paul Michel","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5021017923","display_name":"Abhilasha Ravichander","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Abhilasha Ravichander","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5014428509","display_name":"Shruti Rijhwani","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shruti Rijhwani","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.7178,"has_fulltext":true,"cited_by_count":4,"citation_normalized_percentile":{"value":0.72689617,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"235","last_page":"240"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12536","display_name":"Topological and Geometric Data Analysis","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12536","display_name":"Topological and Geometric Data Analysis","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11273","display_name":"Advanced Graph Neural Networks","score":0.9549999833106995,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.6154054403305054},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6033428907394409},{"id":"https://openalex.org/keywords/invariant","display_name":"Invariant (physics)","score":0.5737681984901428},{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.5212091207504272},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.5122939348220825},{"id":"https://openalex.org/keywords/topological-data-analysis","display_name":"Topological data analysis","score":0.47973862290382385},{"id":"https://openalex.org/keywords/document-classification","display_name":"Document classification","score":0.4709392786026001},{"id":"https://openalex.org/keywords/simple","display_name":"Simple (philosophy)","score":0.4330224096775055},{"id":"https://openalex.org/keywords/computational-topology","display_name":"Computational topology","score":0.42946934700012207},{"id":"https://openalex.org/keywords/document-clustering","display_name":"Document clustering","score":0.42158204317092896},{"id":"https://openalex.org/keywords/metric","display_name":"Metric (unit)","score":0.4112669825553894},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3749867081642151},{"id":"https://openalex.org/keywords/theoretical-computer-science","display_name":"Theoretical computer science","score":0.3428286910057068},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.32553648948669434},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.3249627947807312},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.27576562762260437},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.23907425999641418},{"id":"https://openalex.org/keywords/geometry","display_name":"Geometry","score":0.12855792045593262}],"concepts":[{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.6154054403305054},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6033428907394409},{"id":"https://openalex.org/C190470478","wikidata":"https://www.wikidata.org/wiki/Q2370229","display_name":"Invariant (physics)","level":2,"score":0.5737681984901428},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.5212091207504272},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.5122939348220825},{"id":"https://openalex.org/C2776477805","wikidata":"https://www.wikidata.org/wiki/Q4460773","display_name":"Topological data analysis","level":2,"score":0.47973862290382385},{"id":"https://openalex.org/C2780479914","wikidata":"https://www.wikidata.org/wiki/Q302088","display_name":"Document classification","level":2,"score":0.4709392786026001},{"id":"https://openalex.org/C2780586882","wikidata":"https://www.wikidata.org/wiki/Q7520643","display_name":"Simple (philosophy)","level":2,"score":0.4330224096775055},{"id":"https://openalex.org/C181576044","wikidata":"https://www.wikidata.org/wiki/Q4129926","display_name":"Computational topology","level":3,"score":0.42946934700012207},{"id":"https://openalex.org/C177937566","wikidata":"https://www.wikidata.org/wiki/Q4223102","display_name":"Document clustering","level":3,"score":0.42158204317092896},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.4112669825553894},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3749867081642151},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.3428286910057068},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.32553648948669434},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.3249627947807312},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.27576562762260437},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.23907425999641418},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.12855792045593262},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C37914503","wikidata":"https://www.wikidata.org/wiki/Q156495","display_name":"Mathematical physics","level":1,"score":0.0},{"id":"https://openalex.org/C110521144","wikidata":"https://www.wikidata.org/wiki/Q193460","display_name":"Scalar field","level":2,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.18653/v1/w17-2628","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/w17-2628","pdf_url":"https://www.aclweb.org/anthology/W17-2628.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2nd Workshop on Representation Learning for NLP","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:1705.10900","is_oa":true,"landing_page_url":"http://arxiv.org/abs/1705.10900","pdf_url":"https://arxiv.org/pdf/1705.10900","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"mag:2619129630","is_oa":true,"landing_page_url":"https://arxiv.org/pdf/1705.10900.pdf","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"arXiv (Cornell University)","raw_type":null},{"id":"doi:10.48550/arxiv.1705.10900","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.1705.10900","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.18653/v1/w17-2628","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/w17-2628","pdf_url":"https://www.aclweb.org/anthology/W17-2628.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2nd Workshop on Representation Learning for NLP","raw_type":"proceedings-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.6299999952316284,"display_name":"Quality Education"}],"awards":[{"id":"https://openalex.org/G8016956108","display_name":null,"funder_award_id":"HR0011-15-C-0114","funder_id":"https://openalex.org/F4320332180","funder_display_name":"Defense Advanced Research Projects Agency"}],"funders":[{"id":"https://openalex.org/F4320306078","display_name":"U.S. Department of Defense","ror":"https://ror.org/0447fe631"},{"id":"https://openalex.org/F4320332180","display_name":"Defense Advanced Research Projects Agency","ror":"https://ror.org/02caytj08"},{"id":"https://openalex.org/F4320332815","display_name":"Advanced Research Projects Agency","ror":"https://ror.org/02caytj08"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2619129630.pdf","grobid_xml":"https://content.openalex.org/works/W2619129630.grobid-xml"},"referenced_works_count":22,"referenced_works":["https://openalex.org/W33249699","https://openalex.org/W80463681","https://openalex.org/W100415715","https://openalex.org/W658020064","https://openalex.org/W1482513167","https://openalex.org/W1651093245","https://openalex.org/W1901057238","https://openalex.org/W1985222241","https://openalex.org/W2005276292","https://openalex.org/W2070670538","https://openalex.org/W2072240081","https://openalex.org/W2108776879","https://openalex.org/W2125101937","https://openalex.org/W2145036943","https://openalex.org/W2245836998","https://openalex.org/W2251581759","https://openalex.org/W2752172973","https://openalex.org/W2893578842","https://openalex.org/W2949547296","https://openalex.org/W2950133940","https://openalex.org/W2950726992","https://openalex.org/W2952186591"],"related_works":["https://openalex.org/W2963946103","https://openalex.org/W2990173481","https://openalex.org/W3021327341","https://openalex.org/W2616922365","https://openalex.org/W2989957828","https://openalex.org/W3103025252","https://openalex.org/W3148285962","https://openalex.org/W941230081","https://openalex.org/W2413820286","https://openalex.org/W2990053927","https://openalex.org/W2769297372","https://openalex.org/W2786959368","https://openalex.org/W2907636479","https://openalex.org/W3126955718","https://openalex.org/W3008038783","https://openalex.org/W71569195","https://openalex.org/W3199446243","https://openalex.org/W2626573800","https://openalex.org/W2515351093","https://openalex.org/W2890809607"],"abstract_inverted_index":{"We":[0,69],"investigate":[1],"the":[2,16,37,40,43,52,72,91,94,104,111],"pertinence":[3],"of":[4,18,25,39,54,93,106],"methods":[5,14],"from":[6,22],"algebraic":[7],"topology":[8],"for":[9,101],"text":[10,77],"data":[11],"analysis.":[12,78],"These":[13],"enable":[15],"development":[17],"mathematically-principled":[19],"isometric-invariant":[20],"mappings":[21],"a":[23,28],"set":[24],"vectors":[26],"to":[27,36],"document":[29,41,57,64,95],"embedding,":[30],"which":[31],"is":[32,82],"stable":[33],"with":[34],"respect":[35],"geometry":[38,92],"in":[42,59,110],"selected":[44],"metric":[45],"space.":[46],"In":[47,79],"this":[48],"work,":[49],"we":[50],"evaluate":[51],"utility":[53],"these":[55],"topology-based":[56],"representations":[58],"traditional":[60],"NLP":[61],"tasks,":[62],"specifically":[63],"clustering":[65],"and":[66],"sentiment":[67,109],"classification.":[68],"find":[70],"that":[71,90],"embeddings":[73],"do":[74],"not":[75,97],"benefit":[76],"fact,":[80],"performance":[81],"worse":[83],"than":[84],"simple":[85],"techniques":[86],"like":[87],"tf-idf,":[88],"indicating":[89],"does":[96],"provide":[98],"enough":[99],"variability":[100],"classification":[102],"on":[103],"basis":[105],"topic":[107],"or":[108],"chosen":[112],"datasets.":[113]},"counts_by_year":[{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":1},{"year":2018,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
