{"id":"https://openalex.org/W4226212288","doi":"https://doi.org/10.1145/3486622.3493952","title":"Improving Topic Modeling Performance through N-gram Removal","display_name":"Improving Topic Modeling Performance through N-gram Removal","publication_year":2021,"publication_date":"2021-12-14","ids":{"openalex":"https://openalex.org/W4226212288","doi":"https://doi.org/10.1145/3486622.3493952"},"language":"en","primary_location":{"id":"doi:10.1145/3486622.3493952","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3486622.3493952","pdf_url":null,"source":{"id":"https://openalex.org/S4363608074","display_name":"IEEE/WIC/ACM International Conference on Web Intelligence","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/WIC/ACM International Conference on Web Intelligence","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5082552295","display_name":"Mohamad Almgerbi","orcid":"https://orcid.org/0000-0002-7197-0202"},"institutions":[{"id":"https://openalex.org/I45084792","display_name":"University of Florence","ror":"https://ror.org/04jr1s763","country_code":"IT","type":"education","lineage":["https://openalex.org/I45084792"]}],"countries":["IT"],"is_corresponding":true,"raw_author_name":"Mohamad Almgerbi","raw_affiliation_strings":["University of Florence, Italy"],"affiliations":[{"raw_affiliation_string":"University of Florence, Italy","institution_ids":["https://openalex.org/I45084792"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5074496477","display_name":"Andrea De Mauro","orcid":"https://orcid.org/0000-0001-9050-5018"},"institutions":[{"id":"https://openalex.org/I116067653","display_name":"University of Rome Tor Vergata","ror":"https://ror.org/02p77k626","country_code":"IT","type":"education","lineage":["https://openalex.org/I116067653"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Andrea De Mauro","raw_affiliation_strings":["University of Rome Tor Vergata, Italy"],"affiliations":[{"raw_affiliation_string":"University of Rome Tor Vergata, Italy","institution_ids":["https://openalex.org/I116067653"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5015094667","display_name":"Adham Kahlawi","orcid":"https://orcid.org/0000-0003-4040-5590"},"institutions":[{"id":"https://openalex.org/I45084792","display_name":"University of Florence","ror":"https://ror.org/04jr1s763","country_code":"IT","type":"education","lineage":["https://openalex.org/I45084792"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Adham Kahlawi","raw_affiliation_strings":["University of Florence, Italy"],"affiliations":[{"raw_affiliation_string":"University of Florence, Italy","institution_ids":["https://openalex.org/I45084792"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5089228629","display_name":"Valentina Poggioni","orcid":"https://orcid.org/0000-0002-7691-7478"},"institutions":[{"id":"https://openalex.org/I27483092","display_name":"University of Perugia","ror":"https://ror.org/00x27da85","country_code":"IT","type":"education","lineage":["https://openalex.org/I27483092"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Valentina Poggioni","raw_affiliation_strings":["University of Perugia, Italy"],"affiliations":[{"raw_affiliation_string":"University of Perugia, Italy","institution_ids":["https://openalex.org/I27483092"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5082552295"],"corresponding_institution_ids":["https://openalex.org/I45084792"],"apc_list":null,"apc_paid":null,"fwci":0.2513,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.5505702,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"162","last_page":"169"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T13083","display_name":"Advanced Text Analysis Techniques","score":0.9961000084877014,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T13083","display_name":"Advanced Text Analysis Techniques","score":0.9961000084877014,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9912999868392944,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.96670001745224,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/gram","display_name":"Gram","score":0.6668644547462463},{"id":"https://openalex.org/keywords/n-gram","display_name":"n-gram","score":0.6402862071990967},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6159132122993469},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.22248953580856323},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.1282494068145752},{"id":"https://openalex.org/keywords/geology","display_name":"Geology","score":0.06841820478439331}],"concepts":[{"id":"https://openalex.org/C161369605","wikidata":"https://www.wikidata.org/wiki/Q41803","display_name":"Gram","level":3,"score":0.6668644547462463},{"id":"https://openalex.org/C117884012","wikidata":"https://www.wikidata.org/wiki/Q94489","display_name":"n-gram","level":3,"score":0.6402862071990967},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6159132122993469},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.22248953580856323},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.1282494068145752},{"id":"https://openalex.org/C127313418","wikidata":"https://www.wikidata.org/wiki/Q1069","display_name":"Geology","level":0,"score":0.06841820478439331},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0},{"id":"https://openalex.org/C523546767","wikidata":"https://www.wikidata.org/wiki/Q10876","display_name":"Bacteria","level":2,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/3486622.3493952","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3486622.3493952","pdf_url":null,"source":{"id":"https://openalex.org/S4363608074","display_name":"IEEE/WIC/ACM International Conference on Web Intelligence","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/WIC/ACM International Conference on Web Intelligence","raw_type":"proceedings-article"},{"id":"pmh:oai:flore.unifi.it:2158/1266600","is_oa":false,"landing_page_url":"http://hdl.handle.net/2158/1266600","pdf_url":null,"source":{"id":"https://openalex.org/S4306402033","display_name":"Florence Research (University of Florence)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I45084792","host_organization_name":"University of Florence","host_organization_lineage":["https://openalex.org/I45084792"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"info:eu-repo/semantics/conferenceObject"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":33,"referenced_works":["https://openalex.org/W2042980227","https://openalex.org/W2128713194","https://openalex.org/W2240884068","https://openalex.org/W2250533720","https://openalex.org/W2372596980","https://openalex.org/W2394891603","https://openalex.org/W2524620548","https://openalex.org/W2601780063","https://openalex.org/W2604569350","https://openalex.org/W2623560286","https://openalex.org/W2741172726","https://openalex.org/W2742034229","https://openalex.org/W2760540930","https://openalex.org/W2783091141","https://openalex.org/W2802789949","https://openalex.org/W2802994254","https://openalex.org/W2808079449","https://openalex.org/W2912517510","https://openalex.org/W2927273556","https://openalex.org/W2952214074","https://openalex.org/W2952632799","https://openalex.org/W2962686197","https://openalex.org/W2963726741","https://openalex.org/W2971825409","https://openalex.org/W2972110483","https://openalex.org/W2983199300","https://openalex.org/W3006801982","https://openalex.org/W3028100809","https://openalex.org/W3112924188","https://openalex.org/W3154263804","https://openalex.org/W4230834535","https://openalex.org/W4385885462","https://openalex.org/W6794276298"],"related_works":["https://openalex.org/W2906970013","https://openalex.org/W3126081632","https://openalex.org/W2959686711","https://openalex.org/W2747014888","https://openalex.org/W2088254117","https://openalex.org/W3033292598","https://openalex.org/W2790860321","https://openalex.org/W2955248800","https://openalex.org/W3084943335","https://openalex.org/W27703600"],"abstract_inverted_index":{"In":[0,23,45],"recent":[1],"years,":[2],"topic":[3,30],"modeling":[4,31],"has":[5],"been":[6,43],"increasingly":[7],"adopted":[8],"for":[9],"finding":[10],"conceptual":[11],"patterns":[12],"in":[13,68],"large":[14],"corpora":[15],"of":[16,29,61,65,76,79,98,105],"digital":[17],"documents":[18],"to":[19,25],"organize":[20],"them":[21],"accordingly.":[22],"order":[24],"enhance":[26],"the":[27,58,74,77,96,102],"performance":[28,85,97],"algorithms,":[32],"such":[33],"as":[34],"Latent":[35],"Dirichlet":[36],"Allocation":[37],"(LDA),":[38],"multiple":[39],"preprocessing":[40,54],"steps":[41],"have":[42,72],"proposed.":[44],"this":[46],"paper,":[47],"we":[48,87],"introduce":[49],"N-gram":[50,80],"Removal,":[51],"a":[52,62],"novel":[53],"procedure":[55],"based":[56],"on":[57],"systematic":[59],"elimination":[60],"dynamic":[63],"number":[64],"repeated":[66],"words":[67],"text":[69],"documents.":[70],"We":[71],"evaluated":[73],"effects":[75],"utilization":[78],"Removal":[81],"through":[82],"four":[83],"different":[84],"metrics:":[86],"concluded":[88],"that":[89],"its":[90],"application":[91],"is":[92],"effective":[93],"at":[94],"improving":[95],"LDA":[99],"and":[100],"enhances":[101],"human":[103],"interpretation":[104],"topics":[106],"models.":[107]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2023,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
