{"id":"https://openalex.org/W3009632519","doi":"https://doi.org/10.1145/3374587.3374590","title":"Text Similarity Calculation Method Based on Hybrid Model of LDA and TF-IDF","display_name":"Text Similarity Calculation Method Based on Hybrid Model of LDA and TF-IDF","publication_year":2019,"publication_date":"2019-12-06","ids":{"openalex":"https://openalex.org/W3009632519","doi":"https://doi.org/10.1145/3374587.3374590","mag":"3009632519"},"language":"en","primary_location":{"id":"doi:10.1145/3374587.3374590","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3374587.3374590","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2019 3rd International Conference on Computer Science and Artificial Intelligence","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5042052440","display_name":"Jiangyao Wang","orcid":null},"institutions":[{"id":"https://openalex.org/I59028903","display_name":"Ocean University of China","ror":"https://ror.org/04rdtx186","country_code":"CN","type":"education","lineage":["https://openalex.org/I59028903"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Jiangyao Wang","raw_affiliation_strings":["School of Information Science and Engineering, Ocean University of China, Qingdao, China"],"affiliations":[{"raw_affiliation_string":"School of Information Science and Engineering, Ocean University of China, Qingdao, China","institution_ids":["https://openalex.org/I59028903"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101988391","display_name":"Wenhua Xu","orcid":null},"institutions":[{"id":"https://openalex.org/I59028903","display_name":"Ocean University of China","ror":"https://ror.org/04rdtx186","country_code":"CN","type":"education","lineage":["https://openalex.org/I59028903"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wenhua Xu","raw_affiliation_strings":["School of Information Science and Engineering, Ocean University of China, Qingdao, China"],"affiliations":[{"raw_affiliation_string":"School of Information Science and Engineering, Ocean University of China, Qingdao, China","institution_ids":["https://openalex.org/I59028903"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103277227","display_name":"Wenhao Yan","orcid":"https://orcid.org/0000-0002-6147-5320"},"institutions":[{"id":"https://openalex.org/I59028903","display_name":"Ocean University of China","ror":"https://ror.org/04rdtx186","country_code":"CN","type":"education","lineage":["https://openalex.org/I59028903"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wenhao Yan","raw_affiliation_strings":["School of Information Science and Engineering, Ocean University of China, Qingdao, China"],"affiliations":[{"raw_affiliation_string":"School of Information Science and Engineering, Ocean University of China, Qingdao, China","institution_ids":["https://openalex.org/I59028903"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100404164","display_name":"Caixia Li","orcid":"https://orcid.org/0000-0003-1717-160X"},"institutions":[{"id":"https://openalex.org/I59028903","display_name":"Ocean University of China","ror":"https://ror.org/04rdtx186","country_code":"CN","type":"education","lineage":["https://openalex.org/I59028903"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Caixia Li","raw_affiliation_strings":["School of Information Science and Engineering, Ocean University of China, Qingdao, China"],"affiliations":[{"raw_affiliation_string":"School of Information Science and Engineering, Ocean University of China, Qingdao, China","institution_ids":["https://openalex.org/I59028903"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5042052440"],"corresponding_institution_ids":["https://openalex.org/I59028903"],"apc_list":null,"apc_paid":null,"fwci":0.9801,"has_fulltext":false,"cited_by_count":19,"citation_normalized_percentile":{"value":0.82923706,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"8"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T13083","display_name":"Advanced Text Analysis Techniques","score":0.9886000156402588,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T13083","display_name":"Advanced Text Analysis Techniques","score":0.9886000156402588,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9864000082015991,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9783999919891357,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.7620406746864319},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7580512166023254},{"id":"https://openalex.org/keywords/vector-space-model","display_name":"Vector space model","score":0.6403409242630005},{"id":"https://openalex.org/keywords/semantic-similarity","display_name":"Semantic similarity","score":0.6009801030158997},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.5708309412002563},{"id":"https://openalex.org/keywords/feature-vector","display_name":"Feature vector","score":0.49837446212768555},{"id":"https://openalex.org/keywords/space","display_name":"Space (punctuation)","score":0.4946105182170868},{"id":"https://openalex.org/keywords/tf\u2013idf","display_name":"tf\u2013idf","score":0.4705125689506531},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.46519267559051514},{"id":"https://openalex.org/keywords/topic-model","display_name":"Topic model","score":0.4590621590614319},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.4085954427719116},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.35251951217651367},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.07944920659065247}],"concepts":[{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.7620406746864319},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7580512166023254},{"id":"https://openalex.org/C89686163","wikidata":"https://www.wikidata.org/wiki/Q1187982","display_name":"Vector space model","level":2,"score":0.6403409242630005},{"id":"https://openalex.org/C130318100","wikidata":"https://www.wikidata.org/wiki/Q2268914","display_name":"Semantic similarity","level":2,"score":0.6009801030158997},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.5708309412002563},{"id":"https://openalex.org/C83665646","wikidata":"https://www.wikidata.org/wiki/Q42139305","display_name":"Feature vector","level":2,"score":0.49837446212768555},{"id":"https://openalex.org/C2778572836","wikidata":"https://www.wikidata.org/wiki/Q380933","display_name":"Space (punctuation)","level":2,"score":0.4946105182170868},{"id":"https://openalex.org/C81758059","wikidata":"https://www.wikidata.org/wiki/Q796584","display_name":"tf\u2013idf","level":3,"score":0.4705125689506531},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.46519267559051514},{"id":"https://openalex.org/C171686336","wikidata":"https://www.wikidata.org/wiki/Q3532085","display_name":"Topic model","level":2,"score":0.4590621590614319},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4085954427719116},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.35251951217651367},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.07944920659065247},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C61797465","wikidata":"https://www.wikidata.org/wiki/Q1188986","display_name":"Term (time)","level":2,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3374587.3374590","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3374587.3374590","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2019 3rd International Conference on Computer Science and Artificial Intelligence","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.5699999928474426}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":24,"referenced_works":["https://openalex.org/W1880262756","https://openalex.org/W1978394996","https://openalex.org/W2024932032","https://openalex.org/W2027752285","https://openalex.org/W2028742638","https://openalex.org/W2039771848","https://openalex.org/W2096468639","https://openalex.org/W2096545258","https://openalex.org/W2101903972","https://openalex.org/W2104924585","https://openalex.org/W2135940730","https://openalex.org/W2150874198","https://openalex.org/W2170738476","https://openalex.org/W2171313960","https://openalex.org/W2510940142","https://openalex.org/W2554987092","https://openalex.org/W2605889996","https://openalex.org/W2609923048","https://openalex.org/W2752172973","https://openalex.org/W2897098357","https://openalex.org/W2905481045","https://openalex.org/W2964350971","https://openalex.org/W4230872509","https://openalex.org/W4285719527"],"related_works":["https://openalex.org/W2549551998","https://openalex.org/W3036265557","https://openalex.org/W2900987978","https://openalex.org/W2975262101","https://openalex.org/W2796524599","https://openalex.org/W2163957714","https://openalex.org/W3109760095","https://openalex.org/W3150575538","https://openalex.org/W3028990185","https://openalex.org/W2372520056"],"abstract_inverted_index":{"The":[0,67,148,176],"traditional":[1,78],"TF-IDF-based":[2],"text":[3,12,23,28,53,68,94,105,116,135,146,157,165,188,207],"similarity":[4,21,69,80,136,172,208],"calculation":[5,209],"model":[6,70,75,138,149,183],"uses":[7,150],"statistical":[8],"methods":[9,31],"to":[10,13,43,166],"map":[11],"the":[14,20,25,47,52,56,64,73,77,88,93,100,112,128,151,156,160,164,171,174,181,187,191,201,206],"keyword":[15,82,161],"vector":[16],"space":[17],"and":[18,41,84,142,159,169,194],"convert":[19],"of":[22,46,81,102,114,127],"into":[24,117],"distance":[26],"between":[27,173],"vectors.":[29],"Such":[30],"have":[32],"problems":[33],"such":[34],"as":[35,61,63],"high":[36],"computational":[37],"dimensions,":[38],"sparse":[39],"data,":[40],"inability":[42],"take":[44],"advantage":[45],"semantic":[48,89,106,152],"information":[49,90,122,153,162,189],"contained":[50,91,154],"in":[51,92,155,200],"itself,":[54],"so":[55],"results":[57,178],"obtained":[58],"are":[59],"not":[60],"similar":[62],"physical":[65],"text.":[66],"based":[71],"on":[72,104],"topic":[74,118],"changes":[76],"spatial":[79],"vectors,":[83],"can":[85,184],"fully":[86],"utilize":[87],"itself.":[95],"But":[96],"this":[97,131],"approach":[98],"ignores":[99],"effect":[101],"words":[103],"representations":[107],"with":[108],"different":[109],"weights.":[110],"In":[111,125],"process":[113],"converting":[115],"feature":[119],"space,":[120],"valuable":[121],"is":[123],"lost.":[124],"view":[126],"above":[129],"problems,":[130],"paper":[132],"proposes":[133],"a":[134,196],"hybrid":[137,182],"(L-THM)":[139],"integrating":[140],"LDA":[141],"TF-IDF":[143],"for":[144],"calculating":[145],"similarity.":[147],"itself":[158],"reflecting":[163],"comprehensively":[167],"analyses":[168],"calculates":[170],"texts.":[175],"experimental":[177],"show":[179],"that":[180],"better":[185],"represent":[186],"than":[190],"single":[192],"model,":[193],"obtain":[195],"good":[197],"F":[198],"value":[199],"cluster,":[202],"which":[203],"effectively":[204],"improves":[205],"effect.":[210]},"counts_by_year":[{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":4},{"year":2023,"cited_by_count":5},{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":5}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
