{"id":"https://openalex.org/W2040499054","doi":"https://doi.org/10.1145/2513228.2513245","title":"A semantic similarity measure in document databases","display_name":"A semantic similarity measure in document databases","publication_year":2013,"publication_date":"2013-10-01","ids":{"openalex":"https://openalex.org/W2040499054","doi":"https://doi.org/10.1145/2513228.2513245","mag":"2040499054"},"language":"en","primary_location":{"id":"doi:10.1145/2513228.2513245","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2513228.2513245","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2013 Research in Adaptive and Convergent Systems","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5035308856","display_name":"Min-Hee Jang","orcid":null},"institutions":[{"id":"https://openalex.org/I4575257","display_name":"Hanyang University","ror":"https://ror.org/046865y68","country_code":"KR","type":"education","lineage":["https://openalex.org/I4575257"]}],"countries":["KR"],"is_corresponding":true,"raw_author_name":"Min-Hee Jang","raw_affiliation_strings":["Hanyang University, Korea",", Hanyang University, , Korea"],"affiliations":[{"raw_affiliation_string":"Hanyang University, Korea","institution_ids":["https://openalex.org/I4575257"]},{"raw_affiliation_string":", Hanyang University, , Korea","institution_ids":["https://openalex.org/I4575257"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5046713257","display_name":"Tae-Hwan Eom","orcid":null},"institutions":[{"id":"https://openalex.org/I4575257","display_name":"Hanyang University","ror":"https://ror.org/046865y68","country_code":"KR","type":"education","lineage":["https://openalex.org/I4575257"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Tae-Hwan Eom","raw_affiliation_strings":["Hanyang University, Korea",", Hanyang University, , Korea"],"affiliations":[{"raw_affiliation_string":"Hanyang University, Korea","institution_ids":["https://openalex.org/I4575257"]},{"raw_affiliation_string":", Hanyang University, , Korea","institution_ids":["https://openalex.org/I4575257"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100656150","display_name":"Sang\u2010Wook Kim","orcid":"https://orcid.org/0000-0002-6345-9084"},"institutions":[{"id":"https://openalex.org/I4575257","display_name":"Hanyang University","ror":"https://ror.org/046865y68","country_code":"KR","type":"education","lineage":["https://openalex.org/I4575257"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Sang-Wook Kim","raw_affiliation_strings":["Hanyang University, Korea",", Hanyang University, , Korea"],"affiliations":[{"raw_affiliation_string":"Hanyang University, Korea","institution_ids":["https://openalex.org/I4575257"]},{"raw_affiliation_string":", Hanyang University, , Korea","institution_ids":["https://openalex.org/I4575257"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5040514478","display_name":"Young-Sup Hwang","orcid":"https://orcid.org/0000-0002-8713-9253"},"institutions":[{"id":"https://openalex.org/I51926615","display_name":"Sun Moon University","ror":"https://ror.org/009e5cd49","country_code":"KR","type":"education","lineage":["https://openalex.org/I51926615"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Young-Sup Hwang","raw_affiliation_strings":["Sunmoon University, Korea","[Sunmoon University, Korea]"],"affiliations":[{"raw_affiliation_string":"Sunmoon University, Korea","institution_ids":["https://openalex.org/I51926615"]},{"raw_affiliation_string":"[Sunmoon University, Korea]","institution_ids":["https://openalex.org/I51926615"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5035308856"],"corresponding_institution_ids":["https://openalex.org/I4575257"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.11043172,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"94","last_page":"99"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10824","display_name":"Image Retrieval and Classification Techniques","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10824","display_name":"Image Retrieval and Classification Techniques","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9976000189781189,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9965000152587891,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.7643014192581177},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7307177782058716},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.6243512034416199},{"id":"https://openalex.org/keywords/semantic-similarity","display_name":"Semantic similarity","score":0.605425238609314},{"id":"https://openalex.org/keywords/latent-dirichlet-allocation","display_name":"Latent Dirichlet allocation","score":0.5978567004203796},{"id":"https://openalex.org/keywords/cosine-similarity","display_name":"Cosine similarity","score":0.5375416278839111},{"id":"https://openalex.org/keywords/earth-movers-distance","display_name":"Earth mover's distance","score":0.5123811960220337},{"id":"https://openalex.org/keywords/document-clustering","display_name":"Document clustering","score":0.49196287989616394},{"id":"https://openalex.org/keywords/document-retrieval","display_name":"Document retrieval","score":0.4601159691810608},{"id":"https://openalex.org/keywords/similitude","display_name":"Similitude","score":0.44256073236465454},{"id":"https://openalex.org/keywords/document-classification","display_name":"Document classification","score":0.44065117835998535},{"id":"https://openalex.org/keywords/tf\u2013idf","display_name":"tf\u2013idf","score":0.4203888177871704},{"id":"https://openalex.org/keywords/similarity-measure","display_name":"Similarity measure","score":0.4199434220790863},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.417710542678833},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4076472520828247},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3665202260017395},{"id":"https://openalex.org/keywords/topic-model","display_name":"Topic model","score":0.34483158588409424},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.25309208035469055},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.17323163151741028},{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.11833122372627258},{"id":"https://openalex.org/keywords/term","display_name":"Term (time)","score":0.11260896921157837},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.07342672348022461}],"concepts":[{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.7643014192581177},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7307177782058716},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.6243512034416199},{"id":"https://openalex.org/C130318100","wikidata":"https://www.wikidata.org/wiki/Q2268914","display_name":"Semantic similarity","level":2,"score":0.605425238609314},{"id":"https://openalex.org/C500882744","wikidata":"https://www.wikidata.org/wiki/Q269236","display_name":"Latent Dirichlet allocation","level":3,"score":0.5978567004203796},{"id":"https://openalex.org/C2780762811","wikidata":"https://www.wikidata.org/wiki/Q1784941","display_name":"Cosine similarity","level":3,"score":0.5375416278839111},{"id":"https://openalex.org/C82668687","wikidata":"https://www.wikidata.org/wiki/Q3046456","display_name":"Earth mover's distance","level":2,"score":0.5123811960220337},{"id":"https://openalex.org/C177937566","wikidata":"https://www.wikidata.org/wiki/Q4223102","display_name":"Document clustering","level":3,"score":0.49196287989616394},{"id":"https://openalex.org/C161156560","wikidata":"https://www.wikidata.org/wiki/Q1638872","display_name":"Document retrieval","level":2,"score":0.4601159691810608},{"id":"https://openalex.org/C143271835","wikidata":"https://www.wikidata.org/wiki/Q254515","display_name":"Similitude","level":2,"score":0.44256073236465454},{"id":"https://openalex.org/C2780479914","wikidata":"https://www.wikidata.org/wiki/Q302088","display_name":"Document classification","level":2,"score":0.44065117835998535},{"id":"https://openalex.org/C81758059","wikidata":"https://www.wikidata.org/wiki/Q796584","display_name":"tf\u2013idf","level":3,"score":0.4203888177871704},{"id":"https://openalex.org/C2776517306","wikidata":"https://www.wikidata.org/wiki/Q29017317","display_name":"Similarity measure","level":2,"score":0.4199434220790863},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.417710542678833},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4076472520828247},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3665202260017395},{"id":"https://openalex.org/C171686336","wikidata":"https://www.wikidata.org/wiki/Q3532085","display_name":"Topic model","level":2,"score":0.34483158588409424},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.25309208035469055},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.17323163151741028},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.11833122372627258},{"id":"https://openalex.org/C61797465","wikidata":"https://www.wikidata.org/wiki/Q1188986","display_name":"Term (time)","level":2,"score":0.11260896921157837},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.07342672348022461},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/2513228.2513245","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2513228.2513245","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2013 Research in Adaptive and Convergent Systems","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G7296647855","display_name":null,"funder_award_id":"NIPA-2013-H0401-13-1001","funder_id":"https://openalex.org/F4320322030","funder_display_name":"Ministry of Science, ICT and Future Planning"},{"id":"https://openalex.org/G8414487349","display_name":null,"funder_award_id":"2012047724","funder_id":"https://openalex.org/F4320322120","funder_display_name":"National Research Foundation of Korea"},{"id":"https://openalex.org/G8425057544","display_name":null,"funder_award_id":"NIPA-2013-H0401-13-1001","funder_id":"https://openalex.org/F4320322065","funder_display_name":"National IT Industry Promotion Agency"}],"funders":[{"id":"https://openalex.org/F4320322030","display_name":"Ministry of Science, ICT and Future Planning","ror":"https://ror.org/032e49973"},{"id":"https://openalex.org/F4320322065","display_name":"National IT Industry Promotion Agency","ror":"https://ror.org/026v53e29"},{"id":"https://openalex.org/F4320322120","display_name":"National Research Foundation of Korea","ror":"https://ror.org/013aysd81"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":18,"referenced_works":["https://openalex.org/W153995565","https://openalex.org/W642396912","https://openalex.org/W1651093245","https://openalex.org/W1660390307","https://openalex.org/W1880262756","https://openalex.org/W1999625406","https://openalex.org/W2043909051","https://openalex.org/W2102650424","https://openalex.org/W2112050062","https://openalex.org/W2125398996","https://openalex.org/W2137140231","https://openalex.org/W2140190241","https://openalex.org/W2143668817","https://openalex.org/W2150383023","https://openalex.org/W2155265018","https://openalex.org/W2157532819","https://openalex.org/W4251601281","https://openalex.org/W6637231022"],"related_works":["https://openalex.org/W2611282738","https://openalex.org/W2975262101","https://openalex.org/W1573371033","https://openalex.org/W4232107035","https://openalex.org/W2041122820","https://openalex.org/W2080168170","https://openalex.org/W2169592713","https://openalex.org/W113108599","https://openalex.org/W2027252317","https://openalex.org/W2580878117"],"abstract_inverted_index":{"Measuring":[0],"document":[1,17,22,36,47,69,109,129,150,214,219,248],"similarity":[2,23,37,45,48,70,220,238,258],"is":[3,24,38,81,96,120,158,179],"important":[4],"in":[5,30,127,187,212,259],"order":[6],"to":[7,13,90,98,107,113,124],"find":[8],"documents":[9,229],"which":[10,157],"are":[11],"similar":[12,92],"a":[14,19,67,128,149,159,163,188,213,246],"given":[15],"query":[16],"from":[18],"user.":[20],"Text-based":[21],"measured":[25],"by":[26,49,151,239],"comparing":[27],"the":[28,39,43,51,74,84,105,132,153,168,172,175,183,192,194,198,204,207,210,223,236,241,252,256,262,265],"words":[29,135,186,211],"two":[31,115],"documents.":[32],"The":[33,79,118],"representative":[34],"text-based":[35],"cosine":[40,44,257],"similarity.":[41,60],"Since":[42],"computes":[46],"estimating":[50],"frequency":[52],"of":[53,83,148,162,171,177,185,261],"common":[54],"words,":[55],"it":[56],"cannot":[57],"reflect":[58],"word":[59],"To":[61,103],"solve":[62,114],"this":[63],"problem,":[64],"we":[65,111,233],"propose":[66],"new":[68,146],"measure":[71],"based":[72,202,221],"on":[73,203,222,245],"earth":[75],"mover's":[76],"distance":[77,87,133,199],"(EMD).":[78],"EMD":[80,106,119,173,224],"one":[82],"most":[85],"popular":[86],"functions":[88],"used":[89,126],"search":[91,101],"multimedia":[93],"contents":[94],"and":[95,209,264],"known":[97],"provide":[99],"good":[100],"results.":[102],"apply":[104],"compute":[108],"similarity,":[110],"have":[112],"problems:":[116],"(1)":[117],"too":[121],"time":[122],"consuming":[123],"be":[125,137],"database,":[130,215],"(2)":[131],"between":[134,200,206],"should":[136],"defined.":[138],"Our":[139,226],"proposed":[140,195,253],"approach":[141,196,227,254],"first":[142],"extracts":[143],"topics":[144,178,201,208],"as":[145],"features":[147],"applying":[152],"latent":[154],"Dirichlet":[155],"allocation,":[156],"generative":[160],"model":[161],"document.":[164,189],"It":[165],"can":[166,234],"decrease":[167],"computational":[169],"cost":[170],"because":[174],"number":[176,184],"much":[180],"smaller":[181],"than":[182],"After":[190],"extracting":[191],"topics,":[193],"calculates":[197],"relation":[205],"thereby":[216],"making":[217],"computing":[218],"possible.":[225],"searches":[228],"more":[230],"accurately":[231],"since":[232],"consider":[235],"semantic":[237],"using":[240],"EMD.":[242],"Experimental":[243],"results":[244],"real-world":[247],"database":[249],"indicate":[250],"that":[251],"outperforms":[255],"terms":[260],"accuracy":[263],"performance.":[266]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
