{"id":"https://openalex.org/W2242818087","doi":"https://doi.org/10.1109/icdm.2015.131","title":"KnowSim: A Document Similarity Measure on Structured Heterogeneous Information Networks","display_name":"KnowSim: A Document Similarity Measure on Structured Heterogeneous Information Networks","publication_year":2015,"publication_date":"2015-11-01","ids":{"openalex":"https://openalex.org/W2242818087","doi":"https://doi.org/10.1109/icdm.2015.131","mag":"2242818087","pmid":"https://pubmed.ncbi.nlm.nih.gov/27034626"},"language":"en","primary_location":{"id":"doi:10.1109/icdm.2015.131","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icdm.2015.131","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2015 IEEE International Conference on Data Mining","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100329697","display_name":"Chenguang Wang","orcid":"https://orcid.org/0000-0003-3464-4923"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]},{"id":"https://openalex.org/I111483173","display_name":"King University","ror":"https://ror.org/01evb6z23","country_code":"US","type":"education","lineage":["https://openalex.org/I111483173"]}],"countries":["CN","US"],"is_corresponding":true,"raw_author_name":"Chenguang Wang","raw_affiliation_strings":["School of EECS, Peking University"],"affiliations":[{"raw_affiliation_string":"School of EECS, Peking University","institution_ids":["https://openalex.org/I111483173","https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5020880385","display_name":"Yangqiu Song","orcid":"https://orcid.org/0000-0002-7818-6090"},"institutions":[{"id":"https://openalex.org/I16820183","display_name":"Illinois College","ror":"https://ror.org/02ys5x139","country_code":"US","type":"education","lineage":["https://openalex.org/I16820183"]},{"id":"https://openalex.org/I157725225","display_name":"University of Illinois Urbana-Champaign","ror":"https://ror.org/047426m28","country_code":"US","type":"education","lineage":["https://openalex.org/I157725225"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yangqiu Song","raw_affiliation_strings":["Department of Computer Science, University of Illinois at Urbana-Champaign","University of Illinois College of Law, Champaign, IL, US"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, University of Illinois at Urbana-Champaign","institution_ids":["https://openalex.org/I157725225"]},{"raw_affiliation_string":"University of Illinois College of Law, Champaign, IL, US","institution_ids":["https://openalex.org/I157725225","https://openalex.org/I16820183"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100327734","display_name":"Haoran Li","orcid":"https://orcid.org/0000-0002-8286-3672"},"institutions":[{"id":"https://openalex.org/I111483173","display_name":"King University","ror":"https://ror.org/01evb6z23","country_code":"US","type":"education","lineage":["https://openalex.org/I111483173"]},{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]},{"id":"https://openalex.org/I157725225","display_name":"University of Illinois Urbana-Champaign","ror":"https://ror.org/047426m28","country_code":"US","type":"education","lineage":["https://openalex.org/I157725225"]}],"countries":["CN","US"],"is_corresponding":false,"raw_author_name":"Haoran Li","raw_affiliation_strings":["School of EECS, Peking University","Department of Computer Science, University of Illinois at Urbana-Champaign"],"affiliations":[{"raw_affiliation_string":"School of EECS, Peking University","institution_ids":["https://openalex.org/I111483173","https://openalex.org/I20231570"]},{"raw_affiliation_string":"Department of Computer Science, University of Illinois at Urbana-Champaign","institution_ids":["https://openalex.org/I157725225"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100642537","display_name":"Ming Zhang","orcid":"https://orcid.org/0000-0002-9809-3430"},"institutions":[{"id":"https://openalex.org/I111483173","display_name":"King University","ror":"https://ror.org/01evb6z23","country_code":"US","type":"education","lineage":["https://openalex.org/I111483173"]},{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN","US"],"is_corresponding":false,"raw_author_name":"Ming Zhang","raw_affiliation_strings":["School of EECS, Peking University"],"affiliations":[{"raw_affiliation_string":"School of EECS, Peking University","institution_ids":["https://openalex.org/I111483173","https://openalex.org/I20231570"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5019539533","display_name":"Jiawei Han","orcid":"https://orcid.org/0000-0002-3629-2696"},"institutions":[{"id":"https://openalex.org/I157725225","display_name":"University of Illinois Urbana-Champaign","ror":"https://ror.org/047426m28","country_code":"US","type":"education","lineage":["https://openalex.org/I157725225"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jiawei Han","raw_affiliation_strings":["Department of Computer Science, University of Illinois at Urbana-Champaign"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, University of Illinois at Urbana-Champaign","institution_ids":["https://openalex.org/I157725225"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5100329697"],"corresponding_institution_ids":["https://openalex.org/I111483173","https://openalex.org/I20231570"],"apc_list":null,"apc_paid":null,"fwci":12.9432,"has_fulltext":false,"cited_by_count":57,"citation_normalized_percentile":{"value":0.98693732,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":100},"biblio":{"volume":"2015","issue":null,"first_page":"1015","last_page":"1020"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11273","display_name":"Advanced Graph Neural Networks","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11273","display_name":"Advanced Graph Neural Networks","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11719","display_name":"Data Quality and Management","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7052910923957825},{"id":"https://openalex.org/keywords/measure","display_name":"Measure (data warehouse)","score":0.6867505311965942},{"id":"https://openalex.org/keywords/similarity-measure","display_name":"Similarity measure","score":0.5628892183303833},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.5579062104225159},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.5559857487678528},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.2755492925643921},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.26524341106414795}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7052910923957825},{"id":"https://openalex.org/C2780009758","wikidata":"https://www.wikidata.org/wiki/Q6804172","display_name":"Measure (data warehouse)","level":2,"score":0.6867505311965942},{"id":"https://openalex.org/C2776517306","wikidata":"https://www.wikidata.org/wiki/Q29017317","display_name":"Similarity measure","level":2,"score":0.5628892183303833},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.5579062104225159},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.5559857487678528},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.2755492925643921},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.26524341106414795},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1109/icdm.2015.131","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icdm.2015.131","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2015 IEEE International Conference on Data Mining","raw_type":"proceedings-article"},{"id":"pmid:27034626","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/27034626","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings. IEEE International Conference on Data Mining","raw_type":null},{"id":"pmh:oai:europepmc.org:3778006","is_oa":false,"landing_page_url":"https://www.ncbi.nlm.nih.gov/pmc/articles/4811603","pdf_url":null,"source":{"id":"https://openalex.org/S4306400806","display_name":"Europe PMC (PubMed Central)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1303153112","host_organization_name":"European Bioinformatics Institute","host_organization_lineage":["https://openalex.org/I1303153112"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Text"},{"id":"pmh:oai:repository.hkust.edu.hk:1783.1-79851","is_oa":false,"landing_page_url":"http://repository.hkust.edu.hk/ir/Record/1783.1-79851","pdf_url":null,"source":{"id":"https://openalex.org/S4306401796","display_name":"Rare & Special e-Zone (The Hong Kong University of Science and Technology)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I200769079","host_organization_name":"Hong Kong University of Science and Technology","host_organization_lineage":["https://openalex.org/I200769079"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Conference paper"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":54,"referenced_works":["https://openalex.org/W11313625","https://openalex.org/W89857650","https://openalex.org/W102708294","https://openalex.org/W103340358","https://openalex.org/W103965747","https://openalex.org/W1491611863","https://openalex.org/W1493490255","https://openalex.org/W1493526108","https://openalex.org/W1500752107","https://openalex.org/W1646278814","https://openalex.org/W1756422141","https://openalex.org/W1880262756","https://openalex.org/W1970544520","https://openalex.org/W1975563293","https://openalex.org/W2016753842","https://openalex.org/W2019421101","https://openalex.org/W2022166150","https://openalex.org/W2029249040","https://openalex.org/W2047604680","https://openalex.org/W2055400882","https://openalex.org/W2086254934","https://openalex.org/W2092045293","https://openalex.org/W2094728533","https://openalex.org/W2098711168","https://openalex.org/W2106865714","https://openalex.org/W2113729143","https://openalex.org/W2115461474","https://openalex.org/W2120779048","https://openalex.org/W2127978399","https://openalex.org/W2133517430","https://openalex.org/W2136930489","https://openalex.org/W2149620660","https://openalex.org/W2150102617","https://openalex.org/W2152322845","https://openalex.org/W2157361576","https://openalex.org/W2204209666","https://openalex.org/W2285986798","https://openalex.org/W2294556942","https://openalex.org/W3126976873","https://openalex.org/W4285719527","https://openalex.org/W6600468989","https://openalex.org/W6603592053","https://openalex.org/W6604186605","https://openalex.org/W6610080458","https://openalex.org/W6629486361","https://openalex.org/W6629638141","https://openalex.org/W6636771404","https://openalex.org/W6637805884","https://openalex.org/W6639619044","https://openalex.org/W6676756425","https://openalex.org/W6681822384","https://openalex.org/W6681875376","https://openalex.org/W6687938222","https://openalex.org/W6696176459"],"related_works":["https://openalex.org/W2319693127","https://openalex.org/W308539617","https://openalex.org/W1983228818","https://openalex.org/W2163064108","https://openalex.org/W2072263576","https://openalex.org/W2474567666","https://openalex.org/W4399930146","https://openalex.org/W2767257176","https://openalex.org/W2915154372","https://openalex.org/W1940044583"],"abstract_inverted_index":{"As":[0],"a":[1,62,66,69,105,116,171],"fundamental":[2],"task,":[3],"document":[4,24,47,67,101,167,198],"similarity":[5,102,160],"measure":[6],"has":[7],"broad":[8],"impact":[9],"to":[10,64,104,122,129,138,147,165,176],"document-based":[11],"classification,":[12],"clustering":[13],"and":[14,22,31,53,78,92,180,189],"ranking.":[15],"Traditional":[16],"approaches":[17],"represent":[18,65],"documents":[19,41,85],"as":[20,68],"bag-of-words":[21],"compute":[23,130,166],"similarities":[25],"using":[26],"measures":[27],"like":[28],"cosine,":[29],"Jaccard,":[30],"dice.":[32],"However,":[33],"entity":[34],"phrases":[35],"rather":[36],"than":[37],"single":[38],"words":[39,91],"in":[40,94,127],"can":[42,86],"be":[43,87,112],"critical":[44],"for":[45,183],"evaluating":[46],"relatedness.":[48],"Moreover,":[49],"types":[50],"of":[51,118,135],"entities":[52,77,93],"links":[54],"between":[55,115,132],"entities/words":[56],"are":[57,80],"also":[58],"informative.":[59],"We":[60,120],"propose":[61,121],"method":[63,144],"typed":[70],"heterogeneous":[71],"information":[72],"network":[73],"(HIN),":[74],"where":[75],"the":[76,90,95,100,124,149,152],"relations":[79],"annotated":[81],"with":[82,155],"types.":[83],"Multiple":[84],"linked":[88],"by":[89],"HIN.":[96],"Consequently,":[97],"we":[98],"convert":[99],"problem":[103],"graph":[106],"distance":[107,131],"problem.":[108],"Intuitively,":[109],"there":[110],"could":[111],"multiple":[113],"paths":[114],"pair":[117],"documents.":[119,133],"use":[123],"meta-path":[125],"defined":[126],"HIN":[128,182],"Instead":[134],"burdening":[136],"user":[137],"define":[139],"meaningful":[140],"meta-paths,":[141],"an":[142,158],"automatic":[143],"is":[145,163],"proposed":[146,164],"rank":[148],"meta-paths.":[150],"Given":[151],"meta-paths":[153],"associated":[154],"ranking":[156],"scores,":[157],"HIN-based":[159],"measure,":[161],"KnowSim,":[162],"similarities.":[168],"Using":[169],"Freebase,":[170],"well-known":[172],"world":[173],"knowledge":[174],"base,":[175],"conduct":[177],"semantic":[178],"parsing":[179],"construct":[181],"documents,":[184],"our":[185],"experiments":[186],"on":[187],"20Newsgroups":[188],"RCV1":[190],"datasets":[191],"show":[192],"that":[193],"KnowSim":[194],"generates":[195],"impressive":[196],"high-quality":[197],"clustering.":[199]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":7},{"year":2020,"cited_by_count":4},{"year":2019,"cited_by_count":13},{"year":2018,"cited_by_count":9},{"year":2017,"cited_by_count":13},{"year":2016,"cited_by_count":8}],"updated_date":"2026-01-13T01:12:25.745995","created_date":"2025-10-10T00:00:00"}
