{"id":"https://openalex.org/W2024635814","doi":"https://doi.org/10.13053/cys-18-3-2043","title":"Soft Similarity and Soft Cosine Measure: Similarity of Features in Vector Space Model","display_name":"Soft Similarity and Soft Cosine Measure: Similarity of Features in Vector Space Model","publication_year":2014,"publication_date":"2014-09-30","ids":{"openalex":"https://openalex.org/W2024635814","doi":"https://doi.org/10.13053/cys-18-3-2043","mag":"2024635814"},"language":"es","primary_location":{"id":"doi:10.13053/cys-18-3-2043","is_oa":true,"landing_page_url":"https://doi.org/10.13053/cys-18-3-2043","pdf_url":"https://cys.cic.ipn.mx/ojs/index.php/CyS/article/view/2043/1921","source":{"id":"https://openalex.org/S61446325","display_name":"Computaci\u00f3n y Sistemas","issn_l":"1405-5546","issn":["1405-5546","2007-9737"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319599","host_organization_name":"National Polytechnic Institute","host_organization_lineage":["https://openalex.org/P4310319599"],"host_organization_lineage_names":["National Polytechnic Institute"],"type":"journal"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Computaci\u00f3n y Sistemas","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://cys.cic.ipn.mx/ojs/index.php/CyS/article/view/2043/1921","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5008287867","display_name":"Grigori Sidorov","orcid":"https://orcid.org/0000-0003-3901-3522"},"institutions":[{"id":"https://openalex.org/I59361560","display_name":"Instituto Polit\u00e9cnico Nacional","ror":"https://ror.org/059sp8j34","country_code":"MX","type":"education","lineage":["https://openalex.org/I59361560"]}],"countries":["MX"],"is_corresponding":true,"raw_author_name":"Grigori Sidorov","raw_affiliation_strings":["Centro de Investigaci n en Computaci n, Instituto Polit cnico Nacional, M xico D.F., Mexico","Centro de Investigaci\u00f3n en Computaci\u00f3n (CIC)"],"affiliations":[{"raw_affiliation_string":"Centro de Investigaci n en Computaci n, Instituto Polit cnico Nacional, M xico D.F., Mexico","institution_ids":["https://openalex.org/I59361560"]},{"raw_affiliation_string":"Centro de Investigaci\u00f3n en Computaci\u00f3n (CIC)","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5049701126","display_name":"Alexander Gelbukh","orcid":"https://orcid.org/0000-0001-7845-9039"},"institutions":[{"id":"https://openalex.org/I59361560","display_name":"Instituto Polit\u00e9cnico Nacional","ror":"https://ror.org/059sp8j34","country_code":"MX","type":"education","lineage":["https://openalex.org/I59361560"]}],"countries":["MX"],"is_corresponding":false,"raw_author_name":"Alexander Gelbukh","raw_affiliation_strings":["Centro de Investigaci n en Computaci n, Instituto Polit cnico Nacional, M xico D.F., Mexico","Centro de Investigaci\u00f3n en Computaci\u00f3n (CIC)"],"affiliations":[{"raw_affiliation_string":"Centro de Investigaci n en Computaci n, Instituto Polit cnico Nacional, M xico D.F., Mexico","institution_ids":["https://openalex.org/I59361560"]},{"raw_affiliation_string":"Centro de Investigaci\u00f3n en Computaci\u00f3n (CIC)","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5013558219","display_name":"Helena G\u00f3mez-Adorno","orcid":"https://orcid.org/0000-0002-6966-9912"},"institutions":[{"id":"https://openalex.org/I59361560","display_name":"Instituto Polit\u00e9cnico Nacional","ror":"https://ror.org/059sp8j34","country_code":"MX","type":"education","lineage":["https://openalex.org/I59361560"]}],"countries":["MX"],"is_corresponding":false,"raw_author_name":"Helena G\u00f3mez-Adorno","raw_affiliation_strings":["[Instituto Polit\u00e9cnico Nacional]"],"affiliations":[{"raw_affiliation_string":"[Instituto Polit\u00e9cnico Nacional]","institution_ids":["https://openalex.org/I59361560"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5016612157","display_name":"David Pinto","orcid":"https://orcid.org/0000-0002-8516-5925"},"institutions":[{"id":"https://openalex.org/I721619","display_name":"Benem\u00e9rita Universidad Aut\u00f3noma de Puebla","ror":"https://ror.org/03p2z7827","country_code":"MX","type":"education","lineage":["https://openalex.org/I721619"]},{"id":"https://openalex.org/I4210111321","display_name":"Universidad de Puebla","ror":"https://ror.org/02rsx0d74","country_code":"MX","type":"education","lineage":["https://openalex.org/I4210111321"]}],"countries":["MX"],"is_corresponding":false,"raw_author_name":"David Pinto","raw_affiliation_strings":["Facultad de Ciencias de la Computaci n, Benem rita Universidad Aut noma de Puebla, Puebla, Mexico",", Benem\u00e9rita Universidad Aut\u00f3noma de Puebla"],"affiliations":[{"raw_affiliation_string":"Facultad de Ciencias de la Computaci n, Benem rita Universidad Aut noma de Puebla, Puebla, Mexico","institution_ids":["https://openalex.org/I4210111321","https://openalex.org/I721619"]},{"raw_affiliation_string":", Benem\u00e9rita Universidad Aut\u00f3noma de Puebla","institution_ids":["https://openalex.org/I721619"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5008287867"],"corresponding_institution_ids":["https://openalex.org/I59361560"],"apc_list":null,"apc_paid":null,"fwci":34.6052,"has_fulltext":true,"cited_by_count":373,"citation_normalized_percentile":{"value":0.99769433,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":100},"biblio":{"volume":"18","issue":"3","first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9955999851226807,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/cosine-similarity","display_name":"Cosine similarity","score":0.8440169095993042},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.8013910055160522},{"id":"https://openalex.org/keywords/similarity-measure","display_name":"Similarity measure","score":0.7209104299545288},{"id":"https://openalex.org/keywords/measure","display_name":"Measure (data warehouse)","score":0.662337601184845},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.571062445640564},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.560408890247345},{"id":"https://openalex.org/keywords/feature-vector","display_name":"Feature vector","score":0.5356338620185852},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.45784348249435425},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.4570157825946808},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.45685887336730957},{"id":"https://openalex.org/keywords/trigonometric-functions","display_name":"Trigonometric functions","score":0.4154159128665924},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3545075058937073},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.2040833830833435},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.05955156683921814}],"concepts":[{"id":"https://openalex.org/C2780762811","wikidata":"https://www.wikidata.org/wiki/Q1784941","display_name":"Cosine similarity","level":3,"score":0.8440169095993042},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.8013910055160522},{"id":"https://openalex.org/C2776517306","wikidata":"https://www.wikidata.org/wiki/Q29017317","display_name":"Similarity measure","level":2,"score":0.7209104299545288},{"id":"https://openalex.org/C2780009758","wikidata":"https://www.wikidata.org/wiki/Q6804172","display_name":"Measure (data warehouse)","level":2,"score":0.662337601184845},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.571062445640564},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.560408890247345},{"id":"https://openalex.org/C83665646","wikidata":"https://www.wikidata.org/wiki/Q42139305","display_name":"Feature vector","level":2,"score":0.5356338620185852},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.45784348249435425},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.4570157825946808},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.45685887336730957},{"id":"https://openalex.org/C178009071","wikidata":"https://www.wikidata.org/wiki/Q93344","display_name":"Trigonometric functions","level":2,"score":0.4154159128665924},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3545075058937073},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.2040833830833435},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.05955156683921814},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.13053/cys-18-3-2043","is_oa":true,"landing_page_url":"https://doi.org/10.13053/cys-18-3-2043","pdf_url":"https://cys.cic.ipn.mx/ojs/index.php/CyS/article/view/2043/1921","source":{"id":"https://openalex.org/S61446325","display_name":"Computaci\u00f3n y Sistemas","issn_l":"1405-5546","issn":["1405-5546","2007-9737"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319599","host_organization_name":"National Polytechnic Institute","host_organization_lineage":["https://openalex.org/P4310319599"],"host_organization_lineage_names":["National Polytechnic Institute"],"type":"journal"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Computaci\u00f3n y Sistemas","raw_type":"journal-article"},{"id":"pmh:oai:ipn.redalyc.org:61532067007","is_oa":false,"landing_page_url":"http://www.redalyc.org/articulo.oa?id=61532067007","pdf_url":null,"source":{"id":"https://openalex.org/S4306402641","display_name":"LA Referencia (Red Federada de Repositorios Institucionales de Publicaciones Cient\u00edficas)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4383465926","host_organization_name":"LA Referencia","host_organization_lineage":["https://openalex.org/I4383465926"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"instname:Instituto Polit\u00e9cnico Nacional","raw_type":"info:eu-repo/semantics/article"},{"id":"pmh:oai:redalyc.org:61532067007","is_oa":false,"landing_page_url":"https://www.redalyc.org/articulo.oa?id=61532067007","pdf_url":null,"source":{"id":"https://openalex.org/S4377196100","display_name":"Redalyc (Universidad Aut\u00f3noma del Estado de M\u00e9xico)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I179647637","host_organization_name":"Universidad Aut\u00f3noma del Estado de M\u00e9xico","host_organization_lineage":["https://openalex.org/I179647637"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"instname:Instituto Polit\u00e9cnico Nacional","raw_type":"info:eu-repo/semantics/article"}],"best_oa_location":{"id":"doi:10.13053/cys-18-3-2043","is_oa":true,"landing_page_url":"https://doi.org/10.13053/cys-18-3-2043","pdf_url":"https://cys.cic.ipn.mx/ojs/index.php/CyS/article/view/2043/1921","source":{"id":"https://openalex.org/S61446325","display_name":"Computaci\u00f3n y Sistemas","issn_l":"1405-5546","issn":["1405-5546","2007-9737"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319599","host_organization_name":"National Polytechnic Institute","host_organization_lineage":["https://openalex.org/P4310319599"],"host_organization_lineage_names":["National Polytechnic Institute"],"type":"journal"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Computaci\u00f3n y Sistemas","raw_type":"journal-article"},"sustainable_development_goals":[{"score":0.6299999952316284,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2024635814.pdf","grobid_xml":"https://content.openalex.org/works/W2024635814.grobid-xml"},"referenced_works_count":30,"referenced_works":["https://openalex.org/W1514786130","https://openalex.org/W1566209275","https://openalex.org/W1570542661","https://openalex.org/W1647671624","https://openalex.org/W1846261984","https://openalex.org/W1930331324","https://openalex.org/W1967981232","https://openalex.org/W1969825376","https://openalex.org/W2024635814","https://openalex.org/W2032031606","https://openalex.org/W2061182717","https://openalex.org/W2070975020","https://openalex.org/W2081580037","https://openalex.org/W2096692384","https://openalex.org/W2165938099","https://openalex.org/W2169528473","https://openalex.org/W2189143063","https://openalex.org/W2397880443","https://openalex.org/W2401130810","https://openalex.org/W2402166941","https://openalex.org/W2494779131","https://openalex.org/W2612383341","https://openalex.org/W2915108564","https://openalex.org/W2973726998","https://openalex.org/W3198103189","https://openalex.org/W4386506836","https://openalex.org/W6630801128","https://openalex.org/W6634059332","https://openalex.org/W6685290738","https://openalex.org/W6837447454"],"related_works":["https://openalex.org/W2389818373","https://openalex.org/W126212742","https://openalex.org/W2220831889","https://openalex.org/W1948687848","https://openalex.org/W2953417386","https://openalex.org/W2319693127","https://openalex.org/W3013312691","https://openalex.org/W3027421045","https://openalex.org/W4312683641","https://openalex.org/W2056226831"],"abstract_inverted_index":{"We":[0,144],"show":[1,203],"how":[2],"to":[3,53,121,190,196],"consider":[4,165],"similarity":[5,11,31,39,62,111,117,132,189,237],"between":[6,32,40,67,112,238],"features":[7,41,68,113,178,185,231],"for":[8,20,71,97,148,166,184],"calculation":[9,152],"of":[10,12,27,153,162,173,175,247],"objects":[13],"in":[14,73,95,134,160,212,242,245],"the":[15,57,60,122,129,154,176,197,236],"Vector":[16],"Space":[17],"Model":[18],"(VSM)":[19],"machine":[21],"learning":[22],"algorithms":[23],"and":[24,49,101,232],"other":[25],"classes":[26],"methods":[28],"that":[29,38,186,204],"involve":[30],"objects.":[33],"Unlike":[34],"LSA,":[35],"we":[36,127,139,164,226],"assume":[37],"is":[42,69,109,119],"known":[43],"(say,":[44],"from":[45,56],"a":[46,168],"synonym":[47],"dictionary)":[48],"does":[50],"not":[51],"need":[52],"be":[54,83],"learned":[55],"data.We":[58],"call":[59,140],"proposed":[61],"measure":[63,118,133,208],"soft":[64,116,155,206],"similarity.":[65,124,182],"Similarity":[66],"common,":[70],"example,":[72,98,159],"natural":[74],"language":[75],"processing:":[76],"words,":[77],"n-grams,":[78,239],"or":[79,150,244],"syntactic":[80,228],"n-grams":[81,229],"can":[82],"somewhat":[84],"different":[85,89,104],"(which":[86],"makes":[87],"them":[88,163],"features)":[90],"but":[91,105],"still":[92],"have":[93],"much":[94],"common:":[96],"words":[99],"\u201cplay\u201d":[100],"\u201cgame\u201d":[102],"are":[103],"related.":[106],"When":[107],"there":[108],"no":[110,188],"then":[114],"our":[115,193,205,213],"equal":[120],"standard":[123,198],"For":[125,158],"this,":[126],"generalize":[128],"well-known":[130],"cosine":[131,142,156,199,207],"VSM":[135,167],"by":[136,180],"introducing":[137],"what":[138],"\u201csoft":[141],"measure\u201d.":[143],"propose":[145],"various":[146],"formulas":[147,194],"exact":[149],"approximate":[151],"measure.":[157,200],"one":[161],"new":[169],"feature":[170],"space":[171],"consisting":[172],"pairs":[174],"original":[177],"weighted":[179],"their":[181],"Again,":[183],"bear":[187],"each":[191],"other,":[192],"reduce":[195],"Our":[201],"experiments":[202],"provides":[209],"better":[210],"performance":[211],"case":[214],"study:":[215],"entrance":[216],"exams":[217],"question":[218],"answering":[219],"task":[220],"at":[221],"CLEF.":[222],"In":[223],"these":[224],"experiments,":[225],"use":[227],"as":[230,235],"Levenshtein":[233],"distance":[234],"measured":[240],"either":[241],"characters":[243],"elements":[246],"n-grams.":[248]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":18},{"year":2024,"cited_by_count":33},{"year":2023,"cited_by_count":37},{"year":2022,"cited_by_count":47},{"year":2021,"cited_by_count":39},{"year":2020,"cited_by_count":41},{"year":2019,"cited_by_count":34},{"year":2018,"cited_by_count":40},{"year":2017,"cited_by_count":34},{"year":2016,"cited_by_count":20},{"year":2015,"cited_by_count":22},{"year":2014,"cited_by_count":6}],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2025-10-10T00:00:00"}
