{"id":"https://openalex.org/W2986656862","doi":"https://doi.org/10.1145/3360901.3364444","title":"Scalable Cross-lingual Document Similarity through Language-specific Concept Hierarchies","display_name":"Scalable Cross-lingual Document Similarity through Language-specific Concept Hierarchies","publication_year":2019,"publication_date":"2019-09-23","ids":{"openalex":"https://openalex.org/W2986656862","doi":"https://doi.org/10.1145/3360901.3364444","mag":"2986656862"},"language":"en","primary_location":{"id":"doi:10.1145/3360901.3364444","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3360901.3364444","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 10th International Conference on Knowledge Capture","raw_type":"proceedings-article"},"type":"article","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2101.03026","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Carlos Badenes-Olmedo","orcid":null},"institutions":[{"id":"https://openalex.org/I88060688","display_name":"Universidad Polit\u00e9cnica de Madrid","ror":"https://ror.org/03n6nwv02","country_code":"ES","type":"education","lineage":["https://openalex.org/I88060688"]}],"countries":["ES"],"is_corresponding":true,"raw_author_name":"Carlos Badenes-Olmedo","raw_affiliation_strings":["Universidad Polit\u00e9cnica de Madrid, Boadilla del Monte, Spain"],"affiliations":[{"raw_affiliation_string":"Universidad Polit\u00e9cnica de Madrid, Boadilla del Monte, Spain","institution_ids":["https://openalex.org/I88060688"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Jos\u00e9 Luis Redondo-Garc\u00eda","orcid":null},"institutions":[{"id":"https://openalex.org/I4210123934","display_name":"Amazon (United Kingdom)","ror":"https://ror.org/02xey9634","country_code":"GB","type":"company","lineage":["https://openalex.org/I1311688040","https://openalex.org/I4210123934"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Jos\u00e9 Luis Redondo-Garc\u00eda","raw_affiliation_strings":["Amazon Research, Cambridge, United Kingdom"],"affiliations":[{"raw_affiliation_string":"Amazon Research, Cambridge, United Kingdom","institution_ids":["https://openalex.org/I4210123934"]}]},{"author_position":"last","author":{"id":null,"display_name":"Oscar Corcho","orcid":null},"institutions":[{"id":"https://openalex.org/I88060688","display_name":"Universidad Polit\u00e9cnica de Madrid","ror":"https://ror.org/03n6nwv02","country_code":"ES","type":"education","lineage":["https://openalex.org/I88060688"]}],"countries":["ES"],"is_corresponding":false,"raw_author_name":"Oscar Corcho","raw_affiliation_strings":["Universidad Polit\u00e9cnica de Madrid, Boadilla del Monte, Spain"],"affiliations":[{"raw_affiliation_string":"Universidad Polit\u00e9cnica de Madrid, Boadilla del Monte, Spain","institution_ids":["https://openalex.org/I88060688"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I88060688"],"apc_list":null,"apc_paid":null,"fwci":0.578,"has_fulltext":false,"cited_by_count":5,"citation_normalized_percentile":{"value":0.76600995,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"147","last_page":"153"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9959999918937683,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9959999918937683,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9878000020980835,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13083","display_name":"Advanced Text Analysis Techniques","score":0.9872999787330627,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/sorting","display_name":"Sorting","score":0.5019999742507935},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.4959999918937683},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.46639999747276306},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.453000009059906},{"id":"https://openalex.org/keywords/annotation","display_name":"Annotation","score":0.43799999356269836},{"id":"https://openalex.org/keywords/probabilistic-logic","display_name":"Probabilistic logic","score":0.42500001192092896},{"id":"https://openalex.org/keywords/constraint","display_name":"Constraint (computer-aided design)","score":0.39340001344680786},{"id":"https://openalex.org/keywords/hierarchy","display_name":"Hierarchy","score":0.3799999952316284},{"id":"https://openalex.org/keywords/scale","display_name":"Scale (ratio)","score":0.34610000252723694}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8389999866485596},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.5353000164031982},{"id":"https://openalex.org/C111696304","wikidata":"https://www.wikidata.org/wiki/Q2303697","display_name":"Sorting","level":2,"score":0.5019999742507935},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5012000203132629},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.4959999918937683},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.46639999747276306},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4650000035762787},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.453000009059906},{"id":"https://openalex.org/C2776321320","wikidata":"https://www.wikidata.org/wiki/Q857525","display_name":"Annotation","level":2,"score":0.43799999356269836},{"id":"https://openalex.org/C49937458","wikidata":"https://www.wikidata.org/wiki/Q2599292","display_name":"Probabilistic logic","level":2,"score":0.42500001192092896},{"id":"https://openalex.org/C2776036281","wikidata":"https://www.wikidata.org/wiki/Q48769818","display_name":"Constraint (computer-aided design)","level":2,"score":0.39340001344680786},{"id":"https://openalex.org/C31170391","wikidata":"https://www.wikidata.org/wiki/Q188619","display_name":"Hierarchy","level":2,"score":0.3799999952316284},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.34610000252723694},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.33959999680519104},{"id":"https://openalex.org/C171686336","wikidata":"https://www.wikidata.org/wiki/Q3532085","display_name":"Topic model","level":2,"score":0.328000009059906},{"id":"https://openalex.org/C203005215","wikidata":"https://www.wikidata.org/wiki/Q79798","display_name":"Machine translation","level":2,"score":0.31839999556541443},{"id":"https://openalex.org/C64543145","wikidata":"https://www.wikidata.org/wiki/Q162942","display_name":"Intersection (aeronautics)","level":2,"score":0.31060001254081726},{"id":"https://openalex.org/C2780479914","wikidata":"https://www.wikidata.org/wiki/Q302088","display_name":"Document classification","level":2,"score":0.30390000343322754},{"id":"https://openalex.org/C2777601683","wikidata":"https://www.wikidata.org/wiki/Q6499736","display_name":"Vocabulary","level":2,"score":0.2851000130176544},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.2833000123500824},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.2791000008583069},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.2773999869823456},{"id":"https://openalex.org/C161156560","wikidata":"https://www.wikidata.org/wiki/Q1638872","display_name":"Document retrieval","level":2,"score":0.26499998569488525},{"id":"https://openalex.org/C513874922","wikidata":"https://www.wikidata.org/wiki/Q212805","display_name":"Digital library","level":3,"score":0.2621000111103058},{"id":"https://openalex.org/C155092808","wikidata":"https://www.wikidata.org/wiki/Q182557","display_name":"Computational linguistics","level":2,"score":0.25769999623298645},{"id":"https://openalex.org/C58489278","wikidata":"https://www.wikidata.org/wiki/Q1172284","display_name":"Data set","level":2,"score":0.2574000060558319},{"id":"https://openalex.org/C88548561","wikidata":"https://www.wikidata.org/wiki/Q347599","display_name":"sort","level":2,"score":0.25440001487731934}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/3360901.3364444","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3360901.3364444","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 10th International Conference on Knowledge Capture","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2101.03026","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2101.03026","pdf_url":"https://arxiv.org/pdf/2101.03026","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2101.03026","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2101.03026","pdf_url":"https://arxiv.org/pdf/2101.03026","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G2378929099","display_name":null,"funder_award_id":"780247","funder_id":"https://openalex.org/F4320338335","funder_display_name":"H2020 European Research Council"},{"id":"https://openalex.org/G3176009801","display_name":null,"funder_award_id":"TIN2016-78011-C4-4-R","funder_id":"https://openalex.org/F4320335322","funder_display_name":"European Regional Development Fund"}],"funders":[{"id":"https://openalex.org/F4320335322","display_name":"European Regional Development Fund","ror":"https://ror.org/00k4n6c32"},{"id":"https://openalex.org/F4320338335","display_name":"H2020 European Research Council","ror":"https://ror.org/0472cxd90"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":18,"referenced_works":["https://openalex.org/W1593239840","https://openalex.org/W1877481539","https://openalex.org/W1979044015","https://openalex.org/W2001932471","https://openalex.org/W2028742638","https://openalex.org/W2081534675","https://openalex.org/W2098126593","https://openalex.org/W2108399535","https://openalex.org/W2128925311","https://openalex.org/W2129066856","https://openalex.org/W2146950091","https://openalex.org/W2147152072","https://openalex.org/W2159835345","https://openalex.org/W2174706414","https://openalex.org/W2751976275","https://openalex.org/W3013669377","https://openalex.org/W4300009529","https://openalex.org/W6931635798"],"related_works":[],"abstract_inverted_index":{"With":[0],"the":[1,16,76,107,162],"ongoing":[2],"growth":[3],"in":[4,9,58,141],"number":[5],"of":[6,13,19,41,56,78,101,130,153,168],"digital":[7],"articles":[8],"a":[10,39,70,98,142],"wider":[11],"set":[12],"languages":[14],"and":[15,88,148,165,176],"expanding":[17],"use":[18],"different":[20],"languages,":[21],"we":[22],"need":[23],"annotation":[24],"methods":[25],"that":[26,46,80,118],"enable":[27],"browsing":[28],"multi-lingual":[29,102,154],"corpora.":[30],"Multilingual":[31],"probabilistic":[32],"topic":[33],"models":[34,45],"have":[35],"recently":[36],"emerged":[37],"as":[38],"group":[40],"semi-supervised":[42],"machine":[43],"learning":[44],"can":[47,83],"be":[48],"used":[49],"to":[50,68,86,92,95],"perform":[51],"thematic":[52],"explorations":[53],"on":[54,161,174],"collections":[55],"texts":[57],"multiple":[59],"languages.":[60],"However,":[61],"these":[62],"approaches":[63],"require":[64,121],"theme-aligned":[65],"training":[66,108],"data":[67],"create":[69],"language-independent":[71],"space.":[72],"This":[73,110],"constraint":[74],"limits":[75],"amount":[77],"scenarios":[79],"this":[81],"technique":[82],"offer":[84],"solutions":[85],"train":[87],"makes":[89],"it":[90],"difficult":[91],"scale":[93],"up":[94],"situations":[96],"where":[97],"huge":[99],"collection":[100],"documents":[103,140,150,178],"are":[104],"required":[105],"during":[106],"phase.":[109],"paper":[111],"presents":[112],"an":[113],"unsupervised":[114],"document":[115],"similarity":[116],"algorithm":[117,134],"does":[119],"not":[120],"parallel":[122],"or":[123,126],"comparable":[124],"corpora,":[125],"any":[127],"other":[128],"type":[129],"translation":[131],"resource.":[132],"The":[133],"annotates":[135],"topics":[136],"automatically":[137],"created":[138],"from":[139,156],"single":[143],"language":[144],"with":[145],"cross-lingual":[146],"labels":[147],"describes":[149],"by":[151,179],"hierarchies":[152],"concepts":[155],"independently-trained":[157],"models.":[158],"Experiments":[159],"performed":[160],"English,":[163],"Spanish":[164],"French":[166],"editions":[167],"JCR-Acquis":[169],"corpora":[170],"reveal":[171],"promising":[172],"results":[173],"classifying":[175],"sorting":[177],"similar":[180],"content.":[181]},"counts_by_year":[{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":2},{"year":2020,"cited_by_count":1}],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2019-11-22T00:00:00"}
