{"id":"https://openalex.org/W2071399839","doi":"https://doi.org/10.1145/1935826.1935887","title":"Cross lingual text classification by mining multilingual topics from wikipedia","display_name":"Cross lingual text classification by mining multilingual topics from wikipedia","publication_year":2011,"publication_date":"2011-02-01","ids":{"openalex":"https://openalex.org/W2071399839","doi":"https://doi.org/10.1145/1935826.1935887","mag":"2071399839"},"language":"en","primary_location":{"id":"doi:10.1145/1935826.1935887","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1935826.1935887","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the fourth ACM international conference on Web search and data mining","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5072992613","display_name":"Xiaochuan Ni","orcid":null},"institutions":[{"id":"https://openalex.org/I4210113369","display_name":"Microsoft Research Asia (China)","ror":"https://ror.org/0300m5276","country_code":"CN","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210113369"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaochuan Ni","raw_affiliation_strings":["Microsoft Research Asia, Beijing, China","Microsoft research Asia, Beijing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Microsoft Research Asia, Beijing, China","institution_ids":["https://openalex.org/I4210113369"]},{"raw_affiliation_string":"Microsoft research Asia, Beijing, China","institution_ids":["https://openalex.org/I4210113369"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102139311","display_name":"Jian-Tao Sun","orcid":null},"institutions":[{"id":"https://openalex.org/I4210113369","display_name":"Microsoft Research Asia (China)","ror":"https://ror.org/0300m5276","country_code":"CN","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210113369"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jian-Tao Sun","raw_affiliation_strings":["Microsoft Research Asia, Beijing, China","Microsoft research Asia, Beijing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Microsoft Research Asia, Beijing, China","institution_ids":["https://openalex.org/I4210113369"]},{"raw_affiliation_string":"Microsoft research Asia, Beijing, China","institution_ids":["https://openalex.org/I4210113369"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5088890329","display_name":"Jian Hu","orcid":"https://orcid.org/0000-0003-0946-9617"},"institutions":[{"id":"https://openalex.org/I2250653659","display_name":"Tencent (China)","ror":"https://ror.org/00hhjss72","country_code":"CN","type":"company","lineage":["https://openalex.org/I2250653659"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jian Hu","raw_affiliation_strings":["Tencent Soso, Beijing, China","Tencent, SOSO, Beijing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Tencent Soso, Beijing, China","institution_ids":["https://openalex.org/I2250653659"]},{"raw_affiliation_string":"Tencent, SOSO, Beijing, China","institution_ids":["https://openalex.org/I2250653659"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5108654890","display_name":"Zheng Chen","orcid":"https://orcid.org/0009-0000-9741-9956"},"institutions":[{"id":"https://openalex.org/I4210113369","display_name":"Microsoft Research Asia (China)","ror":"https://ror.org/0300m5276","country_code":"CN","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210113369"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zheng Chen","raw_affiliation_strings":["Microsoft Research Asia, Beijing, China","Microsoft research Asia, Beijing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Microsoft Research Asia, Beijing, China","institution_ids":["https://openalex.org/I4210113369"]},{"raw_affiliation_string":"Microsoft research Asia, Beijing, China","institution_ids":["https://openalex.org/I4210113369"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":12.7497,"has_fulltext":false,"cited_by_count":60,"citation_normalized_percentile":{"value":0.98714327,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"375","last_page":"384"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12478","display_name":"Wikis in Education and Collaboration","score":0.9976999759674072,"subfield":{"id":"https://openalex.org/subfields/3315","display_name":"Communication"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9975000023841858,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8640339374542236},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.6789153218269348},{"id":"https://openalex.org/keywords/classifier","display_name":"Classifier (UML)","score":0.6005659103393555},{"id":"https://openalex.org/keywords/machine-translation","display_name":"Machine translation","score":0.5885499119758606},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5868842601776123},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.5061721205711365},{"id":"https://openalex.org/keywords/knowledge-base","display_name":"Knowledge base","score":0.4593953788280487}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8640339374542236},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.6789153218269348},{"id":"https://openalex.org/C95623464","wikidata":"https://www.wikidata.org/wiki/Q1096149","display_name":"Classifier (UML)","level":2,"score":0.6005659103393555},{"id":"https://openalex.org/C203005215","wikidata":"https://www.wikidata.org/wiki/Q79798","display_name":"Machine translation","level":2,"score":0.5885499119758606},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5868842601776123},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.5061721205711365},{"id":"https://openalex.org/C4554734","wikidata":"https://www.wikidata.org/wiki/Q593744","display_name":"Knowledge base","level":2,"score":0.4593953788280487}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/1935826.1935887","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1935826.1935887","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the fourth ACM international conference on Web search and data mining","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.8399999737739563}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":26,"referenced_works":["https://openalex.org/W8895266","https://openalex.org/W58646613","https://openalex.org/W86887328","https://openalex.org/W89857650","https://openalex.org/W1880262756","https://openalex.org/W1994966918","https://openalex.org/W2004360803","https://openalex.org/W2061118075","https://openalex.org/W2064885744","https://openalex.org/W2084988910","https://openalex.org/W2093976457","https://openalex.org/W2096110600","https://openalex.org/W2099253769","https://openalex.org/W2110591510","https://openalex.org/W2120779048","https://openalex.org/W2121759644","https://openalex.org/W2129294185","https://openalex.org/W2135065658","https://openalex.org/W2139812240","https://openalex.org/W2140903445","https://openalex.org/W2150749667","https://openalex.org/W2151521349","https://openalex.org/W2157361576","https://openalex.org/W2171836785","https://openalex.org/W6603592053","https://openalex.org/W6676781583"],"related_works":["https://openalex.org/W3011059803","https://openalex.org/W3151736118","https://openalex.org/W4362495644","https://openalex.org/W2775554247","https://openalex.org/W2110168585","https://openalex.org/W3107474891","https://openalex.org/W2250213760","https://openalex.org/W4386247111","https://openalex.org/W4327642362","https://openalex.org/W2587014613"],"abstract_inverted_index":{"This":[0,148],"paper":[1],"investigates":[2],"how":[3],"to":[4,62,110,130],"effectively":[5],"do":[6,111],"cross":[7,112,183],"lingual":[8,113,184],"text":[9,114,128,185],"classification":[10],"by":[11,31,137],"leveraging":[12],"a":[13,99,102,127],"large":[14],"scale":[15],"and":[16],"multilingual":[17,44],"knowledge":[18,48],"base,":[19],"Wikipedia.":[20],"Based":[21],"on":[22],"the":[23,117,132,145,167],"observation":[24],"that":[25,174],"each":[26,59,79],"Wikipedia":[27,74],"concept":[28],"is":[29,179],"described":[30],"documents":[32,75,91,133,140],"of":[33,56,86,92,104,134,141],"different":[34,87,93],"languages,":[35],"we":[36,68,124],"adapt":[37],"existing":[38],"topic":[39,80,176],"modeling":[40,177],"algorithms":[41],"for":[42,121,166,181],"mining":[43],"topics":[45,52,71],"from":[46,73],"this":[47,66],"base.":[49],"The":[50,170],"extracted":[51,72],"have":[53],"multiple":[54],"types":[55],"representations,":[57],"with":[58,82],"type":[60],"corresponding":[61],"one":[63,122],"language.":[64,169],"In":[65],"work,":[67],"regard":[69],"such":[70],"as":[76],"universal-topics,":[77],"since":[78],"corresponds":[81],"same":[83],"semantic":[84],"information":[85],"languages.":[88],"Thus":[89],"new":[90],"languages":[94,143],"can":[95,125],"be":[96],"represented":[97],"in":[98],"space":[100],"using":[101],"group":[103],"universal-topics.":[105],"We":[106],"use":[107],"these":[108],"universal-topics":[109],"classification.":[115],"Given":[116],"training":[118],"data":[119,165],"labeled":[120],"language,":[123],"train":[126],"classifier":[129],"classify":[131],"another":[135],"language":[136],"mapping":[138],"all":[139],"both":[142],"into":[144],"universal-topic":[146],"space.":[147],"approach":[149,178],"does":[150],"not":[151],"require":[152],"any":[153],"additional":[154],"linguistic":[155],"resources,":[156],"like":[157],"bilingual":[158],"dictionaries,":[159],"machine":[160],"translation":[161],"tools,":[162],"or":[163],"labeling":[164],"target":[168],"evaluation":[171],"results":[172],"indicate":[173],"our":[175],"effective":[180],"building":[182],"classifier.":[186]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":4},{"year":2020,"cited_by_count":3},{"year":2019,"cited_by_count":3},{"year":2018,"cited_by_count":2},{"year":2017,"cited_by_count":2},{"year":2016,"cited_by_count":7},{"year":2015,"cited_by_count":6},{"year":2014,"cited_by_count":8},{"year":2013,"cited_by_count":13},{"year":2012,"cited_by_count":7}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
