{"id":"https://openalex.org/W3088474127","doi":"https://doi.org/10.1145/3442381.3449805","title":"Crosslingual Topic Modeling with WikiPDA","display_name":"Crosslingual Topic Modeling with WikiPDA","publication_year":2021,"publication_date":"2021-04-19","ids":{"openalex":"https://openalex.org/W3088474127","doi":"https://doi.org/10.1145/3442381.3449805","mag":"3088474127"},"language":"en","primary_location":{"id":"doi:10.1145/3442381.3449805","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3442381.3449805","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Web Conference 2021","raw_type":"proceedings-article"},"type":"article","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1145/3442381.3449805","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Tiziano Piccardi","orcid":null},"institutions":[{"id":"https://openalex.org/I5124864","display_name":"\u00c9cole Polytechnique F\u00e9d\u00e9rale de Lausanne","ror":"https://ror.org/02s376052","country_code":"CH","type":"education","lineage":["https://openalex.org/I2799323385","https://openalex.org/I5124864"]}],"countries":["CH"],"is_corresponding":true,"raw_author_name":"Tiziano Piccardi","raw_affiliation_strings":["Ecole Polytechnique F\u00e9d\u00e9rale de Lausanne, Switzerland"],"affiliations":[{"raw_affiliation_string":"Ecole Polytechnique F\u00e9d\u00e9rale de Lausanne, Switzerland","institution_ids":["https://openalex.org/I5124864"]}]},{"author_position":"last","author":{"id":null,"display_name":"Robert West","orcid":null},"institutions":[{"id":"https://openalex.org/I5124864","display_name":"\u00c9cole Polytechnique F\u00e9d\u00e9rale de Lausanne","ror":"https://ror.org/02s376052","country_code":"CH","type":"education","lineage":["https://openalex.org/I2799323385","https://openalex.org/I5124864"]}],"countries":["CH"],"is_corresponding":false,"raw_author_name":"Robert West","raw_affiliation_strings":["Ecole Polytechnique F\u00e9d\u00e9rale de Lausanne, Switzerland"],"affiliations":[{"raw_affiliation_string":"Ecole Polytechnique F\u00e9d\u00e9rale de Lausanne, Switzerland","institution_ids":["https://openalex.org/I5124864"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I5124864"],"apc_list":null,"apc_paid":null,"fwci":0.5599,"has_fulltext":false,"cited_by_count":6,"citation_normalized_percentile":{"value":0.7154906,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"3032","last_page":"3041"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13083","display_name":"Advanced Text Analysis Techniques","score":0.9902999997138977,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.989799976348877,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/latent-dirichlet-allocation","display_name":"Latent Dirichlet allocation","score":0.9185000061988831},{"id":"https://openalex.org/keywords/topic-model","display_name":"Topic model","score":0.8478000164031982},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.5486999750137329},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.38449999690055847},{"id":"https://openalex.org/keywords/dirichlet-distribution","display_name":"Dirichlet distribution","score":0.376800000667572}],"concepts":[{"id":"https://openalex.org/C500882744","wikidata":"https://www.wikidata.org/wiki/Q269236","display_name":"Latent Dirichlet allocation","level":3,"score":0.9185000061988831},{"id":"https://openalex.org/C171686336","wikidata":"https://www.wikidata.org/wiki/Q3532085","display_name":"Topic model","level":2,"score":0.8478000164031982},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.816100001335144},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5807999968528748},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5677000284194946},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.5486999750137329},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.42480000853538513},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.38449999690055847},{"id":"https://openalex.org/C169214877","wikidata":"https://www.wikidata.org/wiki/Q981016","display_name":"Dirichlet distribution","level":3,"score":0.376800000667572},{"id":"https://openalex.org/C141318989","wikidata":"https://www.wikidata.org/wiki/Q5753066","display_name":"Hierarchical Dirichlet process","level":4,"score":0.3393000066280365},{"id":"https://openalex.org/C114289077","wikidata":"https://www.wikidata.org/wiki/Q3284399","display_name":"Statistical model","level":2,"score":0.2881999909877777},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.27239999175071716},{"id":"https://openalex.org/C2780239667","wikidata":"https://www.wikidata.org/wiki/Q2102850","display_name":"Polyglot","level":2,"score":0.27160000801086426},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.2554999887943268}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1145/3442381.3449805","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3442381.3449805","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Web Conference 2021","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:2009.11207","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2009.11207","pdf_url":"https://arxiv.org/pdf/2009.11207","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"pmh:oai:infoscience.epfl.ch:292922","is_oa":true,"landing_page_url":"https://infoscience.epfl.ch/handle/20.500.14299/186580","pdf_url":null,"source":{"id":"https://openalex.org/S4306400487","display_name":"Infoscience (Ecole Polytechnique F\u00e9d\u00e9rale de Lausanne)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"WoS","raw_type":"conference proceedings"}],"best_oa_location":{"id":"doi:10.1145/3442381.3449805","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3442381.3449805","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Web Conference 2021","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":26,"referenced_works":["https://openalex.org/W1549887604","https://openalex.org/W1556255569","https://openalex.org/W1880262756","https://openalex.org/W1964189668","https://openalex.org/W1969096664","https://openalex.org/W1970396504","https://openalex.org/W2033593667","https://openalex.org/W2054141820","https://openalex.org/W2058236194","https://openalex.org/W2094064503","https://openalex.org/W2096276656","https://openalex.org/W2100341149","https://openalex.org/W2100456378","https://openalex.org/W2106344701","https://openalex.org/W2106836779","https://openalex.org/W2119188105","https://openalex.org/W2127365348","https://openalex.org/W2140746849","https://openalex.org/W2174706414","https://openalex.org/W2590878907","https://openalex.org/W2888236192","https://openalex.org/W2898966467","https://openalex.org/W2903277833","https://openalex.org/W2907069963","https://openalex.org/W2997805114","https://openalex.org/W3084241561"],"related_works":[],"abstract_inverted_index":{"We":[0,108],"present":[1],"Wikipedia-based":[2],"Polyglot":[3],"Dirichlet":[4,99],"Allocation":[5],"(WikiPDA),":[6],"a":[7,24,80,115,140,156],"crosslingual":[8,126],"topic":[9,83],"model":[10,141],"that":[11,34,89],"learns":[12],"to":[13,38,44],"represent":[14],"Wikipedia":[15,35,122],"articles":[16,36,59],"written":[17],"in":[18,46,65,112,120,168],"any":[19,148],"language":[20,123,137,166],"as":[21,55,155],"distributions":[22],"over":[23],"common":[25],"set":[26],"of":[27,57,72,117],"language-independent":[28],"topics.":[29],"It":[30],"leverages":[31],"the":[32,47],"fact":[33],"link":[37],"each":[39],"other":[40],"and":[41,77,125],"are":[42,60],"mapped":[43],"concepts":[45],"Wikidata":[48],"knowledge":[49],"base,":[50],"such":[51],"that,":[52],"when":[53],"represented":[54],"bags":[56,71],"links,":[58],"inherently":[61],"language-independent.":[62],"WikiPDA":[63,90,154],"works":[64],"two":[66,113],"steps,":[67],"by":[68],"first":[69],"densifying":[70],"links":[73],"using":[74],"matrix":[75],"completion":[76],"then":[78],"training":[79],"standard":[81],"monolingual":[82,96],"model.":[84],"A":[85],"human":[86],"evaluation":[87],"shows":[88],"produces":[91],"more":[92],"coherent":[93],"topics":[94],"than":[95],"text-based":[97],"latent":[98],"allocation":[100],"(LDA),":[101],"thus":[102],"offering":[103],"crosslinguality":[104],"at":[105,177],"no":[106],"cost.":[107],"demonstrate":[109],"WikiPDA\u2019s":[110,133],"utility":[111],"applications:":[114],"study":[116],"topical":[118],"biases":[119],"28":[121],"editions,":[124],"supervised":[127],"document":[128],"classification.":[129],"Finally,":[130],"we":[131],"highlight":[132],"capacity":[134],"for":[135,144,159],"zero-shot":[136],"transfer,":[138],"where":[139],"is":[142],"reused":[143],"new":[145],"languages":[146],"without":[147],"fine-tuning.":[149],"Researchers":[150],"can":[151],"benefit":[152],"from":[153],"practical":[157],"tool":[158],"studying":[160],"Wikipedia\u2019s":[161],"content":[162],"across":[163],"its":[164],"299":[165],"editions":[167],"interpretable":[169],"ways,":[170],"via":[171],"an":[172],"easy-to-use":[173],"library":[174],"publicly":[175],"available":[176],"https://github.com/epfl-dlab/WikiPDA.":[178]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":2}],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2020-10-01T00:00:00"}
