{"id":"https://openalex.org/W2562500328","doi":"https://doi.org/10.1002/pra2.2016.14505301065","title":"Document representation methods for clustering bilingual documents","display_name":"Document representation methods for clustering bilingual documents","publication_year":2016,"publication_date":"2016-01-01","ids":{"openalex":"https://openalex.org/W2562500328","doi":"https://doi.org/10.1002/pra2.2016.14505301065","mag":"2562500328"},"language":"en","primary_location":{"id":"doi:10.1002/pra2.2016.14505301065","is_oa":false,"landing_page_url":"https://doi.org/10.1002/pra2.2016.14505301065","pdf_url":null,"source":{"id":"https://openalex.org/S4393918545","display_name":"Proceedings of the Association for Information Science and Technology","issn_l":"2373-9231","issn":["2373-9231"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Association for Information Science and Technology","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5053659741","display_name":"Shutian Ma","orcid":"https://orcid.org/0000-0002-8339-6412"},"institutions":[{"id":"https://openalex.org/I200845125","display_name":"Nanjing University of Information Science and Technology","ror":"https://ror.org/02y0rxk19","country_code":"CN","type":"education","lineage":["https://openalex.org/I200845125"]},{"id":"https://openalex.org/I36399199","display_name":"Nanjing University of Science and Technology","ror":"https://ror.org/00xp9wg62","country_code":"CN","type":"education","lineage":["https://openalex.org/I36399199"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Shutian Ma","raw_affiliation_strings":["Department of Information Management Nanjing University of Science and Technology No. 200 Xiaolingwei Street Nanjing China","Department of Information Management, Nanjing University of Science and Technology, No. 200 Xiaolingwei Street, Nanjing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Information Management Nanjing University of Science and Technology No. 200 Xiaolingwei Street Nanjing China","institution_ids":["https://openalex.org/I200845125","https://openalex.org/I36399199"]},{"raw_affiliation_string":"Department of Information Management, Nanjing University of Science and Technology, No. 200 Xiaolingwei Street, Nanjing, China","institution_ids":["https://openalex.org/I36399199"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5056318061","display_name":"Chengzhi Zhang","orcid":"https://orcid.org/0000-0001-9522-2914"},"institutions":[{"id":"https://openalex.org/I200845125","display_name":"Nanjing University of Information Science and Technology","ror":"https://ror.org/02y0rxk19","country_code":"CN","type":"education","lineage":["https://openalex.org/I200845125"]},{"id":"https://openalex.org/I36399199","display_name":"Nanjing University of Science and Technology","ror":"https://ror.org/00xp9wg62","country_code":"CN","type":"education","lineage":["https://openalex.org/I36399199"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Chengzhi Zhang","raw_affiliation_strings":["Department of Information Management Nanjing University of Science and Technology No. 200 Xiaolingwei Street Nanjing China","Department of Information Management, Nanjing University of Science and Technology, No. 200 Xiaolingwei Street, Nanjing, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Information Management Nanjing University of Science and Technology No. 200 Xiaolingwei Street Nanjing China","institution_ids":["https://openalex.org/I200845125","https://openalex.org/I36399199"]},{"raw_affiliation_string":"Department of Information Management, Nanjing University of Science and Technology, No. 200 Xiaolingwei Street, Nanjing, China","institution_ids":["https://openalex.org/I36399199"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5026188630","display_name":"Daqing He","orcid":"https://orcid.org/0000-0002-4645-8696"},"institutions":[{"id":"https://openalex.org/I170201317","display_name":"University of Pittsburgh","ror":"https://ror.org/01an3r305","country_code":"US","type":"education","lineage":["https://openalex.org/I170201317"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Daqing He","raw_affiliation_strings":["School of Information Science and Intelligent System Program University of Pittsburgh 135 North Bellefield Avenue Pittsburgh PA 15260","School of Information Science and Intelligent System Program, University of Pittsburgh, 135 North Bellefield Avenue, Pittsburgh, PA 15260"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Information Science and Intelligent System Program University of Pittsburgh 135 North Bellefield Avenue Pittsburgh PA 15260","institution_ids":["https://openalex.org/I170201317"]},{"raw_affiliation_string":"School of Information Science and Intelligent System Program, University of Pittsburgh, 135 North Bellefield Avenue, Pittsburgh, PA 15260","institution_ids":["https://openalex.org/I170201317"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5026188630","https://openalex.org/A5053659741","https://openalex.org/A5056318061"],"corresponding_institution_ids":["https://openalex.org/I170201317","https://openalex.org/I200845125","https://openalex.org/I36399199"],"apc_list":null,"apc_paid":null,"fwci":3.092,"has_fulltext":false,"cited_by_count":14,"citation_normalized_percentile":{"value":0.9325476,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":"53","issue":"1","first_page":"1","last_page":"10"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9955999851226807,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9955999851226807,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13083","display_name":"Advanced Text Analysis Techniques","score":0.9937000274658203,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10538","display_name":"Data Mining Algorithms and Applications","score":0.9866999983787537,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/latent-dirichlet-allocation","display_name":"Latent Dirichlet allocation","score":0.8843129873275757},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8611498475074768},{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.8407704830169678},{"id":"https://openalex.org/keywords/document-clustering","display_name":"Document clustering","score":0.6528751254081726},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.6071967482566833},{"id":"https://openalex.org/keywords/topic-model","display_name":"Topic model","score":0.5260232090950012},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.5235735774040222},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5057210922241211},{"id":"https://openalex.org/keywords/vector-space-model","display_name":"Vector space model","score":0.47113436460494995},{"id":"https://openalex.org/keywords/search-engine-indexing","display_name":"Search engine indexing","score":0.45915594696998596},{"id":"https://openalex.org/keywords/text-corpus","display_name":"Text corpus","score":0.45073169469833374},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4395603537559509}],"concepts":[{"id":"https://openalex.org/C500882744","wikidata":"https://www.wikidata.org/wiki/Q269236","display_name":"Latent Dirichlet allocation","level":3,"score":0.8843129873275757},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8611498475074768},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.8407704830169678},{"id":"https://openalex.org/C177937566","wikidata":"https://www.wikidata.org/wiki/Q4223102","display_name":"Document clustering","level":3,"score":0.6528751254081726},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.6071967482566833},{"id":"https://openalex.org/C171686336","wikidata":"https://www.wikidata.org/wiki/Q3532085","display_name":"Topic model","level":2,"score":0.5260232090950012},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.5235735774040222},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5057210922241211},{"id":"https://openalex.org/C89686163","wikidata":"https://www.wikidata.org/wiki/Q1187982","display_name":"Vector space model","level":2,"score":0.47113436460494995},{"id":"https://openalex.org/C75165309","wikidata":"https://www.wikidata.org/wiki/Q2258979","display_name":"Search engine indexing","level":2,"score":0.45915594696998596},{"id":"https://openalex.org/C2474386","wikidata":"https://www.wikidata.org/wiki/Q461183","display_name":"Text corpus","level":2,"score":0.45073169469833374},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4395603537559509},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1002/pra2.2016.14505301065","is_oa":false,"landing_page_url":"https://doi.org/10.1002/pra2.2016.14505301065","pdf_url":null,"source":{"id":"https://openalex.org/S4393918545","display_name":"Proceedings of the Association for Information Science and Technology","issn_l":"2373-9231","issn":["2373-9231"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Association for Information Science and Technology","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.44999998807907104,"id":"https://metadata.un.org/sdg/7","display_name":"Affordable and clean energy"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320327557","display_name":"National Office for Philosophy and Social Sciences","ror":"https://ror.org/04m0ms912"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":53,"referenced_works":["https://openalex.org/W206021124","https://openalex.org/W1489457428","https://openalex.org/W1539070015","https://openalex.org/W1556562472","https://openalex.org/W1566352582","https://openalex.org/W1604192292","https://openalex.org/W1660390307","https://openalex.org/W1768657112","https://openalex.org/W1846766366","https://openalex.org/W1880262756","https://openalex.org/W1958912078","https://openalex.org/W1978394996","https://openalex.org/W1981257123","https://openalex.org/W1987971958","https://openalex.org/W1996480483","https://openalex.org/W2001082470","https://openalex.org/W2017269839","https://openalex.org/W2024635814","https://openalex.org/W2025365931","https://openalex.org/W2033593667","https://openalex.org/W2038124402","https://openalex.org/W2052864155","https://openalex.org/W2056458023","https://openalex.org/W2069109145","https://openalex.org/W2073844507","https://openalex.org/W2091507117","https://openalex.org/W2100456378","https://openalex.org/W2103235794","https://openalex.org/W2107743791","https://openalex.org/W2120467164","https://openalex.org/W2120641853","https://openalex.org/W2129984229","https://openalex.org/W2131744502","https://openalex.org/W2131912994","https://openalex.org/W2138615112","https://openalex.org/W2147152072","https://openalex.org/W2153579005","https://openalex.org/W2163922914","https://openalex.org/W2165232124","https://openalex.org/W2165612380","https://openalex.org/W2204007383","https://openalex.org/W2252209277","https://openalex.org/W2294384375","https://openalex.org/W2295586785","https://openalex.org/W2435251607","https://openalex.org/W2499419096","https://openalex.org/W2950133940","https://openalex.org/W2953337630","https://openalex.org/W2962684168","https://openalex.org/W2963759421","https://openalex.org/W4233135949","https://openalex.org/W4237791300","https://openalex.org/W7058246535"],"related_works":["https://openalex.org/W2769501189","https://openalex.org/W4315588616","https://openalex.org/W4312773271","https://openalex.org/W2888805565","https://openalex.org/W2962686197","https://openalex.org/W3005513013","https://openalex.org/W2207653751","https://openalex.org/W2611137333","https://openalex.org/W4309228610","https://openalex.org/W4294597112"],"abstract_inverted_index":{"ABSTRACT":[0],"Globalization":[1],"places":[2],"people":[3],"in":[4,20,34,89,99,198,231],"a":[5,10],"multilingual":[6,38,73],"environment.":[7],"There":[8,93],"is":[9,41,84,205],"growing":[11],"number":[12],"of":[13,43,58,122,129,143,155,168,188,220,234,245,250,259],"users":[14],"to":[15,30,77,87,208,263],"access":[16],"and":[17,47,61,105,117,146,150,158,181],"share":[18],"information":[19,33],"several":[21],"languages":[22],"for":[23,53,64,71,211,241,256],"public":[24],"or":[25],"private":[26],"purpose.":[27],"In":[28,108,132],"order":[29],"deliver":[31],"relevant":[32],"different":[35,166,218],"languages,":[36],"efficient":[37],"documents":[39,54,74,100,140,152,230,243],"management":[40],"worthy":[42],"study.":[44],"Generally,":[45],"classification":[46],"clustering":[48,106,130,200,216,229],"are":[49,94],"two":[50,95,217],"typical":[51],"methods":[52,124,197,262],"management.":[55],"However,":[56],"lack":[57],"training":[59],"data":[60],"high":[62],"efforts":[63],"corpus":[65],"annotation":[66],"will":[67],"increase":[68],"the":[69,120],"cost":[70],"classifying":[72],"which":[75],"needs":[76],"bridge":[78],"language":[79],"gaps":[80],"as":[81,161],"well.":[82],"Clustering":[83],"more":[85],"suitable":[86],"implement":[88],"such":[90],"practical":[91],"applications.":[92],"main":[96],"factors":[97],"involved":[98],"clustering,":[101],"document":[102,114,169,260],"representation":[103,115,123,170,261],"method":[104,116],"algorithm.":[107],"this":[109],"paper,":[110],"we":[111,135],"focus":[112],"on":[113,127,141,153],"demonstrate":[118],"that":[119],"choice":[121],"has":[125],"impacts":[126],"quality":[128],"results.":[131],"our":[133],"experiment,":[134],"use":[136],"parallel":[137,246],"corpora":[138,148,209,236,251],"(English\u2010Chinese":[139],"topic":[142],"technology":[144,157],"information)":[145],"comparable":[147,221,235],"(English":[149],"Chinese":[151],"topics":[154,219],"mobile":[156],"wind":[159],"energy)":[160],"dataset.":[162],"We":[163],"compare":[164],"four":[165],"types":[167],"methods:":[171],"Vector":[172,189],"Space":[173,190],"Model,":[174],"Latent":[175,178,202,223],"Semantic":[176,203],"Indexing,":[177],"Dirichlet":[179,224],"Allocation":[180,225],"Doc2Vec.":[182],"Experimental":[183],"results":[184],"show":[185],"that,":[186],"accuracy":[187],"Model":[191],"were":[192],"not":[193],"competitive":[194],"with":[195],"other":[196],"all":[199],"tasks.":[201],"Indexing":[204],"overly":[206],"sensitive":[207],"itself,":[210],"it":[212],"behaved":[213],"differently":[214],"when":[215,228],"corpora.":[222,247],"behaves":[226,239],"best":[227,240],"small":[232],"size":[233],"while":[237],"Doc2Vec":[238],"large":[242],"set":[244],"Accordingly,":[248],"characteristics":[249],"should":[252],"be":[253],"under":[254],"considerations":[255],"rational":[257],"utilization":[258],"have":[264],"better":[265],"performance.":[266]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":2},{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":2},{"year":2018,"cited_by_count":3},{"year":2017,"cited_by_count":2}],"updated_date":"2026-05-21T06:26:12.895304","created_date":"2025-10-10T00:00:00"}
