{"id":"https://openalex.org/W4391250817","doi":"https://doi.org/10.1109/coginfocom59411.2023.10397493","title":"Multilabel Clustering Analysis of the Croatian-English Parallel Corpus Based on Latent Dirichlet Allocation Algorithm","display_name":"Multilabel Clustering Analysis of the Croatian-English Parallel Corpus Based on Latent Dirichlet Allocation Algorithm","publication_year":2023,"publication_date":"2023-09-22","ids":{"openalex":"https://openalex.org/W4391250817","doi":"https://doi.org/10.1109/coginfocom59411.2023.10397493"},"language":"en","primary_location":{"id":"doi:10.1109/coginfocom59411.2023.10397493","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/coginfocom59411.2023.10397493","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 14th IEEE International Conference on Cognitive Infocommunications (CogInfoCom)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5020802433","display_name":"Erzs\u00e9bet T\u00f3th","orcid":"https://orcid.org/0000-0003-1805-6283"},"institutions":[{"id":"https://openalex.org/I132735039","display_name":"University of Debrecen","ror":"https://ror.org/02xf66n48","country_code":"HU","type":"education","lineage":["https://openalex.org/I132735039"]}],"countries":["HU"],"is_corresponding":false,"raw_author_name":"Erzs\u00e9bet T\u00f3th","raw_affiliation_strings":["University of Debrecen,Faculty of Informatics,Debrecen,Hungary","Faculty of Informatics, University of Debrecen, Debrecen, Hungary"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"University of Debrecen,Faculty of Informatics,Debrecen,Hungary","institution_ids":["https://openalex.org/I132735039"]},{"raw_affiliation_string":"Faculty of Informatics, University of Debrecen, Debrecen, Hungary","institution_ids":["https://openalex.org/I132735039"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5011344244","display_name":"Zolt\u00e1n G\u00e1l","orcid":"https://orcid.org/0000-0003-1771-6497"},"institutions":[{"id":"https://openalex.org/I132735039","display_name":"University of Debrecen","ror":"https://ror.org/02xf66n48","country_code":"HU","type":"education","lineage":["https://openalex.org/I132735039"]}],"countries":["HU"],"is_corresponding":false,"raw_author_name":"Zolt\u00e1n G\u00e1l","raw_affiliation_strings":["Center of Supercomputing, University of Debrecen,Debrecen,Hungary","Center of Supercomputing, University of Debrecen, Debrecen, Hungary"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Center of Supercomputing, University of Debrecen,Debrecen,Hungary","institution_ids":["https://openalex.org/I132735039"]},{"raw_affiliation_string":"Center of Supercomputing, University of Debrecen, Debrecen, Hungary","institution_ids":["https://openalex.org/I132735039"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I132735039"],"apc_list":null,"apc_paid":null,"fwci":0.1613,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.59529416,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":94},"biblio":{"volume":"8725","issue":null,"first_page":"000025","last_page":"000032"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9990000128746033,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9904000163078308,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/latent-dirichlet-allocation","display_name":"Latent Dirichlet allocation","score":0.8688821792602539},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7869167327880859},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5762031674385071},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.522476077079773},{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.4792461395263672},{"id":"https://openalex.org/keywords/classifier","display_name":"Classifier (UML)","score":0.4500947594642639},{"id":"https://openalex.org/keywords/hamming-distance","display_name":"Hamming distance","score":0.43372637033462524},{"id":"https://openalex.org/keywords/topic-model","display_name":"Topic model","score":0.40751898288726807},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.40402787923812866},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.35562169551849365}],"concepts":[{"id":"https://openalex.org/C500882744","wikidata":"https://www.wikidata.org/wiki/Q269236","display_name":"Latent Dirichlet allocation","level":3,"score":0.8688821792602539},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7869167327880859},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5762031674385071},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.522476077079773},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.4792461395263672},{"id":"https://openalex.org/C95623464","wikidata":"https://www.wikidata.org/wiki/Q1096149","display_name":"Classifier (UML)","level":2,"score":0.4500947594642639},{"id":"https://openalex.org/C193319292","wikidata":"https://www.wikidata.org/wiki/Q272172","display_name":"Hamming distance","level":2,"score":0.43372637033462524},{"id":"https://openalex.org/C171686336","wikidata":"https://www.wikidata.org/wiki/Q3532085","display_name":"Topic model","level":2,"score":0.40751898288726807},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.40402787923812866},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.35562169551849365}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/coginfocom59411.2023.10397493","is_oa":false,"landing_page_url":"http://dx.doi.org/10.1109/coginfocom59411.2023.10397493","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 14th IEEE International Conference on Cognitive Infocommunications (CogInfoCom)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":20,"referenced_works":["https://openalex.org/W658480537","https://openalex.org/W1971415293","https://openalex.org/W1999954155","https://openalex.org/W2090630554","https://openalex.org/W2114315281","https://openalex.org/W2146241755","https://openalex.org/W2165599843","https://openalex.org/W2496235729","https://openalex.org/W2606098075","https://openalex.org/W2739996966","https://openalex.org/W2911798819","https://openalex.org/W3159649695","https://openalex.org/W4211003447","https://openalex.org/W4231510805","https://openalex.org/W4247292419","https://openalex.org/W4290802627","https://openalex.org/W6606433418","https://openalex.org/W6764454516","https://openalex.org/W6841070347","https://openalex.org/W7062422307"],"related_works":["https://openalex.org/W4312773271","https://openalex.org/W4315588616","https://openalex.org/W2769501189","https://openalex.org/W2888805565","https://openalex.org/W2962686197","https://openalex.org/W2207653751","https://openalex.org/W3159709618","https://openalex.org/W2611137333","https://openalex.org/W3005513013","https://openalex.org/W4389543811"],"abstract_inverted_index":{"A":[0],"parallel":[1],"corpus":[2,99],"of":[3,18,42,64,105,126,136,141,160,163,170,173,183],"Croatian":[4],"EU":[5],"legislative":[6],"documents":[7],"translated":[8],"automatically":[9,92],"to":[10,35,91,120,156],"English":[11],"over":[12],"28":[13],"years":[14],"with":[15,100],"a":[16,102,158],"year":[17],"creation":[19],"and":[20,28,87,139],"hierarchical":[21],"classifier":[22],"tags":[23,96,119],"including":[24],"descriptors,":[25],"document":[26],"types,":[27],"fields":[29,50,182],"considered":[30],"as":[31,74],"meta":[32],"information":[33],"assigned":[34],"each":[36],"text.":[37],"Only":[38],"two":[39],"third":[40],"part":[41],"around":[43],"1.5":[44],"thousand":[45],"texts":[46,66,166],"have":[47],"all":[48],"the":[49,53,94,98,106,127,134,137,168,174,180,184],"completed,":[51],"accomplishing":[52],"required":[54],"manual":[55,107],"work":[56],"too":[57],"time-consuming":[58],"for":[59,97],"human":[60],"administration.":[61],"Similar":[62],"incompleteness":[63],"legal":[65,71,165,181],"may":[67],"appear":[68],"in":[69,133],"official":[70],"sites":[72],"operated":[73],"regular":[75],"service":[76],"provisioning":[77],"databases.":[78],"In":[79],"this":[80,147],"paper":[81],"we":[82],"proposed":[83],"an":[84],"artificial":[85],"cognitive":[86],"multilabel":[88],"classification":[89],"method":[90],"find":[93],"necessary":[95],"just":[101],"tiny":[103],"fraction":[104],"tagging":[108],"time.":[109],"The":[110,124],"Latent":[111],"Dirichlet":[112],"Allocation":[113],"algorithm":[114,148],"assigns":[115],"field":[116],"values":[117],"or":[118],"incompletely":[121],"labelled":[122],"documents.":[123],"dependence":[125],"quantitative":[128],"linguistics":[129],"properties":[130],"was":[131],"presented":[132],"function":[135],"type":[138],"specialty":[140],"pre-processing":[142],"tasks.":[143],"We":[144],"successfully":[145],"applied":[146],"built":[149],"on":[150,167],"no":[151],"error":[152],"correcting":[153],"optimising":[154],"codes":[155],"predict":[157],"mixture":[159],"topic":[161],"probabilities":[162],"these":[164],"basis":[169],"Hamming":[171],"distance":[172],"binary":[175],"feature":[176],"vectors":[177],"created":[178],"using":[179],"EUROVOC":[185],"multilingual":[186],"thesaurus.":[187]},"counts_by_year":[{"year":2024,"cited_by_count":1}],"updated_date":"2026-06-26T08:34:08.712188","created_date":"2025-10-10T00:00:00"}
