{"id":"https://openalex.org/W4285334212","doi":"https://doi.org/10.1109/la-cci48322.2021.9769839","title":"Comparison of Feature Extraction Methods for Brazilian Legal Documents Clustering","display_name":"Comparison of Feature Extraction Methods for Brazilian Legal Documents Clustering","publication_year":2021,"publication_date":"2021-11-02","ids":{"openalex":"https://openalex.org/W4285334212","doi":"https://doi.org/10.1109/la-cci48322.2021.9769839"},"language":"en","primary_location":{"id":"doi:10.1109/la-cci48322.2021.9769839","is_oa":false,"landing_page_url":"https://doi.org/10.1109/la-cci48322.2021.9769839","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 IEEE Latin American Conference on Computational Intelligence (LA-CCI)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5077763551","display_name":"Joao Pedro Lima","orcid":null},"institutions":[{"id":"https://openalex.org/I35046152","display_name":"Universidade Federal do Rio Grande do Norte","ror":"https://ror.org/04wn09761","country_code":"BR","type":"education","lineage":["https://openalex.org/I35046152"]}],"countries":["BR"],"is_corresponding":true,"raw_author_name":"Joao Pedro Lima","raw_affiliation_strings":["Universidade Federal do Rio Grande do Norte,Natal,Brazil","Universidade Federal do Rio Grande do Norte, Natal, Brazil"],"affiliations":[{"raw_affiliation_string":"Universidade Federal do Rio Grande do Norte,Natal,Brazil","institution_ids":["https://openalex.org/I35046152"]},{"raw_affiliation_string":"Universidade Federal do Rio Grande do Norte, Natal, Brazil","institution_ids":["https://openalex.org/I35046152"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5025179228","display_name":"Jos\u00e9 Alfredo Ferreira Costa","orcid":"https://orcid.org/0000-0002-1290-6454"},"institutions":[{"id":"https://openalex.org/I35046152","display_name":"Universidade Federal do Rio Grande do Norte","ror":"https://ror.org/04wn09761","country_code":"BR","type":"education","lineage":["https://openalex.org/I35046152"]}],"countries":["BR"],"is_corresponding":false,"raw_author_name":"Jose Alfredo Costa","raw_affiliation_strings":["Universidade Federal do Rio Grande do Norte,Natal,Brazil","Universidade Federal do Rio Grande do Norte, Natal, Brazil"],"affiliations":[{"raw_affiliation_string":"Universidade Federal do Rio Grande do Norte,Natal,Brazil","institution_ids":["https://openalex.org/I35046152"]},{"raw_affiliation_string":"Universidade Federal do Rio Grande do Norte, Natal, Brazil","institution_ids":["https://openalex.org/I35046152"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5076211869","display_name":"Di\u00f3genes Carlos Ara\u00fajo","orcid":null},"institutions":[{"id":"https://openalex.org/I35046152","display_name":"Universidade Federal do Rio Grande do Norte","ror":"https://ror.org/04wn09761","country_code":"BR","type":"education","lineage":["https://openalex.org/I35046152"]}],"countries":["BR"],"is_corresponding":false,"raw_author_name":"Diogenes Carlos Araujo","raw_affiliation_strings":["Universidade Federal do Rio Grande do Norte,Natal,Brazil","Universidade Federal do Rio Grande do Norte, Natal, Brazil"],"affiliations":[{"raw_affiliation_string":"Universidade Federal do Rio Grande do Norte,Natal,Brazil","institution_ids":["https://openalex.org/I35046152"]},{"raw_affiliation_string":"Universidade Federal do Rio Grande do Norte, Natal, Brazil","institution_ids":["https://openalex.org/I35046152"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5077763551"],"corresponding_institution_ids":["https://openalex.org/I35046152"],"apc_list":null,"apc_paid":null,"fwci":2.4097,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.92342873,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":91,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T13643","display_name":"Artificial Intelligence in Law","score":0.9861999750137329,"subfield":{"id":"https://openalex.org/subfields/3320","display_name":"Political Science and International Relations"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T13643","display_name":"Artificial Intelligence in Law","score":0.9861999750137329,"subfield":{"id":"https://openalex.org/subfields/3320","display_name":"Political Science and International Relations"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11652","display_name":"Imbalanced Data Classification Techniques","score":0.9702000021934509,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.7624045610427856},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7590717673301697},{"id":"https://openalex.org/keywords/tf\u2013idf","display_name":"tf\u2013idf","score":0.7453518509864807},{"id":"https://openalex.org/keywords/interpretability","display_name":"Interpretability","score":0.6392090320587158},{"id":"https://openalex.org/keywords/word2vec","display_name":"Word2vec","score":0.5624316334724426},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.4844648838043213},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.4731636643409729},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.45113405585289},{"id":"https://openalex.org/keywords/brazilian-portuguese","display_name":"Brazilian Portuguese","score":0.42477524280548096},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.348808228969574},{"id":"https://openalex.org/keywords/term","display_name":"Term (time)","score":0.23247551918029785},{"id":"https://openalex.org/keywords/portuguese","display_name":"Portuguese","score":0.20138081908226013}],"concepts":[{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.7624045610427856},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7590717673301697},{"id":"https://openalex.org/C81758059","wikidata":"https://www.wikidata.org/wiki/Q796584","display_name":"tf\u2013idf","level":3,"score":0.7453518509864807},{"id":"https://openalex.org/C2781067378","wikidata":"https://www.wikidata.org/wiki/Q17027399","display_name":"Interpretability","level":2,"score":0.6392090320587158},{"id":"https://openalex.org/C2776461190","wikidata":"https://www.wikidata.org/wiki/Q22673982","display_name":"Word2vec","level":3,"score":0.5624316334724426},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.4844648838043213},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.4731636643409729},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.45113405585289},{"id":"https://openalex.org/C2778880076","wikidata":"https://www.wikidata.org/wiki/Q750553","display_name":"Brazilian Portuguese","level":3,"score":0.42477524280548096},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.348808228969574},{"id":"https://openalex.org/C61797465","wikidata":"https://www.wikidata.org/wiki/Q1188986","display_name":"Term (time)","level":2,"score":0.23247551918029785},{"id":"https://openalex.org/C35219183","wikidata":"https://www.wikidata.org/wiki/Q5146","display_name":"Portuguese","level":2,"score":0.20138081908226013},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/la-cci48322.2021.9769839","is_oa":false,"landing_page_url":"https://doi.org/10.1109/la-cci48322.2021.9769839","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 IEEE Latin American Conference on Computational Intelligence (LA-CCI)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions","score":0.7900000214576721}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":30,"referenced_works":["https://openalex.org/W1501592829","https://openalex.org/W1989896487","https://openalex.org/W2101234009","https://openalex.org/W2131744502","https://openalex.org/W2250539671","https://openalex.org/W2806332249","https://openalex.org/W2896457183","https://openalex.org/W2907048223","https://openalex.org/W2953979308","https://openalex.org/W2955507644","https://openalex.org/W2963026768","https://openalex.org/W2963923670","https://openalex.org/W3016649969","https://openalex.org/W3119409427","https://openalex.org/W3164539510","https://openalex.org/W3199624848","https://openalex.org/W4250042253","https://openalex.org/W4294170691","https://openalex.org/W4298857951","https://openalex.org/W6630108961","https://openalex.org/W6675354045","https://openalex.org/W6679775712","https://openalex.org/W6682691769","https://openalex.org/W6743359967","https://openalex.org/W6755207826","https://openalex.org/W6758010508","https://openalex.org/W6764761971","https://openalex.org/W6788759771","https://openalex.org/W6795889794","https://openalex.org/W6801367590"],"related_works":["https://openalex.org/W2903145235","https://openalex.org/W4226211987","https://openalex.org/W2574070988","https://openalex.org/W2913738019","https://openalex.org/W2747336051","https://openalex.org/W2580878117","https://openalex.org/W2208234687","https://openalex.org/W2999349876","https://openalex.org/W3070760781","https://openalex.org/W4388039896"],"abstract_inverted_index":{"This":[0],"article":[1],"aims":[2],"to":[3,136,155],"evaluate":[4],"the":[5,14,53,62,84,92,117,132,138,142,145,160],"impact":[6],"of":[7,16,25,28,55,86,89,107,113,116,162],"different":[8,39,43],"textual":[9],"feature":[10],"extraction":[11],"methods":[12,153],"in":[13,38,51,91,104,110,148],"task":[15],"clustering":[17,56,75],"Brazilian":[18,111],"legal":[19,93],"texts.":[20],"We":[21],"compared":[22],"Binary":[23],"Bag":[24,27],"Words,":[26,29],"Term":[30],"Frequency-Inverse":[31],"Document":[32],"Frequency,":[33],"Word2vec":[34],"and":[35,41,78,98],"Doc2vec":[36],"models":[37,147],"dimensions":[40],"with":[42,159],"hyperparameters,":[44],"totaling":[45],"45":[46],"models.":[47],"Our":[48,100],"experiment":[49],"consists":[50],"evaluating":[52],"result":[54],"done":[57,71],"by":[58,65],"K-Means":[59],"algorithm":[60,90],"over":[61],"vectors":[63],"created":[64],"each":[66],"model.":[67],"The":[68,127,151],"evaluation":[69,76],"was":[70],"both":[72],"quantitatively,":[73],"using":[74],"metrics,":[77],"qualitatively,":[79],"considering":[80],"relevant":[81],"aspects":[82],"for":[83,141],"application":[85],"this":[87],"type":[88],"environment,":[94],"such":[95],"as":[96],"transparency":[97],"interpretability.":[99],"experiments":[101],"were":[102],"conducted":[103],"a":[105],"database":[106],"30,000":[108],"documents":[109],"Portuguese":[112],"judicial":[114],"moves":[115],"Tribunal":[118],"de":[119],"Justi\u00e7a":[120],"do":[121,124],"Rio":[122],"Grande":[123],"Norte":[125],"(TJRN).":[126],"research":[128],"results":[129],"suggest":[130],"that":[131],"TF-IDF":[133],"method":[134],"seems":[135],"be":[137],"most":[139],"suitable":[140],"task,":[143],"outperforming":[144],"other":[146,152],"considered":[149],"metrics.":[150],"appear":[154],"perform":[156],"equally":[157],"well,":[158],"exception":[161],"Doc2vec,":[163],"which":[164],"performed":[165],"poorly.":[166]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2023,"cited_by_count":3}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
