{"id":"https://openalex.org/W2529769913","doi":"https://doi.org/10.2298/csis151017030g","title":"Hierarchical vs. flat n-gram-based text categorization: Can we do better?","display_name":"Hierarchical vs. flat n-gram-based text categorization: Can we do better?","publication_year":2016,"publication_date":"2016-10-07","ids":{"openalex":"https://openalex.org/W2529769913","doi":"https://doi.org/10.2298/csis151017030g","mag":"2529769913"},"language":"en","primary_location":{"id":"doi:10.2298/csis151017030g","is_oa":true,"landing_page_url":"https://doi.org/10.2298/csis151017030g","pdf_url":null,"source":{"id":"https://openalex.org/S206939107","display_name":"Computer Science and Information Systems","issn_l":"1820-0214","issn":["1820-0214","2406-1018"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310321031","host_organization_name":"ComSIS Consortium","host_organization_lineage":["https://openalex.org/P4310321031"],"host_organization_lineage_names":["ComSIS Consortium"],"type":"journal"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Computer Science and Information Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://doi.org/10.2298/csis151017030g","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5007594744","display_name":"Jelena Graovac","orcid":"https://orcid.org/0000-0002-9323-4695"},"institutions":[{"id":"https://openalex.org/I4068193","display_name":"University of Belgrade","ror":"https://ror.org/02qsmb048","country_code":"RS","type":"education","lineage":["https://openalex.org/I4068193"]}],"countries":["RS"],"is_corresponding":true,"raw_author_name":"Jelena Graovac","raw_affiliation_strings":["Faculty of Mathematics, Belgrade","Faculty of Mathematics, University of Belgrade Studentski trg 16, 11000 Belgrade, Serbia"],"affiliations":[{"raw_affiliation_string":"Faculty of Mathematics, Belgrade","institution_ids":["https://openalex.org/I4068193"]},{"raw_affiliation_string":"Faculty of Mathematics, University of Belgrade Studentski trg 16, 11000 Belgrade, Serbia","institution_ids":["https://openalex.org/I4068193"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5076714586","display_name":"Jovana Kova\u010devi\u0107","orcid":"https://orcid.org/0000-0003-1254-4294"},"institutions":[{"id":"https://openalex.org/I4068193","display_name":"University of Belgrade","ror":"https://ror.org/02qsmb048","country_code":"RS","type":"education","lineage":["https://openalex.org/I4068193"]},{"id":"https://openalex.org/I4210119109","display_name":"Indiana University Bloomington","ror":"https://ror.org/02k40bc56","country_code":"US","type":"education","lineage":["https://openalex.org/I4210119109","https://openalex.org/I592451"]}],"countries":["RS","US"],"is_corresponding":false,"raw_author_name":"Jovana Kovacevic","raw_affiliation_strings":["Faculty of Mathematics, Belgrade + Indiana University Bloomington, School of Informatics and Computing, Indiana, USA","Faculty of Mathematics, University of Belgrade Studentski trg 16, 11000 Belgrade, Serbia"],"affiliations":[{"raw_affiliation_string":"Faculty of Mathematics, Belgrade + Indiana University Bloomington, School of Informatics and Computing, Indiana, USA","institution_ids":["https://openalex.org/I4210119109"]},{"raw_affiliation_string":"Faculty of Mathematics, University of Belgrade Studentski trg 16, 11000 Belgrade, Serbia","institution_ids":["https://openalex.org/I4068193"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5066739339","display_name":"Gordana Pavlovi\u0107-La\u017eeti\u0107","orcid":"https://orcid.org/0000-0002-0665-1053"},"institutions":[{"id":"https://openalex.org/I4068193","display_name":"University of Belgrade","ror":"https://ror.org/02qsmb048","country_code":"RS","type":"education","lineage":["https://openalex.org/I4068193"]}],"countries":["RS"],"is_corresponding":false,"raw_author_name":"Gordana Pavlovic-Lazetic","raw_affiliation_strings":["Faculty of Mathematics, Belgrade","Faculty of Mathematics, University of Belgrade Studentski trg 16, 11000 Belgrade, Serbia"],"affiliations":[{"raw_affiliation_string":"Faculty of Mathematics, Belgrade","institution_ids":["https://openalex.org/I4068193"]},{"raw_affiliation_string":"Faculty of Mathematics, University of Belgrade Studentski trg 16, 11000 Belgrade, Serbia","institution_ids":["https://openalex.org/I4068193"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5007594744"],"corresponding_institution_ids":["https://openalex.org/I4068193"],"apc_list":null,"apc_paid":null,"fwci":2.5709,"has_fulltext":false,"cited_by_count":14,"citation_normalized_percentile":{"value":0.91974755,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":"14","issue":"1","first_page":"103","last_page":"121"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13083","display_name":"Advanced Text Analysis Techniques","score":0.9912999868392944,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11269","display_name":"Algorithms and Data Compression","score":0.9911999702453613,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8326908946037292},{"id":"https://openalex.org/keywords/categorization","display_name":"Categorization","score":0.6767569780349731},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6702793836593628},{"id":"https://openalex.org/keywords/text-categorization","display_name":"Text categorization","score":0.6588218808174133},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.6226887702941895},{"id":"https://openalex.org/keywords/n-gram","display_name":"n-gram","score":0.5576427578926086},{"id":"https://openalex.org/keywords/preprocessor","display_name":"Preprocessor","score":0.530691385269165},{"id":"https://openalex.org/keywords/support-vector-machine","display_name":"Support vector machine","score":0.50861656665802},{"id":"https://openalex.org/keywords/text-graph","display_name":"Text graph","score":0.45755186676979065},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.37218472361564636},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3290819525718689},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.2362186312675476},{"id":"https://openalex.org/keywords/text-mining","display_name":"Text mining","score":0.22106072306632996}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8326908946037292},{"id":"https://openalex.org/C94124525","wikidata":"https://www.wikidata.org/wiki/Q912550","display_name":"Categorization","level":2,"score":0.6767569780349731},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6702793836593628},{"id":"https://openalex.org/C2986744138","wikidata":"https://www.wikidata.org/wiki/Q302088","display_name":"Text categorization","level":3,"score":0.6588218808174133},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.6226887702941895},{"id":"https://openalex.org/C117884012","wikidata":"https://www.wikidata.org/wiki/Q94489","display_name":"n-gram","level":3,"score":0.5576427578926086},{"id":"https://openalex.org/C34736171","wikidata":"https://www.wikidata.org/wiki/Q918333","display_name":"Preprocessor","level":2,"score":0.530691385269165},{"id":"https://openalex.org/C12267149","wikidata":"https://www.wikidata.org/wiki/Q282453","display_name":"Support vector machine","level":2,"score":0.50861656665802},{"id":"https://openalex.org/C66945725","wikidata":"https://www.wikidata.org/wiki/Q18388823","display_name":"Text graph","level":3,"score":0.45755186676979065},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.37218472361564636},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3290819525718689},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.2362186312675476},{"id":"https://openalex.org/C71472368","wikidata":"https://www.wikidata.org/wiki/Q676880","display_name":"Text mining","level":2,"score":0.22106072306632996}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.2298/csis151017030g","is_oa":true,"landing_page_url":"https://doi.org/10.2298/csis151017030g","pdf_url":null,"source":{"id":"https://openalex.org/S206939107","display_name":"Computer Science and Information Systems","issn_l":"1820-0214","issn":["1820-0214","2406-1018"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310321031","host_organization_name":"ComSIS Consortium","host_organization_lineage":["https://openalex.org/P4310321031"],"host_organization_lineage_names":["ComSIS Consortium"],"type":"journal"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Computer Science and Information Systems","raw_type":"journal-article"},{"id":"pmh:oai:doiserbia:1820-02141600030G","is_oa":false,"landing_page_url":"http://doi.org/10.2298/CSIS151017030G","pdf_url":null,"source":{"id":"https://openalex.org/S4306400515","display_name":"Digital Object Identifier (DOI) Repository Serbia (National Library of Serbia)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4210142455","host_organization_name":"National Library of Serbia","host_organization_lineage":["https://openalex.org/I4210142455"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":". (00) 30-30"}],"best_oa_location":{"id":"doi:10.2298/csis151017030g","is_oa":true,"landing_page_url":"https://doi.org/10.2298/csis151017030g","pdf_url":null,"source":{"id":"https://openalex.org/S206939107","display_name":"Computer Science and Information Systems","issn_l":"1820-0214","issn":["1820-0214","2406-1018"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310321031","host_organization_name":"ComSIS Consortium","host_organization_lineage":["https://openalex.org/P4310321031"],"host_organization_lineage_names":["ComSIS Consortium"],"type":"journal"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Computer Science and Information Systems","raw_type":"journal-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education","score":0.7699999809265137}],"awards":[{"id":"https://openalex.org/G984628184","display_name":null,"funder_award_id":"174021 i  III47003","funder_id":"https://openalex.org/F4320322729","funder_display_name":"Ministarstvo Prosvete, Nauke i Tehnolo\u0161kog Razvoja"}],"funders":[{"id":"https://openalex.org/F4320322729","display_name":"Ministarstvo Prosvete, Nauke i Tehnolo\u0161kog Razvoja","ror":"https://ror.org/01znas443"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":42,"referenced_works":["https://openalex.org/W65650981","https://openalex.org/W128199165","https://openalex.org/W1020256256","https://openalex.org/W1493526108","https://openalex.org/W1524433756","https://openalex.org/W1529790664","https://openalex.org/W1530114168","https://openalex.org/W1550206324","https://openalex.org/W1586089682","https://openalex.org/W1586252162","https://openalex.org/W1597406287","https://openalex.org/W1604792744","https://openalex.org/W1620204465","https://openalex.org/W1660390307","https://openalex.org/W1847213163","https://openalex.org/W1868454393","https://openalex.org/W1997530783","https://openalex.org/W2007098590","https://openalex.org/W2016768890","https://openalex.org/W2022645200","https://openalex.org/W2024119624","https://openalex.org/W2027204661","https://openalex.org/W2031248101","https://openalex.org/W2034025776","https://openalex.org/W2046114481","https://openalex.org/W2070535792","https://openalex.org/W2070542280","https://openalex.org/W2077275826","https://openalex.org/W2091084070","https://openalex.org/W2105842272","https://openalex.org/W2120820103","https://openalex.org/W2121082043","https://openalex.org/W2131070108","https://openalex.org/W2132201804","https://openalex.org/W2140321362","https://openalex.org/W2150102617","https://openalex.org/W2150766729","https://openalex.org/W2170770919","https://openalex.org/W2180101149","https://openalex.org/W2322183283","https://openalex.org/W2429914308","https://openalex.org/W3122990092"],"related_works":["https://openalex.org/W2360898036","https://openalex.org/W2390857744","https://openalex.org/W2390698788","https://openalex.org/W2133651098","https://openalex.org/W2078570174","https://openalex.org/W2383063829","https://openalex.org/W2138922887","https://openalex.org/W2035261173","https://openalex.org/W2106892947","https://openalex.org/W2237299843"],"abstract_inverted_index":{"Hierarchical":[0],"text":[1,8,57,129],"categorization":[2,39,115,130,169],"(HTC)":[3],"refers":[4],"to":[5,10,118,158],"assigning":[6],"a":[7,18],"document":[9,45,65],"one":[11],"or":[12,60,67],"more":[13],"most":[14],"suitable":[15],"categories":[16,121],"from":[17,152],"hierarchical":[19,168],"category":[20,88],"space.":[21],"In":[22],"this":[23],"paper":[24],"we":[25],"present":[26],"two":[27],"HTC":[28,137,146],"techniques":[29,37,74,116,186],"based":[30,44],"on":[31,84,138,170],"kNN":[32],"and":[33,41,52,75,98,102,142,167,177],"SVM":[34],"machine":[35],"learning":[36],"for":[38],"process":[40],"byte":[42],"n-gram":[43],"representation.":[46],"They":[47],"are":[48,79,109],"fully":[49],"language":[50,77],"independent":[51],"do":[53,148],"not":[54,149],"require":[55],"any":[56,61],"preprocessing":[58],"steps,":[59],"prior":[62],"information":[63],"about":[64],"content":[66],"language.":[68],"The":[69,106],"effectiveness":[70],"of":[71,122],"the":[72,112,123,139,153,174,184],"presented":[73],"their":[76],"independence":[78],"demonstrated":[80],"in":[81,92,100,104],"experiments":[82],"performed":[83],"five":[85],"tree-structured":[86],"benchmark":[87],"hierarchies":[89],"that":[90,183],"differ":[91,151],"many":[93],"aspects:":[94],"Reuters-Hier1,":[95],"Reuters-Hier2,":[96],"15NGHier":[97],"20NGHier":[99,143],"English":[101],"TanCorpHier":[103,141],"Chinese.":[105],"results":[107,134,147],"obtained":[108,187],"compared":[110],"with":[111],"corresponding":[113,154],"flat":[114,128,155,166],"applied":[117],"leaf":[119],"level":[120],"considered":[124],"hierarchies.":[125],"While":[126],"kNN-based":[127,136,165],"produced":[131],"slightly":[132],"better":[133],"than":[135],"largest":[140],"datasets,":[144],"SVM-based":[145],"considerably":[150],"techniques,":[156],"due":[157],"shallow":[159],"hierarchies;":[160],"still,":[161],"they":[162],"outperform":[163],"both":[164],"all":[171],"corpora":[172],"except":[173],"smallest":[175],"Reuters-Hier1":[176],"Reuters-Hier2":[178],"datasets.":[179],"Formal":[180],"evaluation":[181],"confirmed":[182],"proposed":[185],"state-of-the-art":[188],"results.":[189]},"counts_by_year":[{"year":2023,"cited_by_count":3},{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":2},{"year":2019,"cited_by_count":3},{"year":2018,"cited_by_count":2},{"year":2017,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2016-10-14T00:00:00"}
