{"id":"https://openalex.org/W3158069136","doi":"https://doi.org/10.3233/ida-205154","title":"Efficient n-gram construction for text categorization using feature selection techniques","display_name":"Efficient n-gram construction for text categorization using feature selection techniques","publication_year":2021,"publication_date":"2021-04-20","ids":{"openalex":"https://openalex.org/W3158069136","doi":"https://doi.org/10.3233/ida-205154","mag":"3158069136"},"language":"en","primary_location":{"id":"doi:10.3233/ida-205154","is_oa":false,"landing_page_url":"https://doi.org/10.3233/ida-205154","pdf_url":null,"source":{"id":"https://openalex.org/S2498839158","display_name":"Intelligent Data Analysis","issn_l":"1088-467X","issn":["1088-467X","1571-4128"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310318577","host_organization_name":"IOS Press","host_organization_lineage":["https://openalex.org/P4310318577"],"host_organization_lineage_names":["IOS Press"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Intelligent Data Analysis","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5000787609","display_name":"Maximiliano Garc\u00eda","orcid":null},"institutions":[{"id":"https://openalex.org/I189977406","display_name":"Universidad de Los Andes, Chile","ror":"https://ror.org/03v0qd864","country_code":"CL","type":"education","lineage":["https://openalex.org/I189977406"]}],"countries":["CL"],"is_corresponding":false,"raw_author_name":"Maximiliano Garc\u00eda","raw_affiliation_strings":["Universidad de los Andes, Santiago, Chile"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Universidad de los Andes, Santiago, Chile","institution_ids":["https://openalex.org/I189977406"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5007513550","display_name":"Sebasti\u00e1n Maldonado","orcid":"https://orcid.org/0000-0002-7124-0437"},"institutions":[{"id":"https://openalex.org/I4210148528","display_name":"Complex Engineering System Institute","ror":"https://ror.org/04wnc7270","country_code":"CL","type":"facility","lineage":["https://openalex.org/I4210148528"]},{"id":"https://openalex.org/I69737025","display_name":"University of Chile","ror":"https://ror.org/047gc3g35","country_code":"CL","type":"education","lineage":["https://openalex.org/I69737025"]}],"countries":["CL"],"is_corresponding":true,"raw_author_name":"Sebasti\u00e1n Maldonado","raw_affiliation_strings":["Department of Management Control and Information Systems, School of Economics and Business, University of Chile, Santiago, Chile","Instituto Sistemas Complejos de Ingenier\u00eda (ISCI), Chile"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Management Control and Information Systems, School of Economics and Business, University of Chile, Santiago, Chile","institution_ids":["https://openalex.org/I69737025"]},{"raw_affiliation_string":"Instituto Sistemas Complejos de Ingenier\u00eda (ISCI), Chile","institution_ids":["https://openalex.org/I4210148528"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5037252217","display_name":"Carla Vairetti","orcid":"https://orcid.org/0000-0003-4612-1409"},"institutions":[{"id":"https://openalex.org/I189977406","display_name":"Universidad de Los Andes, Chile","ror":"https://ror.org/03v0qd864","country_code":"CL","type":"education","lineage":["https://openalex.org/I189977406"]},{"id":"https://openalex.org/I4210148528","display_name":"Complex Engineering System Institute","ror":"https://ror.org/04wnc7270","country_code":"CL","type":"facility","lineage":["https://openalex.org/I4210148528"]}],"countries":["CL"],"is_corresponding":false,"raw_author_name":"Carla Vairetti","raw_affiliation_strings":["Instituto Sistemas Complejos de Ingenier\u00eda (ISCI), Chile","Universidad de los Andes, Santiago, Chile"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Instituto Sistemas Complejos de Ingenier\u00eda (ISCI), Chile","institution_ids":["https://openalex.org/I4210148528"]},{"raw_affiliation_string":"Universidad de los Andes, Santiago, Chile","institution_ids":["https://openalex.org/I189977406"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5007513550"],"corresponding_institution_ids":["https://openalex.org/I4210148528","https://openalex.org/I69737025"],"apc_list":null,"apc_paid":null,"fwci":2.5185,"has_fulltext":false,"cited_by_count":22,"citation_normalized_percentile":{"value":0.90977354,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":99},"biblio":{"volume":"25","issue":"3","first_page":"509","last_page":"525"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10664","display_name":"Sentiment Analysis and Opinion Mining","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10664","display_name":"Sentiment Analysis and Opinion Mining","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9976000189781189,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/feature-selection","display_name":"Feature selection","score":0.7538205981254578},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7440117001533508},{"id":"https://openalex.org/keywords/a-priori-and-a-posteriori","display_name":"A priori and a posteriori","score":0.7085639834403992},{"id":"https://openalex.org/keywords/selection","display_name":"Selection (genetic algorithm)","score":0.6660811901092529},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6497414112091064},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.6460971832275391},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.6287387013435364},{"id":"https://openalex.org/keywords/categorization","display_name":"Categorization","score":0.5206120014190674},{"id":"https://openalex.org/keywords/text-categorization","display_name":"Text categorization","score":0.5204241871833801},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.5141087174415588},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.4929690361022949},{"id":"https://openalex.org/keywords/n-gram","display_name":"n-gram","score":0.47194308042526245},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.4594794511795044},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.44817838072776794},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.40044572949409485},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3675411343574524},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.12201768159866333},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.11336067318916321},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.08377125859260559}],"concepts":[{"id":"https://openalex.org/C148483581","wikidata":"https://www.wikidata.org/wiki/Q446488","display_name":"Feature selection","level":2,"score":0.7538205981254578},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7440117001533508},{"id":"https://openalex.org/C75553542","wikidata":"https://www.wikidata.org/wiki/Q178161","display_name":"A priori and a posteriori","level":2,"score":0.7085639834403992},{"id":"https://openalex.org/C81917197","wikidata":"https://www.wikidata.org/wiki/Q628760","display_name":"Selection (genetic algorithm)","level":2,"score":0.6660811901092529},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6497414112091064},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.6460971832275391},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.6287387013435364},{"id":"https://openalex.org/C94124525","wikidata":"https://www.wikidata.org/wiki/Q912550","display_name":"Categorization","level":2,"score":0.5206120014190674},{"id":"https://openalex.org/C2986744138","wikidata":"https://www.wikidata.org/wiki/Q302088","display_name":"Text categorization","level":3,"score":0.5204241871833801},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.5141087174415588},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.4929690361022949},{"id":"https://openalex.org/C117884012","wikidata":"https://www.wikidata.org/wiki/Q94489","display_name":"n-gram","level":3,"score":0.47194308042526245},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.4594794511795044},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.44817838072776794},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.40044572949409485},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3675411343574524},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.12201768159866333},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.11336067318916321},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.08377125859260559},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.3233/ida-205154","is_oa":false,"landing_page_url":"https://doi.org/10.3233/ida-205154","pdf_url":null,"source":{"id":"https://openalex.org/S2498839158","display_name":"Intelligent Data Analysis","issn_l":"1088-467X","issn":["1088-467X","1571-4128"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310318577","host_organization_name":"IOS Press","host_organization_lineage":["https://openalex.org/P4310318577"],"host_organization_lineage_names":["IOS Press"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Intelligent Data Analysis","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","score":0.6000000238418579,"display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":53,"referenced_works":["https://openalex.org/W81723500","https://openalex.org/W1496702343","https://openalex.org/W1525491024","https://openalex.org/W1530378878","https://openalex.org/W2022855932","https://openalex.org/W2027875450","https://openalex.org/W2028569501","https://openalex.org/W2040424159","https://openalex.org/W2054275862","https://openalex.org/W2057228200","https://openalex.org/W2058982198","https://openalex.org/W2093038183","https://openalex.org/W2096707493","https://openalex.org/W2106388373","https://openalex.org/W2116786260","https://openalex.org/W2118561568","https://openalex.org/W2118585731","https://openalex.org/W2119437913","https://openalex.org/W2123005130","https://openalex.org/W2124479345","https://openalex.org/W2152269015","https://openalex.org/W2171444767","https://openalex.org/W2256782633","https://openalex.org/W2263270445","https://openalex.org/W2282289695","https://openalex.org/W2330587814","https://openalex.org/W2343999414","https://openalex.org/W2413498812","https://openalex.org/W2565436997","https://openalex.org/W2586608967","https://openalex.org/W2594232474","https://openalex.org/W2765937321","https://openalex.org/W2767768852","https://openalex.org/W2768676856","https://openalex.org/W2779217680","https://openalex.org/W2790215293","https://openalex.org/W2791315675","https://openalex.org/W2802777019","https://openalex.org/W2889599398","https://openalex.org/W2895560478","https://openalex.org/W2902016849","https://openalex.org/W2908285635","https://openalex.org/W2921446579","https://openalex.org/W2921875967","https://openalex.org/W2946974829","https://openalex.org/W2962913728","https://openalex.org/W3105524694","https://openalex.org/W3124891265","https://openalex.org/W4205184193","https://openalex.org/W6600708310","https://openalex.org/W6677656871","https://openalex.org/W6812014127","https://openalex.org/W6843169231"],"related_works":["https://openalex.org/W2360898036","https://openalex.org/W2374651319","https://openalex.org/W1978285683","https://openalex.org/W2241978443","https://openalex.org/W2375828317","https://openalex.org/W2391010859","https://openalex.org/W2363775966","https://openalex.org/W2111353337","https://openalex.org/W2384103485","https://openalex.org/W2131750090"],"abstract_inverted_index":{"In":[0],"this":[1],"paper,":[2],"we":[3],"present":[4],"a":[5,95,116,147,157],"novel":[6],"approach":[7,33,131],"for":[8,34,105,115,125],"n-gram":[9,45],"generation":[10],"in":[11,37,160,167],"text":[12,35,122],"classification.":[13],"The":[14,62],"a-priori":[15,60],"algorithm":[16],"is":[17,41],"adapted":[18],"to":[19,70],"prune":[20],"word":[21,84],"sequences":[22,85],"by":[23],"combining":[24],"three":[25],"feature":[26,39,53,117,141],"selection":[27,40,118,142],"techniques.":[28],"Unlike":[29],"the":[30,44,56,59,66,75,79,83,90,100,113,133,151],"traditional":[31],"two-step":[32],"classification":[36,123],"which":[38],"performed":[42],"after":[43],"construction":[46,80],"process,":[47],"our":[48,130,161],"proposal":[49,88],"performs":[50],"an":[51],"embedded":[52],"elimination":[54],"during":[55],"application":[57],"of":[58,68,81,93,150],"algorithm.":[61],"proposed":[63],"strategy":[64],"reduces":[65],"number":[67],"branches":[69],"be":[71,109],"explored,":[72],"speeding":[73],"up":[74],"process":[76],"and":[77,153,165],"making":[78],"all":[82],"tractable.":[86],"Our":[87],"has":[89],"additional":[91],"advantage":[92],"constructing":[94],"low-dimensional":[96],"dataset":[97],"with":[98,139],"only":[99],"features":[101],"that":[102,107,129,155],"are":[103],"relevant":[104],"classification,":[106],"can":[108],"used":[110],"directly":[111],"without":[112],"need":[114],"step.":[119],"Experiments":[120],"on":[121],"datasets":[124],"sentiment":[126],"analysis":[127],"demonstrate":[128],"yields":[132],"best":[134],"predictive":[135],"performance":[136],"when":[137],"compared":[138],"other":[140],"approaches,":[143],"while":[144],"also":[145],"facilitating":[146],"better":[148],"understanding":[149],"words":[152],"phrases":[154],"explain":[156],"given":[158],"task;":[159],"case":[162],"online":[163],"reviews":[164],"ratings":[166],"various":[168],"domains.":[169]},"counts_by_year":[{"year":2025,"cited_by_count":4},{"year":2024,"cited_by_count":5},{"year":2023,"cited_by_count":7},{"year":2022,"cited_by_count":5},{"year":2021,"cited_by_count":1}],"updated_date":"2026-05-21T06:26:12.895304","created_date":"2025-10-10T00:00:00"}
