{"id":"https://openalex.org/W1965350272","doi":"https://doi.org/10.1145/2766462.2767743","title":"An Efficient and Scalable MetaFeature-based Document Classification Approach based on Massively Parallel Computing","display_name":"An Efficient and Scalable MetaFeature-based Document Classification Approach based on Massively Parallel Computing","publication_year":2015,"publication_date":"2015-08-04","ids":{"openalex":"https://openalex.org/W1965350272","doi":"https://doi.org/10.1145/2766462.2767743","mag":"1965350272"},"language":"en","primary_location":{"id":"doi:10.1145/2766462.2767743","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2766462.2767743","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 38th International ACM SIGIR Conference on Research and Development in Information Retrieval","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5046683090","display_name":"S\u00e9rgio Canuto","orcid":"https://orcid.org/0000-0003-2973-4158"},"institutions":[{"id":"https://openalex.org/I110200422","display_name":"Universidade Federal de Minas Gerais","ror":"https://ror.org/0176yjw32","country_code":"BR","type":"education","lineage":["https://openalex.org/I110200422"]}],"countries":["BR"],"is_corresponding":true,"raw_author_name":"S\u00e9rgio Canuto","raw_affiliation_strings":["Departament of Computer Science, Federal University of Minas Gerais, Belo Horizonte, Brazil"],"affiliations":[{"raw_affiliation_string":"Departament of Computer Science, Federal University of Minas Gerais, Belo Horizonte, Brazil","institution_ids":["https://openalex.org/I110200422"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5046370637","display_name":"Marcos Andr\u00e9 Gon\u00e7alves","orcid":"https://orcid.org/0000-0002-2075-3363"},"institutions":[{"id":"https://openalex.org/I110200422","display_name":"Universidade Federal de Minas Gerais","ror":"https://ror.org/0176yjw32","country_code":"BR","type":"education","lineage":["https://openalex.org/I110200422"]}],"countries":["BR"],"is_corresponding":false,"raw_author_name":"Marcos Gon\u00e7alves","raw_affiliation_strings":["Departament of Computer Science, Federal University of Minas Gerais, Belo Horizonte, Brazil"],"affiliations":[{"raw_affiliation_string":"Departament of Computer Science, Federal University of Minas Gerais, Belo Horizonte, Brazil","institution_ids":["https://openalex.org/I110200422"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5044438334","display_name":"W Vieira da Silva Santos","orcid":null},"institutions":[{"id":"https://openalex.org/I68106152","display_name":"Universidade Federal de Goi\u00e1s","ror":"https://ror.org/0039d5757","country_code":"BR","type":"education","lineage":["https://openalex.org/I68106152"]}],"countries":["BR"],"is_corresponding":false,"raw_author_name":"Wisllay Santos","raw_affiliation_strings":["Instituto de Inform\u00e1tica, Universidade Federal de Goi\u00e1s, Goi\u00e1nia, Brazil"],"affiliations":[{"raw_affiliation_string":"Instituto de Inform\u00e1tica, Universidade Federal de Goi\u00e1s, Goi\u00e1nia, Brazil","institution_ids":["https://openalex.org/I68106152"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5024659734","display_name":"Thierson Couto Rosa","orcid":"https://orcid.org/0000-0001-7117-3994"},"institutions":[{"id":"https://openalex.org/I68106152","display_name":"Universidade Federal de Goi\u00e1s","ror":"https://ror.org/0039d5757","country_code":"BR","type":"education","lineage":["https://openalex.org/I68106152"]}],"countries":["BR"],"is_corresponding":false,"raw_author_name":"Thierson Rosa","raw_affiliation_strings":["Instituto de Inform\u00e1tica, Universidade Federal de Goi\u00e1s, Goi\u00e1nia, Brazil"],"affiliations":[{"raw_affiliation_string":"Instituto de Inform\u00e1tica, Universidade Federal de Goi\u00e1s, Goi\u00e1nia, Brazil","institution_ids":["https://openalex.org/I68106152"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5033070469","display_name":"Wellington S. Martins","orcid":"https://orcid.org/0000-0002-9641-2565"},"institutions":[{"id":"https://openalex.org/I68106152","display_name":"Universidade Federal de Goi\u00e1s","ror":"https://ror.org/0039d5757","country_code":"BR","type":"education","lineage":["https://openalex.org/I68106152"]}],"countries":["BR"],"is_corresponding":false,"raw_author_name":"Wellington Martins","raw_affiliation_strings":["Instituto de Inform\u00e1tica, Universidade Federal de Goi\u00e1s, Goi\u00e1nia, Brazil"],"affiliations":[{"raw_affiliation_string":"Instituto de Inform\u00e1tica, Universidade Federal de Goi\u00e1s, Goi\u00e1nia, Brazil","institution_ids":["https://openalex.org/I68106152"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5046683090"],"corresponding_institution_ids":["https://openalex.org/I110200422"],"apc_list":null,"apc_paid":null,"fwci":3.0201,"has_fulltext":false,"cited_by_count":9,"citation_normalized_percentile":{"value":0.92392799,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"333","last_page":"342"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.9965000152587891,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10824","display_name":"Image Retrieval and Classification Techniques","score":0.9922999739646912,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.9054814577102661},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.6570688486099243},{"id":"https://openalex.org/keywords/massively-parallel","display_name":"Massively parallel","score":0.655301570892334},{"id":"https://openalex.org/keywords/speedup","display_name":"Speedup","score":0.6460738778114319},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.49961042404174805},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.4707432687282562},{"id":"https://openalex.org/keywords/identification","display_name":"Identification (biology)","score":0.42434513568878174},{"id":"https://openalex.org/keywords/centroid","display_name":"Centroid","score":0.41445836424827576},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.4086382985115051},{"id":"https://openalex.org/keywords/parallel-computing","display_name":"Parallel computing","score":0.2472403347492218},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.21074837446212769}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.9054814577102661},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.6570688486099243},{"id":"https://openalex.org/C190475519","wikidata":"https://www.wikidata.org/wiki/Q544384","display_name":"Massively parallel","level":2,"score":0.655301570892334},{"id":"https://openalex.org/C68339613","wikidata":"https://www.wikidata.org/wiki/Q1549489","display_name":"Speedup","level":2,"score":0.6460738778114319},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.49961042404174805},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4707432687282562},{"id":"https://openalex.org/C116834253","wikidata":"https://www.wikidata.org/wiki/Q2039217","display_name":"Identification (biology)","level":2,"score":0.42434513568878174},{"id":"https://openalex.org/C146599234","wikidata":"https://www.wikidata.org/wiki/Q511093","display_name":"Centroid","level":2,"score":0.41445836424827576},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.4086382985115051},{"id":"https://openalex.org/C173608175","wikidata":"https://www.wikidata.org/wiki/Q232661","display_name":"Parallel computing","level":1,"score":0.2472403347492218},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.21074837446212769},{"id":"https://openalex.org/C59822182","wikidata":"https://www.wikidata.org/wiki/Q441","display_name":"Botany","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/2766462.2767743","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2766462.2767743","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 38th International ACM SIGIR Conference on Research and Development in Information Retrieval","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.4300000071525574,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":22,"referenced_works":["https://openalex.org/W66588809","https://openalex.org/W116067572","https://openalex.org/W1487016832","https://openalex.org/W2021632899","https://openalex.org/W2023294425","https://openalex.org/W2050338028","https://openalex.org/W2051158205","https://openalex.org/W2114535528","https://openalex.org/W2118020653","https://openalex.org/W2118585731","https://openalex.org/W2124592110","https://openalex.org/W2145241906","https://openalex.org/W2149684865","https://openalex.org/W2150102617","https://openalex.org/W2153758664","https://openalex.org/W2170096781","https://openalex.org/W2217894451","https://openalex.org/W2539329681","https://openalex.org/W2539468248","https://openalex.org/W3001645704","https://openalex.org/W3100344990","https://openalex.org/W4285719527"],"related_works":["https://openalex.org/W2058965144","https://openalex.org/W2164382479","https://openalex.org/W2146343568","https://openalex.org/W98480971","https://openalex.org/W2150291671","https://openalex.org/W2013643406","https://openalex.org/W3138386522","https://openalex.org/W2499279132","https://openalex.org/W2056543843","https://openalex.org/W1974690493"],"abstract_inverted_index":{"The":[0],"unprecedented":[1],"growth":[2],"of":[3,11,24,32,44,56,75,85,92,99,118,126,146,158,184,208,216,229],"available":[4],"data":[5],"nowadays":[6],"has":[7,77,111],"stimulated":[8],"the":[9,54,73,83,90,116,135,140,147,159,170,206,227],"development":[10],"new":[12],"methods":[13],"for":[14,162,172],"organizing":[15],"and":[16,105,130,152,165],"extracting":[17],"useful":[18],"knowledge":[19],"from":[20],"this":[21],"immense":[22],"amount":[23],"data.":[25],"Automatic":[26],"Document":[27],"Classification":[28],"(ADC)":[29],"is":[30,53,121,134,169],"one":[31],"such":[33,60],"methods,":[34],"that":[35,94,178,218],"uses":[36],"machine":[37],"learning":[38],"techniques":[39],"to":[40,48,80,137,186,198],"build":[41],"models":[42],"capable":[43],"automatically":[45],"associating":[46],"documents":[47],"well-defined":[49],"semantic":[50],"classes.":[51],"ADC":[52,86],"basis":[55],"many":[57],"important":[58],"applications":[59],"as":[61],"language":[62],"identification,":[63],"sentiment":[64],"analysis,":[65],"recommender":[66],"systems,":[67],"spam":[68],"filtering,":[69],"among":[70],"others.":[71],"Recently,":[72],"use":[74,91,98,228],"meta-features":[76,93,120,210],"been":[78],"shown":[79],"substantially":[81],"improve":[82],"effectiveness":[84],"algorithms.":[87],"In":[88],"particular,":[89],"make":[95],"a":[96,154,199],"combined":[97],"local":[100],"information":[101,107],"(through":[102,108],"kNN-based":[103],"features)":[104],"global":[106],"category":[109],"centroids)":[110],"produced":[112],"promising":[113],"results.":[114],"However,":[115],"generation":[117],"these":[119],"very":[122],"costly":[123],"in":[124,192,213],"terms":[125],"both,":[127],"memory":[128,190],"consumption":[129,191],"runtime":[131],"since":[132],"there":[133],"need":[136],"constantly":[138],"call":[139],"kNN":[141,160],"algorithm.":[142],"We":[143],"take":[144,221],"advantage":[145],"current":[148],"manycore":[149],"GPU":[150],"architecture":[151],"present":[153],"massively":[155],"parallel":[156,201],"version":[157],"algorithm":[161],"highly":[163],"dimensional":[164],"sparse":[166],"datasets":[167],"(which":[168],"case":[171],"ADC).":[173],"Our":[174],"experimental":[175],"results":[176],"show":[177],"we":[179],"can":[180],"obtain":[181],"speedup":[182],"gains":[183],"up":[185,205],"15x":[187],"while":[188],"reducing":[189],"more":[193],"than":[194],"5000x":[195],"when":[196],"compared":[197],"state-of-the-art":[200],"baseline.":[202],"This":[203],"opens":[204],"possibility":[207],"applying":[209],"based":[211],"classification":[212],"large":[214],"collections":[215],"documents,":[217],"would":[219],"otherwise":[220],"too":[222],"much":[223],"time":[224],"or":[225],"require":[226],"an":[230],"expensive":[231],"computational":[232],"platform.":[233]},"counts_by_year":[{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":1},{"year":2018,"cited_by_count":4},{"year":2017,"cited_by_count":1},{"year":2016,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
