{"id":"https://openalex.org/W2515761419","doi":"https://doi.org/10.1145/2970398.2970426","title":"Nearest Neighbour based Transformation Functions for Text Classification","display_name":"Nearest Neighbour based Transformation Functions for Text Classification","publication_year":2016,"publication_date":"2016-09-09","ids":{"openalex":"https://openalex.org/W2515761419","doi":"https://doi.org/10.1145/2970398.2970426","mag":"2515761419"},"language":"en","primary_location":{"id":"doi:10.1145/2970398.2970426","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2970398.2970426","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2016 ACM International Conference on the Theory of Information Retrieval","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"http://doras.dcu.ie/22802/","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5011620904","display_name":"Piyush Arora","orcid":null},"institutions":[{"id":"https://openalex.org/I42934936","display_name":"Dublin City University","ror":"https://ror.org/04a1a1e81","country_code":"IE","type":"education","lineage":["https://openalex.org/I42934936"]}],"countries":["IE"],"is_corresponding":true,"raw_author_name":"Piyush Arora","raw_affiliation_strings":["School of Computing, DCU, Dublin, Ireland"],"affiliations":[{"raw_affiliation_string":"School of Computing, DCU, Dublin, Ireland","institution_ids":["https://openalex.org/I42934936"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5082339849","display_name":"Debasis Ganguly","orcid":"https://orcid.org/0000-0003-0050-7138"},"institutions":[{"id":"https://openalex.org/I42934936","display_name":"Dublin City University","ror":"https://ror.org/04a1a1e81","country_code":"IE","type":"education","lineage":["https://openalex.org/I42934936"]}],"countries":["IE"],"is_corresponding":false,"raw_author_name":"Debasis Ganguly","raw_affiliation_strings":["School of Computing, DCU, Dublin, Ireland"],"affiliations":[{"raw_affiliation_string":"School of Computing, DCU, Dublin, Ireland","institution_ids":["https://openalex.org/I42934936"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5018481328","display_name":"Gareth J. F. Jones","orcid":"https://orcid.org/0000-0003-2923-8365"},"institutions":[{"id":"https://openalex.org/I42934936","display_name":"Dublin City University","ror":"https://ror.org/04a1a1e81","country_code":"IE","type":"education","lineage":["https://openalex.org/I42934936"]}],"countries":["IE"],"is_corresponding":false,"raw_author_name":"Gareth J.F. Jones","raw_affiliation_strings":["School of Computing, DCU, Dublin, Ireland"],"affiliations":[{"raw_affiliation_string":"School of Computing, DCU, Dublin, Ireland","institution_ids":["https://openalex.org/I42934936"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5011620904"],"corresponding_institution_ids":["https://openalex.org/I42934936"],"apc_list":null,"apc_paid":null,"fwci":0.4417,"has_fulltext":true,"cited_by_count":3,"citation_normalized_percentile":{"value":0.79394792,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"299","last_page":"302"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13274","display_name":"Expert finding and Q&A systems","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6728260517120361},{"id":"https://openalex.org/keywords/categorization","display_name":"Categorization","score":0.5706725120544434},{"id":"https://openalex.org/keywords/neighbourhood","display_name":"Neighbourhood (mathematics)","score":0.5648958683013916},{"id":"https://openalex.org/keywords/feature-vector","display_name":"Feature vector","score":0.5169905424118042},{"id":"https://openalex.org/keywords/support-vector-machine","display_name":"Support vector machine","score":0.48331117630004883},{"id":"https://openalex.org/keywords/naive-bayes-classifier","display_name":"Naive Bayes classifier","score":0.4765656292438507},{"id":"https://openalex.org/keywords/robustness","display_name":"Robustness (evolution)","score":0.47594714164733887},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4745674431324005},{"id":"https://openalex.org/keywords/vocabulary","display_name":"Vocabulary","score":0.46605926752090454},{"id":"https://openalex.org/keywords/vector-space","display_name":"Vector space","score":0.46408435702323914},{"id":"https://openalex.org/keywords/vector-space-model","display_name":"Vector space model","score":0.43992218375205994},{"id":"https://openalex.org/keywords/transformation","display_name":"Transformation (genetics)","score":0.4137764573097229},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.3924109637737274},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3389623165130615},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.24356427788734436},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.08059033751487732}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6728260517120361},{"id":"https://openalex.org/C94124525","wikidata":"https://www.wikidata.org/wiki/Q912550","display_name":"Categorization","level":2,"score":0.5706725120544434},{"id":"https://openalex.org/C161677786","wikidata":"https://www.wikidata.org/wiki/Q2478475","display_name":"Neighbourhood (mathematics)","level":2,"score":0.5648958683013916},{"id":"https://openalex.org/C83665646","wikidata":"https://www.wikidata.org/wiki/Q42139305","display_name":"Feature vector","level":2,"score":0.5169905424118042},{"id":"https://openalex.org/C12267149","wikidata":"https://www.wikidata.org/wiki/Q282453","display_name":"Support vector machine","level":2,"score":0.48331117630004883},{"id":"https://openalex.org/C52001869","wikidata":"https://www.wikidata.org/wiki/Q812530","display_name":"Naive Bayes classifier","level":3,"score":0.4765656292438507},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.47594714164733887},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4745674431324005},{"id":"https://openalex.org/C2777601683","wikidata":"https://www.wikidata.org/wiki/Q6499736","display_name":"Vocabulary","level":2,"score":0.46605926752090454},{"id":"https://openalex.org/C13336665","wikidata":"https://www.wikidata.org/wiki/Q125977","display_name":"Vector space","level":2,"score":0.46408435702323914},{"id":"https://openalex.org/C89686163","wikidata":"https://www.wikidata.org/wiki/Q1187982","display_name":"Vector space model","level":2,"score":0.43992218375205994},{"id":"https://openalex.org/C204241405","wikidata":"https://www.wikidata.org/wiki/Q461499","display_name":"Transformation (genetics)","level":3,"score":0.4137764573097229},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.3924109637737274},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3389623165130615},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.24356427788734436},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.08059033751487732},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/2970398.2970426","is_oa":false,"landing_page_url":"https://doi.org/10.1145/2970398.2970426","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2016 ACM International Conference on the Theory of Information Retrieval","raw_type":"proceedings-article"},{"id":"pmh:http://www.rian.ie/157869/","is_oa":true,"landing_page_url":"http://doras.dcu.ie/22802/","pdf_url":"http://doras.dcu.ie/22802/","source":{"id":"https://openalex.org/S4306400033","display_name":"Arrow@dit (Dublin Institute of Technology)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I115570527","host_organization_name":"Dublin Institute of Technology","host_organization_lineage":["https://openalex.org/I115570527"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Arora, Piyush ORCID: 0000-0002-4261-2860 &lt;https://orcid.org/0000-0002-4261-2860&gt;, Ganguly, Debasis ORCID: 0000-0003-0050-7138 &lt;https://orcid.org/0000-0003-0050-7138&gt; and Jones, Gareth J.F. ORCID: 0000-0003-2923-8365 &lt;https://orcid.org/0000-0003-2923-8365&gt;  (2016) Nearest neighbour based transformation functions for text classification: a case study with StackOverflow.  In: Proceedings of the 2016 ACM International Conference on the Theory of Information Retrieval, 12 - 16 Sept 2016, Newark, Delaware, USA.  ISBN 978-1-4503-4497-5","raw_type":"Other"}],"best_oa_location":{"id":"pmh:http://www.rian.ie/157869/","is_oa":true,"landing_page_url":"http://doras.dcu.ie/22802/","pdf_url":"http://doras.dcu.ie/22802/","source":{"id":"https://openalex.org/S4306400033","display_name":"Arrow@dit (Dublin Institute of Technology)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I115570527","host_organization_name":"Dublin Institute of Technology","host_organization_lineage":["https://openalex.org/I115570527"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Arora, Piyush ORCID: 0000-0002-4261-2860 &lt;https://orcid.org/0000-0002-4261-2860&gt;, Ganguly, Debasis ORCID: 0000-0003-0050-7138 &lt;https://orcid.org/0000-0003-0050-7138&gt; and Jones, Gareth J.F. ORCID: 0000-0003-2923-8365 &lt;https://orcid.org/0000-0003-2923-8365&gt;  (2016) Nearest neighbour based transformation functions for text classification: a case study with StackOverflow.  In: Proceedings of the 2016 ACM International Conference on the Theory of Information Retrieval, 12 - 16 Sept 2016, Newark, Delaware, USA.  ISBN 978-1-4503-4497-5","raw_type":"Other"},"sustainable_development_goals":[{"score":0.47999998927116394,"display_name":"Sustainable cities and communities","id":"https://metadata.un.org/sdg/11"}],"awards":[{"id":"https://openalex.org/G3993907298","display_name":null,"funder_award_id":"13/RC/2106","funder_id":"https://openalex.org/F4320320847","funder_display_name":"Science Foundation Ireland"},{"id":"https://openalex.org/G8758897139","display_name":null,"funder_award_id":"12/CE/I2267","funder_id":"https://openalex.org/F4320320847","funder_display_name":"Science Foundation Ireland"}],"funders":[{"id":"https://openalex.org/F4320320847","display_name":"Science Foundation Ireland","ror":"https://ror.org/0271asj38"},{"id":"https://openalex.org/F4320320865","display_name":"Dublin City University","ror":"https://ror.org/04a1a1e81"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2515761419.pdf","grobid_xml":"https://content.openalex.org/works/W2515761419.grobid-xml"},"referenced_works_count":8,"referenced_works":["https://openalex.org/W2048978851","https://openalex.org/W2085030399","https://openalex.org/W2086530517","https://openalex.org/W2136440798","https://openalex.org/W2140336868","https://openalex.org/W2273196467","https://openalex.org/W2286400365","https://openalex.org/W2949547296"],"related_works":["https://openalex.org/W2063335787","https://openalex.org/W2096728994","https://openalex.org/W2349139068","https://openalex.org/W2064695035","https://openalex.org/W1990330161","https://openalex.org/W1974406477","https://openalex.org/W3161401723","https://openalex.org/W1540114765","https://openalex.org/W2541512317","https://openalex.org/W105748137"],"abstract_inverted_index":{"significant":[0],"increase":[1,79],"in":[2,7,16,61,122],"the":[3,14,33,55,62,76,80,90,99,106,110,123,133,137,161],"number":[4],"of":[5,57,82,92,104,141,148,159],"questions":[6,63,94,111],"question":[8,25,116],"answering":[9],"forums":[10],"has":[11],"led":[12],"to":[13,45,65,88,98,170],"interest":[15],"text":[17,36],"categorization":[18,37],"methods":[19],"for":[20,32,48],"classifying":[21],"a":[22,119],"newly":[23,100],"posted":[24],"as":[26],"good":[27],"(suitable)":[28],"or":[29],"bad":[30],"(otherwise)":[31],"forum.":[34],"Standard":[35],"approaches,":[38],"e.g.":[39],"multinomial":[40],"Naive":[41],"Bayes,":[42],"are":[43,96],"likely":[44],"be":[46],"unsuitable":[47],"this":[49,83],"classification":[50,84,107,166],"task":[51],"because":[52],"of:":[53],"i)":[54],"lack":[56],"sufficient":[58],"informative":[59],"content":[60],"due":[64],"their":[66],"relatively":[67],"short":[68],"length;":[69],"and":[70],"ii)":[71],"considerable":[72],"vocabulary":[73],"overlap":[74],"between":[75],"classes.":[77],"To":[78],"robustness":[81],"task,":[85],"we":[86,113],"propose":[87],"use":[89],"neighbourhood":[91,130],"existing":[93],"which":[95],"similar":[97],"asked":[101],"question.":[102],"Instead":[103],"learning":[105],"boundary":[108],"from":[109,145],"alone,":[112],"transform":[114],"each":[115],"vector":[117,139,146],"into":[118],"different":[120,129],"one":[121],"feature":[124],"space.":[125],"We":[126],"explore":[127],"two":[128],"functions":[131],"using:":[132],"discrete":[134],"term":[135],"space,":[136],"continuous":[138],"space":[140],"real":[142],"numbers":[143],"obtained":[144],"embeddings":[147],"documents.":[149],"Experiments":[150],"conducted":[151],"on":[152],"StackOverflow":[153],"data":[154],"show":[155],"that":[156],"our":[157],"approach":[158],"using":[160],"neighborhood":[162],"transformation":[163],"can":[164],"improve":[165],"accuracy":[167],"by":[168],"up":[169],"about":[171],"8%.":[172]},"counts_by_year":[{"year":2022,"cited_by_count":2},{"year":2017,"cited_by_count":1}],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2025-10-10T00:00:00"}
