{"id":"https://openalex.org/W2899004870","doi":"https://doi.org/10.18653/v1/k18-1037","title":"Evolutionary Data Measures: Understanding the Difficulty of Text Classification Tasks","display_name":"Evolutionary Data Measures: Understanding the Difficulty of Text Classification Tasks","publication_year":2018,"publication_date":"2018-01-01","ids":{"openalex":"https://openalex.org/W2899004870","doi":"https://doi.org/10.18653/v1/k18-1037","mag":"2899004870"},"language":"en","primary_location":{"id":"doi:10.18653/v1/k18-1037","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/k18-1037","pdf_url":"https://www.aclweb.org/anthology/K18-1037.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 22nd Conference on Computational Natural Language Learning","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.aclweb.org/anthology/K18-1037.pdf","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5110242404","display_name":"E. A. Collins","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Edward Collins","raw_affiliation_strings":["Wluper Ltd. London, United Kingdom"],"affiliations":[{"raw_affiliation_string":"Wluper Ltd. London, United Kingdom","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5038451521","display_name":"Nikolai Rozanov","orcid":"https://orcid.org/0000-0003-0274-8832"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Nikolai Rozanov","raw_affiliation_strings":["Wluper Ltd. London, United Kingdom"],"affiliations":[{"raw_affiliation_string":"Wluper Ltd. London, United Kingdom","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5049285935","display_name":"Bingbing Zhang","orcid":"https://orcid.org/0000-0002-0084-3206"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bingbing Zhang","raw_affiliation_strings":["Wluper Ltd. London, United Kingdom"],"affiliations":[{"raw_affiliation_string":"Wluper Ltd. London, United Kingdom","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5110242404"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.303,"has_fulltext":true,"cited_by_count":25,"citation_normalized_percentile":{"value":0.85567085,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"380","last_page":"391"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11975","display_name":"Evolutionary Algorithms and Applications","score":0.9976000189781189,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11975","display_name":"Evolutionary Algorithms and Applications","score":0.9976000189781189,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12535","display_name":"Machine Learning and Data Classification","score":0.9968000054359436,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.9954000115394592,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8124465942382812},{"id":"https://openalex.org/keywords/measure","display_name":"Measure (data warehouse)","score":0.716912567615509},{"id":"https://openalex.org/keywords/hyperparameter","display_name":"Hyperparameter","score":0.7082111835479736},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.6129085421562195},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6119281053543091},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5731427669525146},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.47139623761177063},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.4709504544734955},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.4199845492839813},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3215996026992798}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8124465942382812},{"id":"https://openalex.org/C2780009758","wikidata":"https://www.wikidata.org/wiki/Q6804172","display_name":"Measure (data warehouse)","level":2,"score":0.716912567615509},{"id":"https://openalex.org/C8642999","wikidata":"https://www.wikidata.org/wiki/Q4171168","display_name":"Hyperparameter","level":2,"score":0.7082111835479736},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.6129085421562195},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6119281053543091},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5731427669525146},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.47139623761177063},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.4709504544734955},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.4199845492839813},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3215996026992798},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.18653/v1/k18-1037","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/k18-1037","pdf_url":"https://www.aclweb.org/anthology/K18-1037.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 22nd Conference on Computational Natural Language Learning","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.18653/v1/k18-1037","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/k18-1037","pdf_url":"https://www.aclweb.org/anthology/K18-1037.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 22nd Conference on Computational Natural Language Learning","raw_type":"proceedings-article"},"sustainable_development_goals":[{"score":0.6700000166893005,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2899004870.pdf","grobid_xml":"https://content.openalex.org/works/W2899004870.grobid-xml"},"referenced_works_count":63,"referenced_works":["https://openalex.org/W46659105","https://openalex.org/W133394232","https://openalex.org/W303217050","https://openalex.org/W1481128830","https://openalex.org/W1507711477","https://openalex.org/W1522301498","https://openalex.org/W1544906348","https://openalex.org/W1572403558","https://openalex.org/W1574447377","https://openalex.org/W1911075424","https://openalex.org/W1968135554","https://openalex.org/W1968969471","https://openalex.org/W2041404167","https://openalex.org/W2047094503","https://openalex.org/W2064675550","https://openalex.org/W2067624665","https://openalex.org/W2068545462","https://openalex.org/W2070246124","https://openalex.org/W2091671846","https://openalex.org/W2099111195","https://openalex.org/W2102785645","https://openalex.org/W2103018059","https://openalex.org/W2113459411","https://openalex.org/W2121502920","https://openalex.org/W2123402141","https://openalex.org/W2125877832","https://openalex.org/W2131774270","https://openalex.org/W2140336868","https://openalex.org/W2143237130","https://openalex.org/W2145063378","https://openalex.org/W2148143831","https://openalex.org/W2151166364","https://openalex.org/W2157331557","https://openalex.org/W2159816857","https://openalex.org/W2167020558","https://openalex.org/W2170240176","https://openalex.org/W2251939518","https://openalex.org/W2251958472","https://openalex.org/W2252335727","https://openalex.org/W2263338482","https://openalex.org/W2295416969","https://openalex.org/W2296073425","https://openalex.org/W2297432279","https://openalex.org/W2493916176","https://openalex.org/W2592170186","https://openalex.org/W2593887162","https://openalex.org/W2595653137","https://openalex.org/W2606347107","https://openalex.org/W2611669587","https://openalex.org/W2759474451","https://openalex.org/W2798384383","https://openalex.org/W2951714314","https://openalex.org/W2963012544","https://openalex.org/W2963578188","https://openalex.org/W2963626623","https://openalex.org/W2963746755","https://openalex.org/W2964046515","https://openalex.org/W2964121744","https://openalex.org/W2978725006","https://openalex.org/W3100570787","https://openalex.org/W3147513489","https://openalex.org/W4251584817","https://openalex.org/W4294238563"],"related_works":["https://openalex.org/W2140186469","https://openalex.org/W4280563792","https://openalex.org/W4318719684","https://openalex.org/W4318559728","https://openalex.org/W3183136280","https://openalex.org/W2775233965","https://openalex.org/W4311551265","https://openalex.org/W4360995913","https://openalex.org/W4381707502","https://openalex.org/W2609418570"],"abstract_inverted_index":{"Classification":[0],"tasks":[1],"are":[2,20,165],"usually":[3],"analysed":[4],"and":[5,80,99,116,137,162],"improved":[6],"through":[7],"new":[8],"model":[9],"architectures":[10],"or":[11],"hyperparameter":[12],"optimisation":[13],"but":[14],"the":[15,31,34,60,108,153],"underlying":[16],"properties":[17,32],"of":[18,33,49,62,71,111,120,156],"datasets":[19,76,98,163],"discovered":[21],"on":[22,142],"an":[23,68],"ad-hoc":[24],"basis":[25],"as":[26],"errors":[27,112],"occur.":[28],"However,":[29],"understanding":[30],"data":[35,92],"is":[36,58,78,125],"crucial":[37],"in":[38,113],"perfecting":[39],"models.":[40],"In":[41],"this":[42,87,131],"paper":[43],"we":[44],"analyse":[45,107],"exactly":[46],"which":[47,77],"characteristics":[48],"a":[50,114,123,148],"dataset":[51,57,115,124],"best":[52,154],"determine":[53],"how":[54,121],"difficult":[55,122],"that":[56,86],"for":[59,73,130],"task":[61],"text":[63,74],"classification.":[64],"We":[65,84,128],"then":[66,146],"propose":[67],"intuitive":[69],"measure":[70,88,102,132,155],"difficulty":[72],"classification":[75],"simple":[79],"fast":[81,118],"to":[82,90,96,106,126,151],"calculate.":[83],"show":[85],"generalises":[89],"unseen":[91],"by":[93,133],"comparing":[94],"it":[95],"stateof-the-art":[97],"results.":[100],"This":[101],"can":[103],"be":[104],"used":[105],"precise":[109],"source":[110],"allows":[117],"estimation":[119],"learn.":[127],"searched":[129],"training":[134],"12":[135],"classical":[136],"neural":[138],"network":[139],"based":[140],"models":[141],"78":[143],"real-world":[144],"datasets,":[145],"use":[147],"genetic":[149],"algorithm":[150],"discover":[152],"difficulty.":[157],"Our":[158],"difficulty-calculating":[159],"code":[160],"1":[161],"2":[164],"publicly":[166],"available.":[167]},"counts_by_year":[{"year":2025,"cited_by_count":7},{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":5},{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":4},{"year":2020,"cited_by_count":4}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
