{"id":"https://openalex.org/W2153429513","doi":"https://doi.org/10.1017/s1351324912000381","title":"Coping with highly imbalanced datasets: A case study with definition extraction in a multilingual setting","display_name":"Coping with highly imbalanced datasets: A case study with definition extraction in a multilingual setting","publication_year":2013,"publication_date":"2013-02-11","ids":{"openalex":"https://openalex.org/W2153429513","doi":"https://doi.org/10.1017/s1351324912000381","mag":"2153429513"},"language":"en","primary_location":{"id":"doi:10.1017/s1351324912000381","is_oa":false,"landing_page_url":"https://doi.org/10.1017/s1351324912000381","pdf_url":null,"source":{"id":"https://openalex.org/S18088403","display_name":"Natural Language Engineering","issn_l":"1351-3249","issn":["1351-3249","1469-8110"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310311721","host_organization_name":"Cambridge University Press","host_organization_lineage":["https://openalex.org/P4310311721","https://openalex.org/P4310311702"],"host_organization_lineage_names":["Cambridge University Press","University of Cambridge"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Natural Language Engineering","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"http://dx.doi.org/10.1017/S1351324912000381","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5004136622","display_name":"Rosa Del Gaudio","orcid":null},"institutions":[{"id":"https://openalex.org/I141596103","display_name":"University of Lisbon","ror":"https://ror.org/01c27hj86","country_code":"PT","type":"education","lineage":["https://openalex.org/I141596103"]}],"countries":["PT"],"is_corresponding":true,"raw_author_name":"ROSA DEL GAUDIO","raw_affiliation_strings":["Faculdade de Ci\u00eancias, Departamento de Inform\u00e1tica, University of Lisbon, Campo Grande, 1749-016 Lisboa, Portugal e-mails:"],"affiliations":[{"raw_affiliation_string":"Faculdade de Ci\u00eancias, Departamento de Inform\u00e1tica, University of Lisbon, Campo Grande, 1749-016 Lisboa, Portugal e-mails:","institution_ids":["https://openalex.org/I141596103"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5029634834","display_name":"Gustavo Batista","orcid":"https://orcid.org/0000-0002-3482-8442"},"institutions":[{"id":"https://openalex.org/I17974374","display_name":"Universidade de S\u00e3o Paulo","ror":"https://ror.org/036rp1748","country_code":"BR","type":"education","lineage":["https://openalex.org/I17974374"]}],"countries":["BR"],"is_corresponding":false,"raw_author_name":"GUSTAVO BATISTA","raw_affiliation_strings":["Department of Computer Science, University of S\u00e3o Paulo, PO Box 668, 13560-970 S\u00e3o Carlos, SP, Brazil e-mail:"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, University of S\u00e3o Paulo, PO Box 668, 13560-970 S\u00e3o Carlos, SP, Brazil e-mail:","institution_ids":["https://openalex.org/I17974374"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5030134463","display_name":"Antonio C.S. Branco","orcid":null},"institutions":[{"id":"https://openalex.org/I141596103","display_name":"University of Lisbon","ror":"https://ror.org/01c27hj86","country_code":"PT","type":"education","lineage":["https://openalex.org/I141596103"]}],"countries":["PT"],"is_corresponding":false,"raw_author_name":"ANT\u00d3NIO BRANCO","raw_affiliation_strings":["Faculdade de Ci\u00eancias, Departamento de Inform\u00e1tica, University of Lisbon, Campo Grande, 1749-016 Lisboa, Portugal e-mails:"],"affiliations":[{"raw_affiliation_string":"Faculdade de Ci\u00eancias, Departamento de Inform\u00e1tica, University of Lisbon, Campo Grande, 1749-016 Lisboa, Portugal e-mails:","institution_ids":["https://openalex.org/I141596103"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5004136622"],"corresponding_institution_ids":["https://openalex.org/I141596103"],"apc_list":null,"apc_paid":null,"fwci":4.4833,"has_fulltext":false,"cited_by_count":21,"citation_normalized_percentile":{"value":0.94653781,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":98},"biblio":{"volume":"20","issue":"3","first_page":"327","last_page":"359"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11652","display_name":"Imbalanced Data Classification Techniques","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11652","display_name":"Imbalanced Data Classification Techniques","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9972000122070312,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9894000291824341,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8990111947059631},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5900054574012756},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.556186318397522},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.513163149356842},{"id":"https://openalex.org/keywords/range","display_name":"Range (aeronautics)","score":0.4757287800312042},{"id":"https://openalex.org/keywords/sampling","display_name":"Sampling (signal processing)","score":0.44672146439552307},{"id":"https://openalex.org/keywords/baseline","display_name":"Baseline (sea)","score":0.4405903220176697},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.35820484161376953},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3241754174232483},{"id":"https://openalex.org/keywords/detector","display_name":"Detector","score":0.0774419903755188}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8990111947059631},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5900054574012756},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.556186318397522},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.513163149356842},{"id":"https://openalex.org/C204323151","wikidata":"https://www.wikidata.org/wiki/Q905424","display_name":"Range (aeronautics)","level":2,"score":0.4757287800312042},{"id":"https://openalex.org/C140779682","wikidata":"https://www.wikidata.org/wiki/Q210868","display_name":"Sampling (signal processing)","level":3,"score":0.44672146439552307},{"id":"https://openalex.org/C12725497","wikidata":"https://www.wikidata.org/wiki/Q810247","display_name":"Baseline (sea)","level":2,"score":0.4405903220176697},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.35820484161376953},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3241754174232483},{"id":"https://openalex.org/C94915269","wikidata":"https://www.wikidata.org/wiki/Q1834857","display_name":"Detector","level":2,"score":0.0774419903755188},{"id":"https://openalex.org/C127313418","wikidata":"https://www.wikidata.org/wiki/Q1069","display_name":"Geology","level":0,"score":0.0},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C159985019","wikidata":"https://www.wikidata.org/wiki/Q181790","display_name":"Composite material","level":1,"score":0.0},{"id":"https://openalex.org/C192562407","wikidata":"https://www.wikidata.org/wiki/Q228736","display_name":"Materials science","level":0,"score":0.0},{"id":"https://openalex.org/C111368507","wikidata":"https://www.wikidata.org/wiki/Q43518","display_name":"Oceanography","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1017/s1351324912000381","is_oa":false,"landing_page_url":"https://doi.org/10.1017/s1351324912000381","pdf_url":null,"source":{"id":"https://openalex.org/S18088403","display_name":"Natural Language Engineering","issn_l":"1351-3249","issn":["1351-3249","1469-8110"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310311721","host_organization_name":"Cambridge University Press","host_organization_lineage":["https://openalex.org/P4310311721","https://openalex.org/P4310311702"],"host_organization_lineage_names":["Cambridge University Press","University of Cambridge"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Natural Language Engineering","raw_type":"journal-article"},{"id":"pmh:oai:www.producao.usp.br:BDPI/45414","is_oa":false,"landing_page_url":"http://www.producao.usp.br/handle/BDPI/45414","pdf_url":null,"source":{"id":"https://openalex.org/S4306401373","display_name":"Scientific Electronic Library Online (S\u00e3o Paulo Research Foundation, Latin American and Caribbean Center on Health Sciences Information, Conselho Nacional de Desenvolvimento Cient\u00edfico e Tecnol\u00f3gico)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I2982035970","host_organization_name":"Costa Rican Department of Social Security","host_organization_lineage":["https://openalex.org/I2982035970"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"","raw_type":"article"},{"id":"pmh:002332065","is_oa":true,"landing_page_url":"http://dx.doi.org/10.1017/S1351324912000381","pdf_url":null,"source":{"id":"https://openalex.org/S4306402641","display_name":"LA Referencia (Red Federada de Repositorios Institucionales de Publicaciones Cient\u00edficas)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4383465926","host_organization_name":"LA Referencia","host_organization_lineage":["https://openalex.org/I4383465926"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"reponame:Reposit\u00f3rio Institucional da USP (Biblioteca Digital da Produ\u00e7\u00e3o Intelectual)","raw_type":"info:eu-repo/semantics/publishedVersion"}],"best_oa_location":{"id":"pmh:002332065","is_oa":true,"landing_page_url":"http://dx.doi.org/10.1017/S1351324912000381","pdf_url":null,"source":{"id":"https://openalex.org/S4306402641","display_name":"LA Referencia (Red Federada de Repositorios Institucionales de Publicaciones Cient\u00edficas)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4383465926","host_organization_name":"LA Referencia","host_organization_lineage":["https://openalex.org/I4383465926"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"reponame:Reposit\u00f3rio Institucional da USP (Biblioteca Digital da Produ\u00e7\u00e3o Intelectual)","raw_type":"info:eu-repo/semantics/publishedVersion"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education","score":0.8199999928474426}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":86,"referenced_works":["https://openalex.org/W405632","https://openalex.org/W18245985","https://openalex.org/W167016754","https://openalex.org/W176515449","https://openalex.org/W199218251","https://openalex.org/W202869703","https://openalex.org/W215122136","https://openalex.org/W218853445","https://openalex.org/W254094495","https://openalex.org/W576293029","https://openalex.org/W1481243620","https://openalex.org/W1513920829","https://openalex.org/W1521843029","https://openalex.org/W1528482621","https://openalex.org/W1548203787","https://openalex.org/W1559196077","https://openalex.org/W1564421318","https://openalex.org/W1570448133","https://openalex.org/W1572990996","https://openalex.org/W1576442155","https://openalex.org/W1584778245","https://openalex.org/W1591261915","https://openalex.org/W1595111739","https://openalex.org/W1680797894","https://openalex.org/W1774901127","https://openalex.org/W1833977909","https://openalex.org/W1912123407","https://openalex.org/W1932571505","https://openalex.org/W1993220166","https://openalex.org/W1994410331","https://openalex.org/W2001068000","https://openalex.org/W2003458432","https://openalex.org/W2023164079","https://openalex.org/W2030623877","https://openalex.org/W2053724458","https://openalex.org/W2060085262","https://openalex.org/W2066066594","https://openalex.org/W2068737686","https://openalex.org/W2071032380","https://openalex.org/W2074523946","https://openalex.org/W2092603896","https://openalex.org/W2100944017","https://openalex.org/W2106670479","https://openalex.org/W2107686700","https://openalex.org/W2111164709","https://openalex.org/W2120040939","https://openalex.org/W2120194796","https://openalex.org/W2122676398","https://openalex.org/W2126702330","https://openalex.org/W2127005418","https://openalex.org/W2137029138","https://openalex.org/W2146141661","https://openalex.org/W2147169507","https://openalex.org/W2148143831","https://openalex.org/W2153635508","https://openalex.org/W2155653793","https://openalex.org/W2162303794","https://openalex.org/W2165086927","https://openalex.org/W2171071968","https://openalex.org/W2214916291","https://openalex.org/W2242564390","https://openalex.org/W2250406935","https://openalex.org/W2250778040","https://openalex.org/W2302382203","https://openalex.org/W2401873644","https://openalex.org/W2404602200","https://openalex.org/W2422606479","https://openalex.org/W2505974315","https://openalex.org/W2548230849","https://openalex.org/W2597693253","https://openalex.org/W2612706554","https://openalex.org/W2787263876","https://openalex.org/W2911863610","https://openalex.org/W2911964244","https://openalex.org/W2914453346","https://openalex.org/W2966207845","https://openalex.org/W2997833137","https://openalex.org/W4237195092","https://openalex.org/W4244238212","https://openalex.org/W4247798019","https://openalex.org/W4390785577","https://openalex.org/W6634499689","https://openalex.org/W6635474240","https://openalex.org/W6635666727","https://openalex.org/W6648516887","https://openalex.org/W6688612899"],"related_works":["https://openalex.org/W2383111961","https://openalex.org/W2365952365","https://openalex.org/W2352448290","https://openalex.org/W2380820513","https://openalex.org/W2913146933","https://openalex.org/W2372385138","https://openalex.org/W4296359239","https://openalex.org/W2043093291","https://openalex.org/W2101155126","https://openalex.org/W2363545964"],"abstract_inverted_index":{"Abstract":[0],"This":[1],"paper":[2,109],"addresses":[3],"the":[4,27,30,42,73,80,95,107,115,125,129,138,149],"task":[5],"of":[6,9,29,32,41,45,64,67,75,77,97,104,127,132,151,155],"automatic":[7,43],"extraction":[8,44],"definitions":[10],"by":[11,24,47,141],"thoroughly":[12],"exploring":[13,128],"an":[14],"approach":[15],"that":[16,122,157],"solely":[17],"relies":[18],"on":[19,26,159],"machine":[20],"learning":[21],"techniques,":[22],"and":[23,49,56,90,145],"focusing":[25],"issue":[28],"imbalance":[31],"relevant":[33],"datasets.":[34],"We":[35],"obtained":[36],"a":[37,62,85,112,152],"breakthrough":[38],"in":[39,72,118],"terms":[40],"definitions,":[46],"extensively":[48],"systematically":[50],"experimenting":[51],"with":[52,84,101],"different":[53,65,102],"sampling":[54,134],"techniques":[55,135],"their":[57],"combination,":[58],"as":[59,61],"well":[60],"range":[63,74,154],"types":[66],"classifiers.":[68],"Performance":[69],"consistently":[70],"scored":[71],"0.95\u20130.99":[76],"area":[78],"under":[79],"receiver":[81],"operating":[82],"characteristics,":[83],"notorious":[86],"improvement":[87],"between":[88],"17":[89],"22":[91],"percentage":[92],"points":[93,123],"regarding":[94],"baseline":[96],"0.73\u20130.77,":[98],"for":[99],"datasets":[100],"rates":[103],"imbalance.":[105],"Thus,":[106],"present":[108],"also":[110],"represents":[111],"contribution":[113],"to":[114,136],"seminal":[116],"work":[117],"natural":[119],"language":[120],"processing":[121],"toward":[124],"importance":[126],"research":[130],"path":[131],"applying":[133],"mitigate":[137],"bias":[139],"induced":[140],"highly":[142],"imbalanced":[143],"datasets,":[144],"thus":[146],"greatly":[147],"improving":[148],"performance":[150],"large":[153],"tools":[156],"rely":[158],"them.":[160]},"counts_by_year":[{"year":2023,"cited_by_count":2},{"year":2021,"cited_by_count":2},{"year":2020,"cited_by_count":3},{"year":2019,"cited_by_count":2},{"year":2018,"cited_by_count":2},{"year":2017,"cited_by_count":1},{"year":2016,"cited_by_count":1},{"year":2015,"cited_by_count":2},{"year":2014,"cited_by_count":4},{"year":2013,"cited_by_count":2}],"updated_date":"2026-03-20T23:20:44.827607","created_date":"2016-06-24T00:00:00"}
