{"id":"https://openalex.org/W3135762307","doi":"https://doi.org/10.1145/3446132.3446192","title":"Exploration of a Balanced Reference Corpus with a Wide Variety of Text Mining Tools","display_name":"Exploration of a Balanced Reference Corpus with a Wide Variety of Text Mining Tools","publication_year":2020,"publication_date":"2020-12-24","ids":{"openalex":"https://openalex.org/W3135762307","doi":"https://doi.org/10.1145/3446132.3446192","mag":"3135762307"},"language":"en","primary_location":{"id":"doi:10.1145/3446132.3446192","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3446132.3446192","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2020 3rd International Conference on Algorithms, Computing and Artificial Intelligence","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5082665913","display_name":"Nicolas Turenne","orcid":"https://orcid.org/0000-0003-1229-5590"},"institutions":[{"id":"https://openalex.org/I12615008","display_name":"Beijing Normal-Hong Kong Baptist University","ror":"https://ror.org/04snvc712","country_code":"CN","type":"education","lineage":["https://openalex.org/I12615008"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Nicolas Turenne","raw_affiliation_strings":["BNU-HKBU United International College, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"BNU-HKBU United International College, China","institution_ids":["https://openalex.org/I12615008"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5044844079","display_name":"Bokai Xu","orcid":"https://orcid.org/0009-0008-0661-507X"},"institutions":[{"id":"https://openalex.org/I12615008","display_name":"Beijing Normal-Hong Kong Baptist University","ror":"https://ror.org/04snvc712","country_code":"CN","type":"education","lineage":["https://openalex.org/I12615008"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Bokai Xu","raw_affiliation_strings":["BNU-HKBU United International College, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"BNU-HKBU United International College, China","institution_ids":["https://openalex.org/I12615008"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100357318","display_name":"Xinyue Li","orcid":"https://orcid.org/0000-0001-7362-0532"},"institutions":[{"id":"https://openalex.org/I12615008","display_name":"Beijing Normal-Hong Kong Baptist University","ror":"https://ror.org/04snvc712","country_code":"CN","type":"education","lineage":["https://openalex.org/I12615008"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xinyue Li","raw_affiliation_strings":["BNU-HKBU United International College, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"BNU-HKBU United International College, China","institution_ids":["https://openalex.org/I12615008"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100593013","display_name":"Xindi Xu","orcid":null},"institutions":[{"id":"https://openalex.org/I12615008","display_name":"Beijing Normal-Hong Kong Baptist University","ror":"https://ror.org/04snvc712","country_code":"CN","type":"education","lineage":["https://openalex.org/I12615008"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xindi Xu","raw_affiliation_strings":["BNU-HKBU United International College, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"BNU-HKBU United International College, China","institution_ids":["https://openalex.org/I12615008"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100773328","display_name":"Hongyu Liu","orcid":"https://orcid.org/0000-0002-4628-6388"},"institutions":[{"id":"https://openalex.org/I12615008","display_name":"Beijing Normal-Hong Kong Baptist University","ror":"https://ror.org/04snvc712","country_code":"CN","type":"education","lineage":["https://openalex.org/I12615008"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hongyu Liu","raw_affiliation_strings":["BNU-HKBU United International College, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"BNU-HKBU United International College, China","institution_ids":["https://openalex.org/I12615008"]}]},{"author_position":"last","author":{"id":null,"display_name":"Xiaolin Zhu","orcid":null},"institutions":[{"id":"https://openalex.org/I12615008","display_name":"Beijing Normal-Hong Kong Baptist University","ror":"https://ror.org/04snvc712","country_code":"CN","type":"education","lineage":["https://openalex.org/I12615008"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaolin Zhu","raw_affiliation_strings":["BNU-HKBU United International College, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"BNU-HKBU United International College, China","institution_ids":["https://openalex.org/I12615008"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I12615008"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.22041898,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"9"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9984999895095825,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12380","display_name":"Authorship Attribution and Profiling","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8429291248321533},{"id":"https://openalex.org/keywords/variety","display_name":"Variety (cybernetics)","score":0.7647947072982788},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.6947465538978577},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6560212969779968},{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.5861892700195312},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.5067654252052307},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.4821847975254059},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.44034093618392944},{"id":"https://openalex.org/keywords/resource","display_name":"Resource (disambiguation)","score":0.42638084292411804},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.12263822555541992}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8429291248321533},{"id":"https://openalex.org/C136197465","wikidata":"https://www.wikidata.org/wiki/Q1729295","display_name":"Variety (cybernetics)","level":2,"score":0.7647947072982788},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.6947465538978577},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6560212969779968},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.5861892700195312},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.5067654252052307},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.4821847975254059},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.44034093618392944},{"id":"https://openalex.org/C206345919","wikidata":"https://www.wikidata.org/wiki/Q20380951","display_name":"Resource (disambiguation)","level":2,"score":0.42638084292411804},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.12263822555541992},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/3446132.3446192","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3446132.3446192","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2020 3rd International Conference on Algorithms, Computing and Artificial Intelligence","raw_type":"proceedings-article"},{"id":"pmh:oai:HAL:hal-03373795v1","is_oa":false,"landing_page_url":"https://hal.science/hal-03373795","pdf_url":null,"source":{"id":"https://openalex.org/S4306402512","display_name":"HAL (Le Centre pour la Communication Scientifique Directe)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1294671590","host_organization_name":"Centre National de la Recherche Scientifique","host_organization_lineage":["https://openalex.org/I1294671590"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"ACAI 2020: 2020 3rd International Conference on Algorithms, Computing and Artificial Intelligence, Dec 2020, Sanya China, France. pp.1-9, &#x27E8;10.1145/3446132.3446192&#x27E9;","raw_type":"info:eu-repo/semantics/conferenceObject"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.46000000834465027,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":33,"referenced_works":["https://openalex.org/W572734709","https://openalex.org/W1489181569","https://openalex.org/W1832693441","https://openalex.org/W1880262756","https://openalex.org/W1919055890","https://openalex.org/W1996903695","https://openalex.org/W2011432097","https://openalex.org/W2014516359","https://openalex.org/W2064853889","https://openalex.org/W2093434627","https://openalex.org/W2110491428","https://openalex.org/W2112885819","https://openalex.org/W2131681506","https://openalex.org/W2250966211","https://openalex.org/W2462248047","https://openalex.org/W2483327705","https://openalex.org/W2740602821","https://openalex.org/W2740721704","https://openalex.org/W2762444032","https://openalex.org/W2785219974","https://openalex.org/W2793769796","https://openalex.org/W2799483584","https://openalex.org/W2800524938","https://openalex.org/W2801244916","https://openalex.org/W2884275378","https://openalex.org/W2905382261","https://openalex.org/W2917373730","https://openalex.org/W2982581688","https://openalex.org/W2996363230","https://openalex.org/W4233135949","https://openalex.org/W4247297565","https://openalex.org/W4298358657","https://openalex.org/W4300121351"],"related_works":["https://openalex.org/W2032233321","https://openalex.org/W3121970507","https://openalex.org/W2110028391","https://openalex.org/W54497855","https://openalex.org/W217960748","https://openalex.org/W3125814499","https://openalex.org/W2090827041","https://openalex.org/W2094012830","https://openalex.org/W187246281","https://openalex.org/W2079194830"],"abstract_inverted_index":{"To":[0],"compare":[1],"various":[2],"techniques,":[3],"the":[4,12,45,82,92,130,140,164,167],"same":[5],"platform":[6],"is":[7,39],"generally":[8],"used":[9,159],"into":[10,62],"which":[11],"user":[13],"will":[14],"import":[15],"a":[16,26,30,34,54,109,122,149],"text":[17,135,172],"dataset.":[18],"Another":[19],"approach":[20],"uses":[21],"an":[22],"evaluation":[23],"based":[24],"on":[25],"gold":[27],"standard":[28],"for":[29],"specific":[31],"task,":[32],"but":[33],"balanced":[35,55,146],"common":[36],"language":[37],"corpus":[38,61],"not":[40],"often":[41],"used.":[42],"We":[43,79,143],"choose":[44],"Corpus":[46,51],"of":[47,72,105,151,166,170],"Contemporary":[48],"American":[49],"English":[50],"(COCA)":[52],"as":[53,65,160],"reference":[56],"corpus,":[57],"and":[58,67,75,88,95,99,116],"split":[59],"this":[60],"categories,":[63],"such":[64],"topics":[66],"genres,":[68],"to":[69,101,162],"apply":[70],"families":[71],"feature":[73],"extraction":[74],"machine":[76],"learning":[77],"algorithms.":[78],"found":[80],"that":[81,108,145],"Stanford":[83],"CoreNLP":[84],"method":[85],"was":[86,96],"faster":[87],"more":[89,97],"accurate":[90],"than":[91],"NLTK":[93],"method,":[94],"reliable":[98],"easier":[100],"understand.":[102],"The":[103],"results":[104],"clustering":[106],"show":[107],"higher":[110],"modularity":[111],"influences":[112],"interpretation.":[113],"For":[114],"genre":[115],"topic":[117],"classification,":[118],"all":[119],"techniques":[120],"achieved":[121],"relatively":[123],"high":[124],"score,":[125],"though":[126],"these":[127],"were":[128],"below":[129],"state-of-the-art":[131,171],"scores":[132],"from":[133,148],"challenge":[134],"datasets.":[136],"Na\u00efve":[137],"Bayes":[138],"outperformed":[139],"other":[141],"alternatives.":[142],"hope":[144],"corpora":[147],"variety":[150],"different":[152],"vernacular":[153],"(or":[154],"low-resource)":[155],"languages":[156],"can":[157],"be":[158],"references":[161],"determine":[163],"efficiency":[165],"wide":[168],"diversity":[169],"mining":[173],"tools.":[174]},"counts_by_year":[],"updated_date":"2026-07-02T09:51:11.867554","created_date":"2025-10-10T00:00:00"}
