{"id":"https://openalex.org/W7148896888","doi":"https://doi.org/10.48550/arxiv.2604.01745","title":"Detecting Toxic Language: Ontology and BERT-based Approaches for Bulgarian Text","display_name":"Detecting Toxic Language: Ontology and BERT-based Approaches for Bulgarian Text","publication_year":2026,"publication_date":"2026-04-02","ids":{"openalex":"https://openalex.org/W7148896888","doi":"https://doi.org/10.48550/arxiv.2604.01745"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.01745","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.01745","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.01745","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5056954451","display_name":"Melania Berbatova","orcid":"https://orcid.org/0000-0002-2882-6694"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Berbatova, Melania","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5132910293","display_name":"Tsvetoslav Vasev","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Vasev, Tsvetoslav","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5056954451"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12262","display_name":"Hate Speech and Cyberbullying Detection","score":0.9193000197410583,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12262","display_name":"Hate Speech and Cyberbullying Detection","score":0.9193000197410583,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12380","display_name":"Authorship Attribution and Profiling","score":0.014700000174343586,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.009600000455975533,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/bulgarian","display_name":"Bulgarian","score":0.8677999973297119},{"id":"https://openalex.org/keywords/ontology","display_name":"Ontology","score":0.6837999820709229},{"id":"https://openalex.org/keywords/macro","display_name":"Macro","score":0.47909998893737793},{"id":"https://openalex.org/keywords/blocking","display_name":"Blocking (statistics)","score":0.33500000834465027},{"id":"https://openalex.org/keywords/moderation","display_name":"Moderation","score":0.3260999917984009}],"concepts":[{"id":"https://openalex.org/C2780343019","wikidata":"https://www.wikidata.org/wiki/Q7918","display_name":"Bulgarian","level":2,"score":0.8677999973297119},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7433000206947327},{"id":"https://openalex.org/C25810664","wikidata":"https://www.wikidata.org/wiki/Q44325","display_name":"Ontology","level":2,"score":0.6837999820709229},{"id":"https://openalex.org/C166955791","wikidata":"https://www.wikidata.org/wiki/Q629579","display_name":"Macro","level":2,"score":0.47909998893737793},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.42649999260902405},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.41819998621940613},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.40619999170303345},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3718999922275543},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.37070000171661377},{"id":"https://openalex.org/C144745244","wikidata":"https://www.wikidata.org/wiki/Q4927286","display_name":"Blocking (statistics)","level":2,"score":0.33500000834465027},{"id":"https://openalex.org/C93225998","wikidata":"https://www.wikidata.org/wiki/Q1941972","display_name":"Moderation","level":2,"score":0.3260999917984009},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.2904999852180481},{"id":"https://openalex.org/C110875604","wikidata":"https://www.wikidata.org/wiki/Q75","display_name":"The Internet","level":2,"score":0.2827000021934509},{"id":"https://openalex.org/C101293273","wikidata":"https://www.wikidata.org/wiki/Q579716","display_name":"User-generated content","level":3,"score":0.2628999948501587}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.01745","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.01745","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.01745","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.01745","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"score":0.8026023507118225,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Toxic":[0],"content":[1,67,150],"detection":[2,151],"in":[3,37,81,137],"online":[4,64,97],"communication":[5],"remains":[6],"a":[7,30,87,117,126,138,146],"significant":[8],"challenge,":[9],"with":[10],"current":[11],"solutions":[12],"often":[13],"inadvertently":[14],"blocking":[15],"valuable":[16],"information,":[17],"including":[18],"medical":[19,104],"terms":[20,109],"and":[21,66,108,141],"text":[22,39],"related":[23,110],"to":[24,34,43,111],"minority":[25,112],"groups.":[26],"This":[27],"paper":[28],"presents":[29],"more":[31],"nu-anced":[32],"approach":[33],"identifying":[35],"toxicity":[36],"Bulgarian":[38,82,96],"while":[40],"preserving":[41],"access":[42],"essential":[44],"information.":[45],"The":[46,56,131],"research":[47],"explores":[48],"two":[49],"distinct":[50],"methodologies":[51,58],"for":[52,120],"detecting":[53],"toxic":[54,79,102,121,149],"content.":[55],"developed":[57],"have":[59],"po-tential":[60],"applications":[61],"across":[62,99],"diverse":[63],"platforms":[65],"moderation":[68],"systems.":[69,152],"First,":[70],"we":[71,85],"propose":[72],"an":[73],"ontology":[74],"that":[75,89],"models":[76],"the":[77],"potentially":[78],"words":[80],"language.":[83],"Then,":[84],"compose":[86],"dataset":[88],"comprises":[90],"4,384":[91],"manually":[92],"anno-tated":[93],"sentences":[94],"from":[95],"forums":[98],"four":[100],"categories:":[101],"language,":[103],"terminology,":[105],"non-toxic":[106],"lan-guage,":[107],"communities.":[113],"We":[114],"then":[115],"train":[116],"BERT-based":[118],"model":[119,133],"language":[122],"classification,":[123],"which":[124],"reaches":[125],"0.89":[127],"F1":[128],"macro":[129],"score.":[130],"trained":[132],"is":[134],"directly":[135],"applicable":[136],"real":[139],"environment":[140],"can":[142],"be":[143],"integrated":[144],"as":[145],"com-ponent":[147],"of":[148]},"counts_by_year":[],"updated_date":"2026-04-04T06:15:33.020886","created_date":"2026-04-04T00:00:00"}
