{"id":"https://openalex.org/W4402643265","doi":"https://doi.org/10.1007/978-3-031-72381-0_8","title":"Balancing Imbalanced Toxicity Models: Using MolBERT with\u00a0Focal Loss","display_name":"Balancing Imbalanced Toxicity Models: Using MolBERT with\u00a0Focal Loss","publication_year":2024,"publication_date":"2024-09-19","ids":{"openalex":"https://openalex.org/W4402643265","doi":"https://doi.org/10.1007/978-3-031-72381-0_8"},"language":"en","primary_location":{"id":"doi:10.1007/978-3-031-72381-0_8","is_oa":true,"landing_page_url":"https://doi.org/10.1007/978-3-031-72381-0_8","pdf_url":"https://link.springer.com/content/pdf/10.1007/978-3-031-72381-0_8.pdf","source":{"id":"https://openalex.org/S106296714","display_name":"Lecture notes in computer science","issn_l":"0302-9743","issn":["0302-9743","1611-3349"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"book series"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Lecture Notes in Computer Science","raw_type":"book-chapter"},"type":"book-chapter","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://link.springer.com/content/pdf/10.1007/978-3-031-72381-0_8.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5030874161","display_name":"Muhammad Arslan Masood","orcid":"https://orcid.org/0000-0002-9190-3023"},"institutions":[{"id":"https://openalex.org/I137982388","display_name":"Janssen (Belgium)","ror":"https://ror.org/04yzcpd71","country_code":"BE","type":"company","lineage":["https://openalex.org/I1330063522","https://openalex.org/I137982388"]},{"id":"https://openalex.org/I9927081","display_name":"Aalto University","ror":"https://ror.org/020hwjq30","country_code":"FI","type":"education","lineage":["https://openalex.org/I9927081"]}],"countries":["BE","FI"],"is_corresponding":true,"raw_author_name":"Muhammad Arslan Masood","raw_affiliation_strings":["Department of Computer Science, Aalto University, Espoo, Finland","Drug Discovery Data Sciences, Janssen Pharmaceutica NV, Turnhoutseweg 30, 2340, Beerse, Belgium"],"raw_orcid":"https://orcid.org/0000-0002-9190-3023","affiliations":[{"raw_affiliation_string":"Department of Computer Science, Aalto University, Espoo, Finland","institution_ids":["https://openalex.org/I9927081"]},{"raw_affiliation_string":"Drug Discovery Data Sciences, Janssen Pharmaceutica NV, Turnhoutseweg 30, 2340, Beerse, Belgium","institution_ids":["https://openalex.org/I137982388"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5018305257","display_name":"Samuel Kaski","orcid":"https://orcid.org/0000-0003-1925-9154"},"institutions":[{"id":"https://openalex.org/I28407311","display_name":"University of Manchester","ror":"https://ror.org/027m9bs27","country_code":"GB","type":"education","lineage":["https://openalex.org/I28407311"]},{"id":"https://openalex.org/I9927081","display_name":"Aalto University","ror":"https://ror.org/020hwjq30","country_code":"FI","type":"education","lineage":["https://openalex.org/I9927081"]}],"countries":["FI","GB"],"is_corresponding":false,"raw_author_name":"Samuel Kaski","raw_affiliation_strings":["Department of Computer Science, Aalto University, Espoo, Finland","Department of Computer Science, University of Manchester, Manchester, UK"],"raw_orcid":"https://orcid.org/0000-0003-1925-9154","affiliations":[{"raw_affiliation_string":"Department of Computer Science, Aalto University, Espoo, Finland","institution_ids":["https://openalex.org/I9927081"]},{"raw_affiliation_string":"Department of Computer Science, University of Manchester, Manchester, UK","institution_ids":["https://openalex.org/I28407311"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5019296124","display_name":"Hugo Ceulemans","orcid":"https://orcid.org/0000-0002-7059-4399"},"institutions":[{"id":"https://openalex.org/I137982388","display_name":"Janssen (Belgium)","ror":"https://ror.org/04yzcpd71","country_code":"BE","type":"company","lineage":["https://openalex.org/I1330063522","https://openalex.org/I137982388"]}],"countries":["BE"],"is_corresponding":false,"raw_author_name":"Hugo Ceulemans","raw_affiliation_strings":["Drug Discovery Data Sciences, Janssen Pharmaceutica NV, Turnhoutseweg 30, 2340, Beerse, Belgium"],"raw_orcid":"https://orcid.org/0000-0002-7059-4399","affiliations":[{"raw_affiliation_string":"Drug Discovery Data Sciences, Janssen Pharmaceutica NV, Turnhoutseweg 30, 2340, Beerse, Belgium","institution_ids":["https://openalex.org/I137982388"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5003314826","display_name":"Dorota Herman","orcid":"https://orcid.org/0000-0001-7836-8989"},"institutions":[{"id":"https://openalex.org/I137982388","display_name":"Janssen (Belgium)","ror":"https://ror.org/04yzcpd71","country_code":"BE","type":"company","lineage":["https://openalex.org/I1330063522","https://openalex.org/I137982388"]}],"countries":["BE"],"is_corresponding":false,"raw_author_name":"Dorota Herman","raw_affiliation_strings":["Drug Discovery Data Sciences, Janssen Pharmaceutica NV, Turnhoutseweg 30, 2340, Beerse, Belgium"],"raw_orcid":"https://orcid.org/0000-0001-7836-8989","affiliations":[{"raw_affiliation_string":"Drug Discovery Data Sciences, Janssen Pharmaceutica NV, Turnhoutseweg 30, 2340, Beerse, Belgium","institution_ids":["https://openalex.org/I137982388"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101444999","display_name":"Markus Heinonen","orcid":"https://orcid.org/0000-0002-7741-2279"},"institutions":[{"id":"https://openalex.org/I9927081","display_name":"Aalto University","ror":"https://ror.org/020hwjq30","country_code":"FI","type":"education","lineage":["https://openalex.org/I9927081"]}],"countries":["FI"],"is_corresponding":false,"raw_author_name":"Markus Heinonen","raw_affiliation_strings":["Department of Computer Science, Aalto University, Espoo, Finland"],"raw_orcid":"https://orcid.org/0000-0002-7741-2279","affiliations":[{"raw_affiliation_string":"Department of Computer Science, Aalto University, Espoo, Finland","institution_ids":["https://openalex.org/I9927081"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5030874161"],"corresponding_institution_ids":["https://openalex.org/I137982388","https://openalex.org/I9927081"],"apc_list":{"value":5000,"currency":"EUR","value_usd":5392},"apc_paid":{"value":5000,"currency":"EUR","value_usd":5392},"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.47474296,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"82","last_page":"97"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11235","display_name":"Statistical Methods in Clinical Trials","score":0.9952999949455261,"subfield":{"id":"https://openalex.org/subfields/2613","display_name":"Statistics and Probability"},"field":{"id":"https://openalex.org/fields/26","display_name":"Mathematics"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11235","display_name":"Statistical Methods in Clinical Trials","score":0.9952999949455261,"subfield":{"id":"https://openalex.org/subfields/2613","display_name":"Statistics and Probability"},"field":{"id":"https://openalex.org/fields/26","display_name":"Mathematics"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10845","display_name":"Advanced Causal Inference Techniques","score":0.949400007724762,"subfield":{"id":"https://openalex.org/subfields/2613","display_name":"Statistics and Probability"},"field":{"id":"https://openalex.org/fields/26","display_name":"Mathematics"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10211","display_name":"Computational Drug Discovery Methods","score":0.9487000107765198,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8382577896118164},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3607597351074219}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8382577896118164},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3607597351074219}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1007/978-3-031-72381-0_8","is_oa":true,"landing_page_url":"https://doi.org/10.1007/978-3-031-72381-0_8","pdf_url":"https://link.springer.com/content/pdf/10.1007/978-3-031-72381-0_8.pdf","source":{"id":"https://openalex.org/S106296714","display_name":"Lecture notes in computer science","issn_l":"0302-9743","issn":["0302-9743","1611-3349"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"book series"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Lecture Notes in Computer Science","raw_type":"book-chapter"},{"id":"pmh:oai:pure.atira.dk:openaire_cris_publications/c758a8de-319a-478d-850d-79c15f4c3e6f","is_oa":false,"landing_page_url":"https://research.manchester.ac.uk/en/publications/c758a8de-319a-478d-850d-79c15f4c3e6f","pdf_url":null,"source":{"id":"https://openalex.org/S4306400662","display_name":"Research Explorer (The University of Manchester)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I28407311","host_organization_name":"University of Manchester","host_organization_lineage":["https://openalex.org/I28407311"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Masood, M A, Kaski, S, Ceulemans, H, Herman, D & Heinonen, M 2025, Balancing Imbalanced Toxicity Models : Using MolBERT with\u00a0Focal Loss. in AI in Drug Discovery - 1st International Workshop, AIDD 2024, Held in Conjunction with ICANN 2024, Proceedings. Springer Japan, pp. 82-97, International Workshop on AI in Drug Discovery, Switzerland, 19/09/24. https://doi.org/10.1007/978-3-031-72381-0_8","raw_type":"info:eu-repo/semantics/publishedVersion"},{"id":"pmh:oai:aaltodoc.aalto.fi:123456789/131368","is_oa":true,"landing_page_url":"https://research.aalto.fi/en/publications/e46451b4-f965-413a-8477-994539a6cbf0","pdf_url":null,"source":{"id":"https://openalex.org/S4306401663","display_name":"Aaltodoc (Aalto University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I9927081","host_organization_name":"Aalto University","host_organization_lineage":["https://openalex.org/I9927081"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"publishedVersion"}],"best_oa_location":{"id":"doi:10.1007/978-3-031-72381-0_8","is_oa":true,"landing_page_url":"https://doi.org/10.1007/978-3-031-72381-0_8","pdf_url":"https://link.springer.com/content/pdf/10.1007/978-3-031-72381-0_8.pdf","source":{"id":"https://openalex.org/S106296714","display_name":"Lecture notes in computer science","issn_l":"0302-9743","issn":["0302-9743","1611-3349"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"book series"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Lecture Notes in Computer Science","raw_type":"book-chapter"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G223242278","display_name":null,"funder_award_id":"956832","funder_id":"https://openalex.org/F4320320300","funder_display_name":"European Commission"},{"id":"https://openalex.org/G7842005466","display_name":null,"funder_award_id":"Horizon 2020","funder_id":"https://openalex.org/F4320320300","funder_display_name":"European Commission"}],"funders":[{"id":"https://openalex.org/F4320320300","display_name":"European Commission","ror":"https://ror.org/00k4n6c32"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4402643265.pdf","grobid_xml":"https://content.openalex.org/works/W4402643265.grobid-xml"},"referenced_works_count":44,"referenced_works":["https://openalex.org/W1975147762","https://openalex.org/W1976526581","https://openalex.org/W1979392432","https://openalex.org/W1988037271","https://openalex.org/W1988499440","https://openalex.org/W2043738708","https://openalex.org/W2088059023","https://openalex.org/W2092126505","https://openalex.org/W2099536383","https://openalex.org/W2109879824","https://openalex.org/W2110820547","https://openalex.org/W2121897276","https://openalex.org/W2145578524","https://openalex.org/W2147890019","https://openalex.org/W2148143831","https://openalex.org/W2148804536","https://openalex.org/W2234529989","https://openalex.org/W2790505160","https://openalex.org/W2804256492","https://openalex.org/W2901476322","https://openalex.org/W2907473220","https://openalex.org/W2914415092","https://openalex.org/W2963351448","https://openalex.org/W2969509647","https://openalex.org/W2973114758","https://openalex.org/W2974029770","https://openalex.org/W3002514484","https://openalex.org/W3012107310","https://openalex.org/W3021897254","https://openalex.org/W3032781902","https://openalex.org/W3088417492","https://openalex.org/W3127521151","https://openalex.org/W3161286913","https://openalex.org/W3172949370","https://openalex.org/W3198212015","https://openalex.org/W4224951917","https://openalex.org/W4226159083","https://openalex.org/W4297179162","https://openalex.org/W4298009900","https://openalex.org/W4360948819","https://openalex.org/W4385690438","https://openalex.org/W4393169482","https://openalex.org/W6702248584","https://openalex.org/W6752934852"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052","https://openalex.org/W2382290278","https://openalex.org/W4395014643"],"abstract_inverted_index":{"Abstract":[0],"Drug-induced":[1],"liver":[2],"injury":[3],"(DILI)":[4],"presents":[5],"a":[6,40,65],"multifaceted":[7],"challenge,":[8],"influenced":[9],"by":[10,19],"interconnected":[11],"biological":[12],"mechanisms.":[13],"Current":[14],"DILI":[15],"datasets":[16],"are":[17],"characterized":[18],"small":[20],"sizes":[21],"and":[22,31,50,75,88,108],"high":[23],"imbalance,":[24],"posing":[25],"difficulties":[26],"in":[27,72],"learning":[28,71],"robust":[29,70],"representations":[30,63],"accurate":[32],"modeling.":[33],"To":[34,78],"address":[35,79],"these":[36],"challenges,":[37],"we":[38,61,82],"trained":[39],"multi-modal":[41],"multi-task":[42],"model":[43,101],"integrating":[44],"preclinical":[45],"histopathologies,":[46],"biochemistry":[47],"(blood":[48],"markers),":[49],"clinical":[51],"DILI-related":[52],"adverse":[53],"drug":[54],"reactions":[55],"(ADRs).":[56],"Leveraging":[57],"pretrained":[58],"BERT":[59,100,124,136],"models,":[60],"extracted":[62],"covering":[64],"broad":[66],"chemical":[67],"space,":[68],"facilitating":[69],"both":[73],"frozen":[74,99],"fine-tuned":[76],"settings.":[77],"imbalanced":[80],"data,":[81],"explored":[83],"weighted":[84,89,111,139],"Binary":[85],"Cross-Entropy":[86],"(w-BCE)":[87],"Focal":[90],"Loss":[91],"(w-FL)":[92],".":[93],"Our":[94],"results":[95],"demonstrate":[96],"that":[97],"the":[98,120,133,145],"consistently":[102],"enhances":[103],"performance":[104],"across":[105,126],"all":[106],"metrics":[107],"modalities":[109],"with":[110,138],"loss":[112,140],"functions":[113,141],"compared":[114],"to":[115],"their":[116],"non-weighted":[117],"counterparts.":[118],"However,":[119],"efficacy":[121,146],"of":[122,135,147],"fine-tuning":[123,148],"varies":[125],"modalities,":[127],"yielding":[128],"inconclusive":[129],"results.":[130],"In":[131],"summary,":[132],"incorporation":[134],"features":[137],"demonstrates":[142],"advantages,":[143],"while":[144],"remains":[149],"uncertain.":[150]},"counts_by_year":[],"updated_date":"2026-05-05T08:41:31.759640","created_date":"2025-10-10T00:00:00"}
