{"id":"https://openalex.org/W4390971148","doi":"https://doi.org/10.1109/bibm58861.2023.10385403","title":"Fine-tuning a pre-trained Transformers-based model for gene name entity recognition in biomedical text using a customized dataset: case of Desulfovibrio vulgaris Hildenborough","display_name":"Fine-tuning a pre-trained Transformers-based model for gene name entity recognition in biomedical text using a customized dataset: case of Desulfovibrio vulgaris Hildenborough","publication_year":2023,"publication_date":"2023-12-05","ids":{"openalex":"https://openalex.org/W4390971148","doi":"https://doi.org/10.1109/bibm58861.2023.10385403"},"language":"en","primary_location":{"id":"doi:10.1109/bibm58861.2023.10385403","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bibm58861.2023.10385403","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 IEEE International Conference on Bioinformatics and Biomedicine (BIBM)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5065962577","display_name":"Alain Bertrand Bomgni","orcid":"https://orcid.org/0000-0002-3377-7321"},"institutions":[{"id":"https://openalex.org/I189957204","display_name":"University of South Dakota","ror":"https://ror.org/0043h8f16","country_code":"US","type":"education","lineage":["https://openalex.org/I189957204"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Alain Bertrand Bomgni","raw_affiliation_strings":["University of South Dakota,Dept. of Biomedical Engineering,Vermillion,SD,USA","Dept. of Biomedical Engineering, University of South Dakota, Vermillion, SD, USA"],"affiliations":[{"raw_affiliation_string":"University of South Dakota,Dept. of Biomedical Engineering,Vermillion,SD,USA","institution_ids":["https://openalex.org/I189957204"]},{"raw_affiliation_string":"Dept. of Biomedical Engineering, University of South Dakota, Vermillion, SD, USA","institution_ids":["https://openalex.org/I189957204"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5111850463","display_name":"D.D. Abdala","orcid":null},"institutions":[{"id":"https://openalex.org/I178331978","display_name":"Universit\u00e9 de Dschang","ror":"https://ror.org/0566t4z20","country_code":"CM","type":"education","lineage":["https://openalex.org/I178331978"]}],"countries":["CM"],"is_corresponding":false,"raw_author_name":"Dialo Abdala","raw_affiliation_strings":["University of Dschang,Dept. of Computer Science,Dschang,Cameroon","Dept. of Computer Science, University of Dschang, Dschang, Cameroon"],"affiliations":[{"raw_affiliation_string":"University of Dschang,Dept. of Computer Science,Dschang,Cameroon","institution_ids":["https://openalex.org/I178331978"]},{"raw_affiliation_string":"Dept. of Computer Science, University of Dschang, Dschang, Cameroon","institution_ids":["https://openalex.org/I178331978"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5091622645","display_name":"Bichar Dip Shrestha Gurung","orcid":"https://orcid.org/0000-0002-9818-5108"},"institutions":[{"id":"https://openalex.org/I189957204","display_name":"University of South Dakota","ror":"https://ror.org/0043h8f16","country_code":"US","type":"education","lineage":["https://openalex.org/I189957204"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Bichar Dip Shrestha Gurung","raw_affiliation_strings":["University of South Dakota,Dept. of Biomedical Engineering,Vermillion,SD,USA","Dept. of Biomedical Engineering, University of South Dakota, Vermillion, SD, USA"],"affiliations":[{"raw_affiliation_string":"University of South Dakota,Dept. of Biomedical Engineering,Vermillion,SD,USA","institution_ids":["https://openalex.org/I189957204"]},{"raw_affiliation_string":"Dept. of Biomedical Engineering, University of South Dakota, Vermillion, SD, USA","institution_ids":["https://openalex.org/I189957204"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5064500705","display_name":"Marcellin Nkenlifack","orcid":"https://orcid.org/0000-0003-3322-4687"},"institutions":[{"id":"https://openalex.org/I178331978","display_name":"Universit\u00e9 de Dschang","ror":"https://ror.org/0566t4z20","country_code":"CM","type":"education","lineage":["https://openalex.org/I178331978"]}],"countries":["CM"],"is_corresponding":false,"raw_author_name":"Marcellin Julius Nkenlifack","raw_affiliation_strings":["University of Dschang,Dept. of Computer Science,Dschang,Cameroon","Dept. of Computer Science, University of Dschang, Dschang, Cameroon"],"affiliations":[{"raw_affiliation_string":"University of Dschang,Dept. of Computer Science,Dschang,Cameroon","institution_ids":["https://openalex.org/I178331978"]},{"raw_affiliation_string":"Dept. of Computer Science, University of Dschang, Dschang, Cameroon","institution_ids":["https://openalex.org/I178331978"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5050205984","display_name":"Venkataramana Gadhamshetty","orcid":"https://orcid.org/0000-0002-8418-3515"},"institutions":[{"id":"https://openalex.org/I184647316","display_name":"South Dakota School of Mines and Technology","ror":"https://ror.org/00ch7yk27","country_code":"US","type":"education","lineage":["https://openalex.org/I184647316"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Venkataramana Gadhamshetty","raw_affiliation_strings":["South Dakota School of Mines and Technology,Dept. of Civil and Environmental Engineering,Rapid City,SD,USA","Dept. of Civil and Environmental Engineering, South Dakota School of Mines and Technology, Rapid City, SD, USA"],"affiliations":[{"raw_affiliation_string":"South Dakota School of Mines and Technology,Dept. of Civil and Environmental Engineering,Rapid City,SD,USA","institution_ids":["https://openalex.org/I184647316"]},{"raw_affiliation_string":"Dept. of Civil and Environmental Engineering, South Dakota School of Mines and Technology, Rapid City, SD, USA","institution_ids":["https://openalex.org/I184647316"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5059211804","display_name":"Z. Etienne Gnimpieba","orcid":null},"institutions":[{"id":"https://openalex.org/I189957204","display_name":"University of South Dakota","ror":"https://ror.org/0043h8f16","country_code":"US","type":"education","lineage":["https://openalex.org/I189957204"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Z. Etienne Gnimpieba","raw_affiliation_strings":["University of South Dakota,Dept. of Biomedical Engineering,Vermillion,SD,USA","Dept. of Biomedical Engineering, University of South Dakota, Vermillion, SD, USA"],"affiliations":[{"raw_affiliation_string":"University of South Dakota,Dept. of Biomedical Engineering,Vermillion,SD,USA","institution_ids":["https://openalex.org/I189957204"]},{"raw_affiliation_string":"Dept. of Biomedical Engineering, University of South Dakota, Vermillion, SD, USA","institution_ids":["https://openalex.org/I189957204"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5065962577"],"corresponding_institution_ids":["https://openalex.org/I189957204"],"apc_list":null,"apc_paid":null,"fwci":0.9543,"has_fulltext":false,"cited_by_count":6,"citation_normalized_percentile":{"value":0.76819765,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"4473","last_page":"4479"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.9976999759674072,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.9976999759674072,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9846000075340271,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13083","display_name":"Advanced Text Analysis Techniques","score":0.9282000064849854,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/desulfovibrio-vulgaris","display_name":"Desulfovibrio vulgaris","score":0.7925393581390381},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6125746965408325},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.5673470497131348},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.4569781422615051},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4181443452835083},{"id":"https://openalex.org/keywords/biology","display_name":"Biology","score":0.1468905508518219},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.12418520450592041},{"id":"https://openalex.org/keywords/genetics","display_name":"Genetics","score":0.08127638697624207},{"id":"https://openalex.org/keywords/electrical-engineering","display_name":"Electrical engineering","score":0.07331496477127075}],"concepts":[{"id":"https://openalex.org/C2781243026","wikidata":"https://www.wikidata.org/wiki/Q593811","display_name":"Desulfovibrio vulgaris","level":3,"score":0.7925393581390381},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6125746965408325},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.5673470497131348},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4569781422615051},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4181443452835083},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.1468905508518219},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.12418520450592041},{"id":"https://openalex.org/C54355233","wikidata":"https://www.wikidata.org/wiki/Q7162","display_name":"Genetics","level":1,"score":0.08127638697624207},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.07331496477127075},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.0},{"id":"https://openalex.org/C523546767","wikidata":"https://www.wikidata.org/wiki/Q10876","display_name":"Bacteria","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/bibm58861.2023.10385403","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bibm58861.2023.10385403","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2023 IEEE International Conference on Bioinformatics and Biomedicine (BIBM)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","score":0.46000000834465027,"id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320338440","display_name":"HORIZON EUROPE Health","ror":null}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":13,"referenced_works":["https://openalex.org/W2896457183","https://openalex.org/W2911489562","https://openalex.org/W2948909602","https://openalex.org/W2970771982","https://openalex.org/W2971258845","https://openalex.org/W3166593409","https://openalex.org/W3210986045","https://openalex.org/W4205097571","https://openalex.org/W4206639076","https://openalex.org/W4221142212","https://openalex.org/W4313527418","https://openalex.org/W4365511667","https://openalex.org/W4385245566"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2748952813","https://openalex.org/W12517237","https://openalex.org/W1530162221","https://openalex.org/W2037501684","https://openalex.org/W2059405557","https://openalex.org/W1956971709","https://openalex.org/W2166311723","https://openalex.org/W1629309529","https://openalex.org/W3204019825"],"abstract_inverted_index":{"Gene":[0],"Name":[1],"Entity":[2],"Recognition":[3],"(NER)":[4],"plays":[5],"a":[6,106,122,172,223],"crucial":[7],"role":[8,281],"in":[9,31,46,149,208,238,242,286,296],"the":[10,19,32,34,47,68,71,94,115,157,178,202,215,218,232,268,279,293],"realm":[11],"of":[12,23,36,49,70,121,125,217,226,234,282],"biomedical":[13,50,126],"text":[14],"mining":[15],"by":[16],"focusing":[17],"on":[18,58,84,167],"identification":[20],"and":[21,118,163,191,206,253,258],"extraction":[22],"gene":[24,89,209,243,287,306],"references":[25],"from":[26,128],"scientific":[27],"literature.":[28],"Recent":[29],"advancements":[30],"field-particularly":[33],"emergence":[35],"pre-trained":[37,161,298],"transformer-based":[38],"language":[39,299],"models":[40,54,262,300],"like":[41],"BioBERT-have":[42],"shown":[43],"significant":[44,294],"promise":[45],"domain":[48,72],"NER.":[51,244],"However,":[52],"these":[53],"are":[55],"often":[56],"trained":[57],"existing,":[59],"publicly":[60,99],"available":[61,100],"datasets,":[62,101],"which":[63],"may":[64],"not":[65],"fully":[66],"capture":[67],"nuances":[69],"or":[73],"adequately":[74],"cover":[75],"less-studied":[76],"genes.":[77],"This":[78,109],"study":[79,277],"places":[80],"its":[81],"primary":[82],"emphasis":[83],"fine-tuning":[85,153,297],"BioBERT":[86,158,220,303],"specifically":[87,255],"for":[88],"NER":[90,288],"tasks.":[91,289],"To":[92,176],"address":[93],"limitations":[95],"associated":[96],"with":[97,160],"current":[98],"we":[102,181,195],"have":[103,136,145],"meticulously":[104],"crafted":[105],"custom":[107,169,284],"dataset.":[108],"dataset":[110,170,237],"is":[111],"thoughtfully":[112],"constructed":[113],"through":[114,222],"systematic":[116],"collection":[117],"detailed":[119],"annotation":[120],"diverse":[123],"range":[124],"literature":[127],"specialized":[129],"sources.":[130],"It":[131,290],"intentionally":[132],"includes":[133],"genes":[134],"that":[135,144],"been":[137],"extensively":[138],"researched,":[139],"as":[140,142,302],"well":[141],"those":[143],"received":[146],"limited":[147],"attention":[148,192],"existing":[150],"corpora.":[151],"The":[152,229,245],"process":[154],"involves":[155],"initializing":[156],"model":[159,221,247],"weights":[162],"then":[164],"training":[165],"it":[166,264],"our":[168,235],"using":[171],"sequence":[173],"tagging":[174],"approach.":[175],"enhance":[177,305],"model\u2019s":[179,203],"performance,":[180],"systematically":[182],"explore":[183],"various":[184],"techniques,":[185],"including":[186],"data":[187],"augmentation,":[188],"entity-level":[189],"features,":[190],"mechanisms.":[193],"Additionally,":[194],"conduct":[196],"rigorous":[197],"hyperparameter":[198],"optimization":[199],"to":[200,266,304],"maximize":[201],"accuracy,":[204],"precision,":[205,252],"recall":[207],"mention":[210,307],"recognition.":[211,308],"We":[212],"thoroughly":[213],"evaluate":[214],"performance":[216,241],"fine-tuned":[219,246],"comprehensive":[224],"set":[225],"cross-validation":[227],"experiments.":[228],"results":[230],"highlight":[231],"effectiveness":[233],"tailored":[236],"enhancing":[239],"BioBERT\u2019s":[240],"achieves":[248],"impressive":[249],"F1":[250],"scores,":[251],"recall,":[254],"0.96,":[256],"0.95,":[257],"0.98,":[259],"surpassing":[260],"previous":[261],"when":[263],"comes":[265],"recognizing":[267],"dvu":[269],"(Desulfovibrio":[270],"vulgaris":[271],"Hildenborough)":[272],"gene.":[273],"In":[274],"conclusion,":[275],"this":[276],"underscores":[278],"pivotal":[280],"domain-specific,":[283],"datasets":[285],"also":[291],"highlights":[292],"potential":[295],"such":[301]},"counts_by_year":[{"year":2025,"cited_by_count":5},{"year":2024,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
