{"id":"https://openalex.org/W4393864958","doi":"https://doi.org/10.1109/access.2024.3384496","title":"Evaluation and Analysis of Large Language Models for Clinical Text Augmentation and Generation","display_name":"Evaluation and Analysis of Large Language Models for Clinical Text Augmentation and Generation","publication_year":2024,"publication_date":"2024-01-01","ids":{"openalex":"https://openalex.org/W4393864958","doi":"https://doi.org/10.1109/access.2024.3384496"},"language":"en","primary_location":{"id":"doi:10.1109/access.2024.3384496","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2024.3384496","pdf_url":"https://ieeexplore.ieee.org/ielx7/6287639/6514899/10489969.pdf","source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://ieeexplore.ieee.org/ielx7/6287639/6514899/10489969.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5103178226","display_name":"Atif Latif","orcid":"https://orcid.org/0009-0007-8384-7272"},"institutions":[{"id":"https://openalex.org/I205490536","display_name":"Dongguk University","ror":"https://ror.org/057q6n778","country_code":"KR","type":"education","lineage":["https://openalex.org/I205490536"]}],"countries":["KR"],"is_corresponding":true,"raw_author_name":"Atif Latif","raw_affiliation_strings":["Department of Artificial Intelligence, Dongguk University, Jung-gu, Seoul, South Korea"],"affiliations":[{"raw_affiliation_string":"Department of Artificial Intelligence, Dongguk University, Jung-gu, Seoul, South Korea","institution_ids":["https://openalex.org/I205490536"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5080664764","display_name":"Jihie Kim","orcid":"https://orcid.org/0000-0003-2358-4021"},"institutions":[{"id":"https://openalex.org/I205490536","display_name":"Dongguk University","ror":"https://ror.org/057q6n778","country_code":"KR","type":"education","lineage":["https://openalex.org/I205490536"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Jihie Kim","raw_affiliation_strings":["College of AI Convergence, Dongguk University, Jung-gu, Seoul, South Korea"],"affiliations":[{"raw_affiliation_string":"College of AI Convergence, Dongguk University, Jung-gu, Seoul, South Korea","institution_ids":["https://openalex.org/I205490536"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5103178226"],"corresponding_institution_ids":["https://openalex.org/I205490536"],"apc_list":{"value":1850,"currency":"USD","value_usd":1850},"apc_paid":{"value":1850,"currency":"USD","value_usd":1850},"fwci":7.1815,"has_fulltext":true,"cited_by_count":20,"citation_normalized_percentile":{"value":0.97418073,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"1"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13702","display_name":"Machine Learning in Healthcare","score":0.9907000064849854,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11636","display_name":"Artificial Intelligence in Healthcare and Education","score":0.9855999946594238,"subfield":{"id":"https://openalex.org/subfields/2718","display_name":"Health Informatics"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8167515993118286},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.6877049207687378},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.5590051412582397},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5486692190170288},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5277422070503235},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.4469894766807556},{"id":"https://openalex.org/keywords/training-set","display_name":"Training set","score":0.43886691331863403}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8167515993118286},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.6877049207687378},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.5590051412582397},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5486692190170288},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5277422070503235},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4469894766807556},{"id":"https://openalex.org/C51632099","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Training set","level":2,"score":0.43886691331863403},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/access.2024.3384496","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2024.3384496","pdf_url":"https://ieeexplore.ieee.org/ielx7/6287639/6514899/10489969.pdf","source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:5f0446a4b9e942709266a93290953252","is_oa":true,"landing_page_url":"https://doaj.org/article/5f0446a4b9e942709266a93290953252","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"IEEE Access, Vol 12, Pp 48987-48996 (2024)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.1109/access.2024.3384496","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2024.3384496","pdf_url":"https://ieeexplore.ieee.org/ielx7/6287639/6514899/10489969.pdf","source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},"sustainable_development_goals":[{"score":0.6200000047683716,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[{"id":"https://openalex.org/G1739134435","display_name":null,"funder_award_id":"IITP-","funder_id":"https://openalex.org/F4320324891","funder_display_name":"Iran Telecommunication Research Center"},{"id":"https://openalex.org/G2629994998","display_name":null,"funder_award_id":"IITP-2024-RS-2023-00254592","funder_id":"https://openalex.org/F4320328359","funder_display_name":"Ministry of Science and ICT, South Korea"},{"id":"https://openalex.org/G3742266538","display_name":null,"funder_award_id":"IITP-2024-2020-0-01789","funder_id":"https://openalex.org/F4320328359","funder_display_name":"Ministry of Science and ICT, South Korea"},{"id":"https://openalex.org/G5116673687","display_name":null,"funder_award_id":"IITP-2024-RS-2023-00254592","funder_id":"https://openalex.org/F4320335489","funder_display_name":"Institute for Information and Communications Technology Promotion"}],"funders":[{"id":"https://openalex.org/F4320324891","display_name":"Iran Telecommunication Research Center","ror":"https://ror.org/01a3g2z22"},{"id":"https://openalex.org/F4320328359","display_name":"Ministry of Science and ICT, South Korea","ror":"https://ror.org/01wpjm123"},{"id":"https://openalex.org/F4320335489","display_name":"Institute for Information and Communications Technology Promotion","ror":"https://ror.org/01g0hqq23"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4393864958.pdf","grobid_xml":"https://content.openalex.org/works/W4393864958.grobid-xml"},"referenced_works_count":57,"referenced_works":["https://openalex.org/W1956340063","https://openalex.org/W1977134866","https://openalex.org/W2109974590","https://openalex.org/W2133512280","https://openalex.org/W2150824314","https://openalex.org/W2251658415","https://openalex.org/W2896457183","https://openalex.org/W2911489562","https://openalex.org/W2921249176","https://openalex.org/W2936695845","https://openalex.org/W2937845937","https://openalex.org/W2949736877","https://openalex.org/W2963545917","https://openalex.org/W2965373594","https://openalex.org/W2970796366","https://openalex.org/W2971296908","https://openalex.org/W2998184481","https://openalex.org/W3034999214","https://openalex.org/W3035331128","https://openalex.org/W3100742171","https://openalex.org/W3111372685","https://openalex.org/W3115881617","https://openalex.org/W3135135238","https://openalex.org/W3152320027","https://openalex.org/W3174828871","https://openalex.org/W3191244417","https://openalex.org/W3201244947","https://openalex.org/W4205403018","https://openalex.org/W4210721047","https://openalex.org/W4226323522","https://openalex.org/W4285306300","https://openalex.org/W4287829148","https://openalex.org/W4288089799","https://openalex.org/W4313452733","https://openalex.org/W4318931874","https://openalex.org/W4319301446","https://openalex.org/W4319301505","https://openalex.org/W4319662928","https://openalex.org/W4323244244","https://openalex.org/W4384071683","https://openalex.org/W4385076068","https://openalex.org/W4385570982","https://openalex.org/W4385573325","https://openalex.org/W4386566910","https://openalex.org/W4386840094","https://openalex.org/W4388725043","https://openalex.org/W4389523957","https://openalex.org/W6755207826","https://openalex.org/W6761189655","https://openalex.org/W6761205521","https://openalex.org/W6761672038","https://openalex.org/W6769311223","https://openalex.org/W6769627184","https://openalex.org/W6774569510","https://openalex.org/W6778883912","https://openalex.org/W6845880546","https://openalex.org/W6856669201"],"related_works":["https://openalex.org/W4288365749","https://openalex.org/W2936497627","https://openalex.org/W3013624417","https://openalex.org/W4287826556","https://openalex.org/W3098382480","https://openalex.org/W4287598411","https://openalex.org/W3100913109","https://openalex.org/W3198458223","https://openalex.org/W3126642501","https://openalex.org/W2964413124"],"abstract_inverted_index":{"A":[0],"major":[1],"challenge":[2],"in":[3,17,31,108,118,213,252],"deep":[4],"learning":[5],"(DL)":[6],"model":[7,250],"training":[8,81],"is":[9,14],"data":[10,111,136,142],"scarcity.":[11],"Data":[12],"scarcity":[13],"commonly":[15],"found":[16],"specific":[18],"domains,":[19],"such":[20,46,133],"as":[21,47,134],"clinical":[22,120,146,165,177],"or":[23],"low-resource":[24],"languages,":[25],"that":[26,228,247],"are":[27],"not":[28,164],"vastly":[29],"explored":[30],"AI":[32],"research.":[33],"In":[34,122],"this":[35],"paper,":[36],"we":[37,128,199],"investigate":[38,100],"the":[39,68,80,94,101,115,119,125,150,156,187,217,222,235,238,248],"generation":[40],"capability":[41],"of":[42,79,103,153,221,237,259],"large":[43,104],"language":[44,105,208],"models":[45],"Text-To-Text":[48],"Transfer":[49],"Transformer":[50],"(T5)":[51],"and":[52,54,90,139,158,183,193,266],"Bidirectional":[53],"Auto-Regressive":[55],"Transformers":[56],"(BART)":[57],"for":[58,110,189,261,264,268],"Clinical":[59],"Health-Aware":[60],"Reasoning":[61],"across":[62,175],"Dimensions":[63],"(CHARDAT)":[64],"dataset":[65,157],"by":[66],"applying":[67],"ChatGPT":[69,74,107,126,148,232],"augmentation":[70,112,131,137,143,191,229,233],"technique.":[71],"We":[72,185],"employed":[73,240],"to":[75,93,99,113,124,145],"rephrase":[76],"each":[77],"instance":[78],"set":[82],"into":[83],"conceptually":[84],"similar":[85],"but":[86,163],"semantically":[87],"different":[88,190,206],"samples":[89],"augmented":[91],"them":[92],"dataset.":[95],"This":[96],"study":[97],"aims":[98],"utilization":[102],"models,":[106,209],"particular,":[109],"overcome":[114],"limited":[116],"availability":[117],"domain.":[121],"addition":[123],"augmentation,":[127],"applied":[129],"other":[130],"techniques,":[132],"easy":[135],"(EDA)":[138],"an":[140],"easier":[141],"(AEDA),":[144],"data.":[147],"comprehended":[149],"contextual":[151],"significance":[152],"sentences":[154],"within":[155],"successfully":[159],"modified":[160],"English":[161],"terms":[162],"terms.":[166],"The":[167],"original":[168],"CHARDAT":[169,223],"datasets":[170],"represent":[171],"52":[172],"health":[173],"conditions":[174],"three":[176],"dimensions,":[178],"i.e.,":[179],"Treatments,":[180],"Risk":[181],"Factors,":[182],"Preventions.":[184],"compared":[186],"outputs":[188],"techniques":[192,203],"evaluated":[194],"their":[195,211],"relative":[196],"performance.":[197],"Additionally,":[198],"examined":[200],"how":[201],"these":[202],"perform":[204],"with":[205],"pre-trained":[207],"assessing":[210],"sensitivity":[212],"various":[214],"contexts.":[215],"Despite":[216],"relatively":[218],"small":[219],"size":[220],"dataset,":[224],"our":[225,244],"results":[226],"demonstrated":[227],"methods":[230],"like":[231],"surpassed":[234],"efficiency":[236],"previously":[239],"back-translation":[241],"augmentation.":[242],"Specifically,":[243],"findings":[245],"revealed":[246],"BART":[249],"resulted":[251],"superior":[253],"performance,":[254],"achieving":[255],"a":[256],"rouge":[257],"score":[258],"52.35":[260],"ROUGE-1,":[262],"41.59":[263],"ROUGE-2,":[265],"50.71":[267],"ROUGE-L.":[269]},"counts_by_year":[{"year":2025,"cited_by_count":15},{"year":2024,"cited_by_count":5}],"updated_date":"2026-03-16T09:10:04.655348","created_date":"2025-10-10T00:00:00"}
