{"id":"https://openalex.org/W4413040045","doi":"https://doi.org/10.3233/shti250927","title":"Can Generative LLMs Help Classify Imbalanced Real-World Data? Exploring Rare Diseases on Social Media","display_name":"Can Generative LLMs Help Classify Imbalanced Real-World Data? Exploring Rare Diseases on Social Media","publication_year":2025,"publication_date":"2025-08-07","ids":{"openalex":"https://openalex.org/W4413040045","doi":"https://doi.org/10.3233/shti250927","pmid":"https://pubmed.ncbi.nlm.nih.gov/40775945"},"language":"en","primary_location":{"id":"doi:10.3233/shti250927","is_oa":true,"landing_page_url":"https://doi.org/10.3233/shti250927","pdf_url":null,"source":{"id":"https://openalex.org/S4210179765","display_name":"Studies in health technology and informatics","issn_l":"0926-9630","issn":["0926-9630","1879-8365"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310318577","host_organization_name":"IOS Press","host_organization_lineage":["https://openalex.org/P4310318577"],"host_organization_lineage_names":["IOS Press"],"type":"book series"},"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Studies in Health Technology and Informatics","raw_type":"book-chapter"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://doi.org/10.3233/shti250927","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5069132937","display_name":"Emma Le Priol","orcid":"https://orcid.org/0009-0004-0958-6949"},"institutions":[{"id":"https://openalex.org/I4210165077","display_name":"Institut des Maladies G\u00e9n\u00e9tiques Imagine","ror":"https://ror.org/05rq3rb55","country_code":"FR","type":"facility","lineage":["https://openalex.org/I154526488","https://openalex.org/I204730241","https://openalex.org/I4210100892","https://openalex.org/I4210165077"]},{"id":"https://openalex.org/I204730241","display_name":"Universit\u00e9 Paris Cit\u00e9","ror":"https://ror.org/05f82e368","country_code":"FR","type":"education","lineage":["https://openalex.org/I204730241"]},{"id":"https://openalex.org/I154526488","display_name":"Inserm","ror":"https://ror.org/02vjkv261","country_code":"FR","type":"funder","lineage":["https://openalex.org/I154526488"]}],"countries":["FR"],"is_corresponding":true,"raw_author_name":"Emma Le Priol","raw_affiliation_strings":["Clinical Bio-Informatics Laboratory, Universit\u00e9 Paris Cit\u00e9, INSERM UMR 1163, Imagine Institute, Paris, France"],"affiliations":[{"raw_affiliation_string":"Clinical Bio-Informatics Laboratory, Universit\u00e9 Paris Cit\u00e9, INSERM UMR 1163, Imagine Institute, Paris, France","institution_ids":["https://openalex.org/I154526488","https://openalex.org/I204730241","https://openalex.org/I4210165077"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5037799674","display_name":"Joseph Le Potier","orcid":"https://orcid.org/0009-0000-5160-7927"},"institutions":[{"id":"https://openalex.org/I4210165077","display_name":"Institut des Maladies G\u00e9n\u00e9tiques Imagine","ror":"https://ror.org/05rq3rb55","country_code":"FR","type":"facility","lineage":["https://openalex.org/I154526488","https://openalex.org/I204730241","https://openalex.org/I4210100892","https://openalex.org/I4210165077"]},{"id":"https://openalex.org/I154526488","display_name":"Inserm","ror":"https://ror.org/02vjkv261","country_code":"FR","type":"funder","lineage":["https://openalex.org/I154526488"]},{"id":"https://openalex.org/I204730241","display_name":"Universit\u00e9 Paris Cit\u00e9","ror":"https://ror.org/05f82e368","country_code":"FR","type":"education","lineage":["https://openalex.org/I204730241"]}],"countries":["FR"],"is_corresponding":false,"raw_author_name":"Juliette Potier","raw_affiliation_strings":["Clinical Bio-Informatics Laboratory, Universit\u00e9 Paris Cit\u00e9, INSERM UMR 1163, Imagine Institute, Paris, France"],"affiliations":[{"raw_affiliation_string":"Clinical Bio-Informatics Laboratory, Universit\u00e9 Paris Cit\u00e9, INSERM UMR 1163, Imagine Institute, Paris, France","institution_ids":["https://openalex.org/I154526488","https://openalex.org/I204730241","https://openalex.org/I4210165077"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5037397269","display_name":"Anita Burgun","orcid":"https://orcid.org/0000-0001-6855-4366"},"institutions":[{"id":"https://openalex.org/I154526488","display_name":"Inserm","ror":"https://ror.org/02vjkv261","country_code":"FR","type":"funder","lineage":["https://openalex.org/I154526488"]},{"id":"https://openalex.org/I204730241","display_name":"Universit\u00e9 Paris Cit\u00e9","ror":"https://ror.org/05f82e368","country_code":"FR","type":"education","lineage":["https://openalex.org/I204730241"]},{"id":"https://openalex.org/I4210165077","display_name":"Institut des Maladies G\u00e9n\u00e9tiques Imagine","ror":"https://ror.org/05rq3rb55","country_code":"FR","type":"facility","lineage":["https://openalex.org/I154526488","https://openalex.org/I204730241","https://openalex.org/I4210100892","https://openalex.org/I4210165077"]},{"id":"https://openalex.org/I4210152232","display_name":"Institute of Informatics of the Slovak Academy of Sciences","ror":"https://ror.org/04jgqpc26","country_code":"SK","type":"facility","lineage":["https://openalex.org/I207624831","https://openalex.org/I4210152232"]}],"countries":["FR","SK"],"is_corresponding":false,"raw_author_name":"Anita Burgun","raw_affiliation_strings":["Clinical Bio-Informatics Laboratory, Universit\u00e9 Paris Cit\u00e9, INSERM UMR 1163, Imagine Institute, Paris, France","Department of Medical Informatics, Necker Hospital, AP-HP"],"affiliations":[{"raw_affiliation_string":"Clinical Bio-Informatics Laboratory, Universit\u00e9 Paris Cit\u00e9, INSERM UMR 1163, Imagine Institute, Paris, France","institution_ids":["https://openalex.org/I154526488","https://openalex.org/I204730241","https://openalex.org/I4210165077"]},{"raw_affiliation_string":"Department of Medical Informatics, Necker Hospital, AP-HP","institution_ids":["https://openalex.org/I4210152232"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5069132937"],"corresponding_institution_ids":["https://openalex.org/I154526488","https://openalex.org/I204730241","https://openalex.org/I4210165077"],"apc_list":null,"apc_paid":null,"fwci":4.9041,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.95125628,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":"329","issue":null,"first_page":"683","last_page":"687"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.8751000165939331,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.8751000165939331,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12478","display_name":"Wikis in Education and Collaboration","score":0.8421000242233276,"subfield":{"id":"https://openalex.org/subfields/3315","display_name":"Communication"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11995","display_name":"FinTech, Crowdfunding, Digital Finance","score":0.8033000230789185,"subfield":{"id":"https://openalex.org/subfields/1404","display_name":"Management Information Systems"},"field":{"id":"https://openalex.org/fields/14","display_name":"Business, Management and Accounting"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5463512539863586},{"id":"https://openalex.org/keywords/generative-grammar","display_name":"Generative grammar","score":0.5318173170089722},{"id":"https://openalex.org/keywords/social-media","display_name":"Social media","score":0.5156658887863159},{"id":"https://openalex.org/keywords/synthetic-data","display_name":"Synthetic data","score":0.5145432353019714},{"id":"https://openalex.org/keywords/generative-model","display_name":"Generative model","score":0.5035976767539978},{"id":"https://openalex.org/keywords/macro","display_name":"Macro","score":0.5004870891571045},{"id":"https://openalex.org/keywords/recall","display_name":"Recall","score":0.4762061834335327},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4486525058746338},{"id":"https://openalex.org/keywords/binary-number","display_name":"Binary number","score":0.4276256859302521},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.35724514722824097},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.33539316058158875},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.3099057674407959},{"id":"https://openalex.org/keywords/cognitive-psychology","display_name":"Cognitive psychology","score":0.18763628602027893},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.1021479070186615},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.07989093661308289}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5463512539863586},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.5318173170089722},{"id":"https://openalex.org/C518677369","wikidata":"https://www.wikidata.org/wiki/Q202833","display_name":"Social media","level":2,"score":0.5156658887863159},{"id":"https://openalex.org/C160920958","wikidata":"https://www.wikidata.org/wiki/Q7662746","display_name":"Synthetic data","level":2,"score":0.5145432353019714},{"id":"https://openalex.org/C167966045","wikidata":"https://www.wikidata.org/wiki/Q5532625","display_name":"Generative model","level":3,"score":0.5035976767539978},{"id":"https://openalex.org/C166955791","wikidata":"https://www.wikidata.org/wiki/Q629579","display_name":"Macro","level":2,"score":0.5004870891571045},{"id":"https://openalex.org/C100660578","wikidata":"https://www.wikidata.org/wiki/Q18733","display_name":"Recall","level":2,"score":0.4762061834335327},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4486525058746338},{"id":"https://openalex.org/C48372109","wikidata":"https://www.wikidata.org/wiki/Q3913","display_name":"Binary number","level":2,"score":0.4276256859302521},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.35724514722824097},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.33539316058158875},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.3099057674407959},{"id":"https://openalex.org/C180747234","wikidata":"https://www.wikidata.org/wiki/Q23373","display_name":"Cognitive psychology","level":1,"score":0.18763628602027893},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.1021479070186615},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.07989093661308289},{"id":"https://openalex.org/C94375191","wikidata":"https://www.wikidata.org/wiki/Q11205","display_name":"Arithmetic","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[{"descriptor_ui":"D006801","descriptor_name":"Humans","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D006801","descriptor_name":"Humans","qualifier_ui":null,"qualifier_name":null,"is_major_topic":false},{"descriptor_ui":"D009323","descriptor_name":"Natural Language Processing","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D009323","descriptor_name":"Natural Language Processing","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D035583","descriptor_name":"Rare Diseases","qualifier_ui":"Q000145","qualifier_name":"classification","is_major_topic":true},{"descriptor_ui":"D035583","descriptor_name":"Rare Diseases","qualifier_ui":"Q000145","qualifier_name":"classification","is_major_topic":true},{"descriptor_ui":"D035583","descriptor_name":"Rare Diseases","qualifier_ui":"Q000175","qualifier_name":"diagnosis","is_major_topic":true},{"descriptor_ui":"D035583","descriptor_name":"Rare Diseases","qualifier_ui":"Q000175","qualifier_name":"diagnosis","is_major_topic":true},{"descriptor_ui":"D061108","descriptor_name":"Social Media","qualifier_ui":"Q000706","qualifier_name":"statistics & numerical data","is_major_topic":true},{"descriptor_ui":"D061108","descriptor_name":"Social Media","qualifier_ui":"Q000706","qualifier_name":"statistics & numerical data","is_major_topic":true}],"locations_count":3,"locations":[{"id":"doi:10.3233/shti250927","is_oa":true,"landing_page_url":"https://doi.org/10.3233/shti250927","pdf_url":null,"source":{"id":"https://openalex.org/S4210179765","display_name":"Studies in health technology and informatics","issn_l":"0926-9630","issn":["0926-9630","1879-8365"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310318577","host_organization_name":"IOS Press","host_organization_lineage":["https://openalex.org/P4310318577"],"host_organization_lineage_names":["IOS Press"],"type":"book series"},"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Studies in Health Technology and Informatics","raw_type":"book-chapter"},{"id":"pmid:40775945","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/40775945","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Studies in health technology and informatics","raw_type":null},{"id":"pmh:oai:HAL:hal-05394744v1","is_oa":false,"landing_page_url":"https://inria.hal.science/hal-05394744","pdf_url":null,"source":{"id":"https://openalex.org/S4406922461","display_name":"SPIRE - Sciences Po Institutional REpository","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"MEDINFO 2025, Aug 2025, Taipei, Taiwan. &#x27E8;10.3233/SHTI250927&#x27E9;","raw_type":"Conference papers"}],"best_oa_location":{"id":"doi:10.3233/shti250927","is_oa":true,"landing_page_url":"https://doi.org/10.3233/shti250927","pdf_url":null,"source":{"id":"https://openalex.org/S4210179765","display_name":"Studies in health technology and informatics","issn_l":"0926-9630","issn":["0926-9630","1879-8365"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310318577","host_organization_name":"IOS Press","host_organization_lineage":["https://openalex.org/P4310318577"],"host_organization_lineage_names":["IOS Press"],"type":"book series"},"license":"cc-by-nc","license_id":"https://openalex.org/licenses/cc-by-nc","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Studies in Health Technology and Informatics","raw_type":"book-chapter"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education","score":0.44999998807907104}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W4365211920","https://openalex.org/W3014948380","https://openalex.org/W4391584540","https://openalex.org/W4380551139","https://openalex.org/W4317695495","https://openalex.org/W4395044357","https://openalex.org/W4287117424","https://openalex.org/W4387506531","https://openalex.org/W2087346071","https://openalex.org/W2967848559"],"abstract_inverted_index":{"Developmental":[0],"and":[1,69,93,114,130],"Epileptic":[2],"Encephalopathies":[3],"(DEEs)":[4],"are":[5],"rare,":[6],"severe":[7],"conditions":[8],"often":[9],"discussed":[10],"by":[11],"families":[12],"on":[13],"social":[14,54],"media,":[15],"offering":[16],"valuable":[17],"insights":[18],"into":[19],"their":[20],"experiences.":[21],"Identifying":[22],"these":[23],"messages":[24],"amidst":[25],"unrelated":[26],"content":[27],"is":[28,132],"crucial":[29],"but":[30],"challenging":[31],"due":[32],"to":[33,101],"data":[34,71,86,99,117,120],"imbalance.":[35],"This":[36],"study":[37],"evaluates":[38],"different":[39],"uses":[40],"of":[41,50,83,97,109],"generative":[42],"large":[43],"language":[44],"models":[45,129],"(LLMs)":[46],"for":[47,73,118],"binary":[48],"classification":[49,68],"DEE-related":[51],"experiences":[52],"within":[53],"media":[55],"posts.":[56],"Using":[57],"CamemBERT":[58],"as":[59],"a":[60],"baseline,":[61],"we":[62],"compared":[63],"two":[64],"strategies:":[65],"zero-shot":[66,78],"prompt-based":[67],"synthetic":[70,85,98,116],"generation":[72],"minority":[74],"class":[75],"augmentation.":[76],"While":[77],"prompting":[79],"underperformed,":[80],"the":[81,107],"addition":[82],"2%":[84],"improved":[87],"all":[88],"metrics":[89],"(macro/positive":[90],"F1,":[91],"precision":[92],"recall).":[94],"Higher":[95],"proportions":[96],"led":[100],"decreased":[102],"precision.":[103],"These":[104],"findings":[105],"underscore":[106],"potential":[108],"hybrid":[110],"approaches":[111],"combining":[112],"fine-tuning":[113],"domain-specific":[115],"addressing":[119],"imbalance":[121],"in":[122],"rare":[123],"disease":[124],"contexts.":[125],"Further":[126],"validation":[127],"across":[128],"datasets":[131],"needed.":[133]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
