{"id":"https://openalex.org/W4392293656","doi":"https://doi.org/10.1007/s40747-024-01378-3","title":"Enhancing medical text detection with vision-language pre-training and efficient segmentation","display_name":"Enhancing medical text detection with vision-language pre-training and efficient segmentation","publication_year":2024,"publication_date":"2024-02-29","ids":{"openalex":"https://openalex.org/W4392293656","doi":"https://doi.org/10.1007/s40747-024-01378-3"},"language":"en","primary_location":{"id":"doi:10.1007/s40747-024-01378-3","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s40747-024-01378-3","pdf_url":"https://link.springer.com/content/pdf/10.1007/s40747-024-01378-3.pdf","source":{"id":"https://openalex.org/S3035462843","display_name":"Complex & Intelligent Systems","issn_l":"2198-6053","issn":["2198-6053","2199-4536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Complex &amp; Intelligent Systems","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://link.springer.com/content/pdf/10.1007/s40747-024-01378-3.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100612764","display_name":"Tianyang Li","orcid":"https://orcid.org/0000-0002-6005-0734"},"institutions":[{"id":"https://openalex.org/I179060312","display_name":"Northeast Electric Power University","ror":"https://ror.org/00zqaxa34","country_code":"CN","type":"education","lineage":["https://openalex.org/I179060312"]},{"id":"https://openalex.org/I3131412887","display_name":"Jiangxi University of Technology","ror":"https://ror.org/05k2j8e48","country_code":"CN","type":"education","lineage":["https://openalex.org/I3131412887"]},{"id":"https://openalex.org/I4210109434","display_name":"Xinyu University","ror":"https://ror.org/021xwcd05","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210109434"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Tianyang Li","raw_affiliation_strings":["College of Computer Science and Technology, Northeast Electric Power University, Jilin, 132012, China","Jiangxi New Energy Technology Institute, Xinyu, 33800, Jiangxi, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"College of Computer Science and Technology, Northeast Electric Power University, Jilin, 132012, China","institution_ids":["https://openalex.org/I179060312"]},{"raw_affiliation_string":"Jiangxi New Energy Technology Institute, Xinyu, 33800, Jiangxi, China","institution_ids":["https://openalex.org/I3131412887","https://openalex.org/I4210109434"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5058403510","display_name":"Jinxu Bai","orcid":"https://orcid.org/0009-0005-9560-402X"},"institutions":[{"id":"https://openalex.org/I179060312","display_name":"Northeast Electric Power University","ror":"https://ror.org/00zqaxa34","country_code":"CN","type":"education","lineage":["https://openalex.org/I179060312"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jinxu Bai","raw_affiliation_strings":["College of Computer Science and Technology, Northeast Electric Power University, Jilin, 132012, China"],"raw_orcid":"https://orcid.org/0009-0005-9560-402X","affiliations":[{"raw_affiliation_string":"College of Computer Science and Technology, Northeast Electric Power University, Jilin, 132012, China","institution_ids":["https://openalex.org/I179060312"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101413958","display_name":"Qingzhu Wang","orcid":null},"institutions":[{"id":"https://openalex.org/I179060312","display_name":"Northeast Electric Power University","ror":"https://ror.org/00zqaxa34","country_code":"CN","type":"education","lineage":["https://openalex.org/I179060312"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qingzhu Wang","raw_affiliation_strings":["College of Computer Science and Technology, Northeast Electric Power University, Jilin, 132012, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"College of Computer Science and Technology, Northeast Electric Power University, Jilin, 132012, China","institution_ids":["https://openalex.org/I179060312"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5100612764"],"corresponding_institution_ids":["https://openalex.org/I179060312","https://openalex.org/I3131412887","https://openalex.org/I4210109434"],"apc_list":{"value":1320,"currency":"GBP","value_usd":1619},"apc_paid":{"value":1320,"currency":"GBP","value_usd":1619},"fwci":1.9046,"has_fulltext":true,"cited_by_count":8,"citation_normalized_percentile":{"value":0.862854,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":97,"max":98},"biblio":{"volume":"10","issue":"3","first_page":"3995","last_page":"4007"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9914000034332275,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9797000288963318,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7866458892822266},{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.7363185882568359},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6713579893112183},{"id":"https://openalex.org/keywords/pyramid","display_name":"Pyramid (geometry)","score":0.6570438742637634},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.6520715951919556},{"id":"https://openalex.org/keywords/convolutional-neural-network","display_name":"Convolutional neural network","score":0.5391836762428284},{"id":"https://openalex.org/keywords/channel","display_name":"Channel (broadcasting)","score":0.5277909636497498},{"id":"https://openalex.org/keywords/residual","display_name":"Residual","score":0.4858800768852234},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.46696096658706665},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.4031071662902832},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.0849241316318512}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7866458892822266},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.7363185882568359},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6713579893112183},{"id":"https://openalex.org/C142575187","wikidata":"https://www.wikidata.org/wiki/Q3358290","display_name":"Pyramid (geometry)","level":2,"score":0.6570438742637634},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.6520715951919556},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.5391836762428284},{"id":"https://openalex.org/C127162648","wikidata":"https://www.wikidata.org/wiki/Q16858953","display_name":"Channel (broadcasting)","level":2,"score":0.5277909636497498},{"id":"https://openalex.org/C155512373","wikidata":"https://www.wikidata.org/wiki/Q287450","display_name":"Residual","level":2,"score":0.4858800768852234},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.46696096658706665},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.4031071662902832},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.0849241316318512},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C120665830","wikidata":"https://www.wikidata.org/wiki/Q14620","display_name":"Optics","level":1,"score":0.0},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1007/s40747-024-01378-3","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s40747-024-01378-3","pdf_url":"https://link.springer.com/content/pdf/10.1007/s40747-024-01378-3.pdf","source":{"id":"https://openalex.org/S3035462843","display_name":"Complex & Intelligent Systems","issn_l":"2198-6053","issn":["2198-6053","2199-4536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Complex &amp; Intelligent Systems","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:d6579a233ee94c83825150be141c5663","is_oa":true,"landing_page_url":"https://doaj.org/article/d6579a233ee94c83825150be141c5663","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Complex & Intelligent Systems, Vol 10, Iss 3, Pp 3995-4007 (2024)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.1007/s40747-024-01378-3","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s40747-024-01378-3","pdf_url":"https://link.springer.com/content/pdf/10.1007/s40747-024-01378-3.pdf","source":{"id":"https://openalex.org/S3035462843","display_name":"Complex & Intelligent Systems","issn_l":"2198-6053","issn":["2198-6053","2199-4536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Complex &amp; Intelligent Systems","raw_type":"journal-article"},"sustainable_development_goals":[{"score":0.7200000286102295,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[{"id":"https://openalex.org/G6427682844","display_name":null,"funder_award_id":"BSZT07202107","funder_id":"https://openalex.org/F4320329141","funder_display_name":"Northeast Electric Power University"}],"funders":[{"id":"https://openalex.org/F4320329141","display_name":"Northeast Electric Power University","ror":"https://ror.org/00zqaxa34"}],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W4392293656.pdf"},"referenced_works_count":55,"referenced_works":["https://openalex.org/W1903029394","https://openalex.org/W1981276685","https://openalex.org/W1988461287","https://openalex.org/W2144554289","https://openalex.org/W2193145675","https://openalex.org/W2194775991","https://openalex.org/W2339589954","https://openalex.org/W2343052201","https://openalex.org/W2519818067","https://openalex.org/W2550687635","https://openalex.org/W2560023338","https://openalex.org/W2593539516","https://openalex.org/W2604735854","https://openalex.org/W2605982830","https://openalex.org/W2784050770","https://openalex.org/W2810028092","https://openalex.org/W2875814315","https://openalex.org/W2953894958","https://openalex.org/W2962804639","https://openalex.org/W2962810613","https://openalex.org/W2963150697","https://openalex.org/W2963161243","https://openalex.org/W2963299604","https://openalex.org/W2963334022","https://openalex.org/W2963353821","https://openalex.org/W2963398399","https://openalex.org/W2963495494","https://openalex.org/W2963529609","https://openalex.org/W2963647456","https://openalex.org/W2963840241","https://openalex.org/W2963977642","https://openalex.org/W2964294787","https://openalex.org/W2965638232","https://openalex.org/W2966926453","https://openalex.org/W2967155990","https://openalex.org/W2967615747","https://openalex.org/W2970231061","https://openalex.org/W2991626090","https://openalex.org/W2998356391","https://openalex.org/W2998621280","https://openalex.org/W3034514377","https://openalex.org/W3035679705","https://openalex.org/W3090449556","https://openalex.org/W3101769104","https://openalex.org/W3102695566","https://openalex.org/W3106228955","https://openalex.org/W3106250896","https://openalex.org/W3148703355","https://openalex.org/W3184364189","https://openalex.org/W3186906052","https://openalex.org/W4214922754","https://openalex.org/W4313172891","https://openalex.org/W4372267020","https://openalex.org/W6600617704","https://openalex.org/W6685165745"],"related_works":["https://openalex.org/W2560215812","https://openalex.org/W2949601986","https://openalex.org/W2788972299","https://openalex.org/W2521347458","https://openalex.org/W3209312100","https://openalex.org/W2990636717","https://openalex.org/W2972212393","https://openalex.org/W4206776094","https://openalex.org/W3154920669","https://openalex.org/W3121197456"],"abstract_inverted_index":{"Abstract":[0],"Detecting":[1],"text":[2,26,43,252,272],"within":[3],"medical":[4,271],"images":[5],"presents":[6],"a":[7,58,64,85,117,138,155,160,196,200,221],"formidable":[8],"challenge":[9],"in":[10,251,285],"the":[11,18,24,29,77,92,99,125,128,144,148,173,184,205,211,216,247,257],"domain":[12],"of":[13,21,32,147,183,262],"computer":[14],"vision":[15],"due":[16],"to":[17,47,164,186,208,287],"intricate":[19],"nature":[20],"textual":[22],"backgrounds,":[23],"dense":[25],"concentration,":[27],"and":[28,41,62,88,98,120,181,241,249],"possible":[30],"existence":[31],"extreme":[33],"aspect":[34],"ratios.":[35],"This":[36,235],"paper":[37],"introduces":[38],"an":[39,54],"effective":[40],"precise":[42],"detection":[44,253],"system":[45,52],"tailored":[46,191],"address":[48],"these":[49],"challenges.":[50],"The":[51,151,260],"incorporates":[53,159],"optimized":[55],"segmentation":[56,71,170,193,206,258],"module,":[57,82,127,158],"trainable":[59],"post-processing":[60,197],"method,":[61],"leverages":[63],"vision-language":[65,222],"pre-training":[66,223],"model":[67,224,237,265],"(oCLIP).":[68],"Specifically,":[69],"our":[70,263],"head":[72],"integrates":[73],"three":[74],"essential":[75],"components:":[76],"Feature":[78,94,101],"Pyramid":[79],"Network":[80],"(FPN)":[81],"which":[83],"combines":[84],"residual":[86,118],"structure":[87,119],"channel":[89,121,139],"attention":[90,122,140,162],"mechanism;":[91],"Efficient":[93],"Enhancement":[95],"Module":[96],"(EFEM);":[97],"Multi-Scale":[100],"Fusion":[102],"with":[103,133,256],"RSEConv":[104,134],"(MSFM-RSE),":[105],"designed":[106,153],"specifically":[107],"for":[108,192],"multi-scale":[109],"feature":[110,149],"fusion":[111],"based":[112],"on":[113,215,229,270],"RSEConv.":[114],"By":[115],"introducing":[116],"mechanism":[123,163],"into":[124],"FPN":[126],"convolutional":[129],"layers":[130,135],"are":[131],"replaced":[132],"that":[136,225],"employ":[137],"mechanism,":[141],"further":[142,244],"augmenting":[143],"representational":[145],"capacity":[146],"maps.":[150],"EFEM,":[152],"as":[154],"cascaded":[156],"U-shaped":[157],"spatial":[161],"introduce":[165,220],"multi-level":[166],"information,":[167],"thereby":[168],"enhancing":[169],"performance.":[171],"Subsequently,":[172],"MSFM-RSE":[174],"adeptly":[175],"amalgamates":[176],"features":[177,190],"from":[178],"various":[179,230],"depths":[180],"scales":[182],"EFEM":[185],"generate":[187],"comprehensive":[188],"final":[189],"purposes.":[194],"Additionally,":[195],"module":[198],"employs":[199],"differentiable":[201],"binarization":[202,212],"strategy,":[203],"allowing":[204],"network":[207],"dynamically":[209],"determine":[210],"threshold.":[213],"Building":[214],"system\u2019s":[217],"improvement,":[218],"we":[219],"undergoes":[226],"extensive":[227],"training":[228],"visual":[231,240],"language":[232],"understanding":[233],"tasks.":[234],"pre-trained":[236],"acquires":[238],"detailed":[239],"semantic":[242],"representations,":[243],"reinforcing":[245],"both":[246],"accuracy":[248],"robustness":[250],"when":[254],"integrated":[255],"module.":[259],"performance":[261,284],"proposed":[264],"was":[266],"evaluated":[267],"through":[268],"experiments":[269,280],"image":[273],"datasets,":[274],"demonstrating":[275],"excellent":[276],"results.":[277],"Multiple":[278],"benchmark":[279],"validate":[281],"its":[282],"superior":[283],"comparison":[286],"existing":[288],"methods.":[289],"Code":[290],"is":[291],"available":[292],"at:":[293],"https://github.com/csworkcode/VLDBNet":[294],".":[295]},"counts_by_year":[{"year":2025,"cited_by_count":4},{"year":2024,"cited_by_count":4}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
