{"id":"https://openalex.org/W7116416054","doi":"https://doi.org/10.1109/access.2025.3646481","title":"Development of Deep Learning Methods for Visual Document Classification Using Hybrid Vision Transformer\u2013EfficientNet Architecture","display_name":"Development of Deep Learning Methods for Visual Document Classification Using Hybrid Vision Transformer\u2013EfficientNet Architecture","publication_year":2025,"publication_date":"2025-12-19","ids":{"openalex":"https://openalex.org/W7116416054","doi":"https://doi.org/10.1109/access.2025.3646481"},"language":null,"primary_location":{"id":"doi:10.1109/access.2025.3646481","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2025.3646481","pdf_url":null,"source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1109/access.2025.3646481","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5106607733","display_name":"Medet Ashimgaliyev","orcid":null},"institutions":[{"id":"https://openalex.org/I10232997","display_name":"L. N. Gumilyov Eurasian National University","ror":"https://ror.org/0242cby63","country_code":"KZ","type":"education","lineage":["https://openalex.org/I10232997"]}],"countries":["KZ"],"is_corresponding":false,"raw_author_name":"Medet Ashimgaliyev","raw_affiliation_strings":["Faculty of Information Technologies, L. N. Gumilyov Eurasian National University, Astana, Kazakhstan"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Faculty of Information Technologies, L. N. Gumilyov Eurasian National University, Astana, Kazakhstan","institution_ids":["https://openalex.org/I10232997"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5120881123","display_name":"Emerson Raja Joseph","orcid":null},"institutions":[{"id":"https://openalex.org/I173029219","display_name":"Multimedia University","ror":"https://ror.org/04zrbnc33","country_code":"MY","type":"education","lineage":["https://openalex.org/I173029219"]}],"countries":["MY"],"is_corresponding":false,"raw_author_name":"Emerson Raja Joseph","raw_affiliation_strings":["Faculty of Engineering and Technology, Multimedia University, Melaka, Malaysia"],"raw_orcid":"https://orcid.org/0000-0002-4512-0802","affiliations":[{"raw_affiliation_string":"Faculty of Engineering and Technology, Multimedia University, Melaka, Malaysia","institution_ids":["https://openalex.org/I173029219"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5120929316","display_name":"Ainur Zhumadillayeva","orcid":null},"institutions":[{"id":"https://openalex.org/I10232997","display_name":"L. N. Gumilyov Eurasian National University","ror":"https://ror.org/0242cby63","country_code":"KZ","type":"education","lineage":["https://openalex.org/I10232997"]}],"countries":["KZ"],"is_corresponding":false,"raw_author_name":"Ainur Zhumadillayeva","raw_affiliation_strings":["Faculty of Information Technologies, L. N. Gumilyov Eurasian National University, Astana, Kazakhstan"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Faculty of Information Technologies, L. N. Gumilyov Eurasian National University, Astana, Kazakhstan","institution_ids":["https://openalex.org/I10232997"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5113176964","display_name":"Aigerim Baimakhanova","orcid":null},"institutions":[{"id":"https://openalex.org/I2801707353","display_name":"Ahmet Yesevi University","ror":"https://ror.org/01gtvs751","country_code":"KZ","type":"education","lineage":["https://openalex.org/I2801707353"]}],"countries":["KZ"],"is_corresponding":false,"raw_author_name":"Aigerim Baimakhanova","raw_affiliation_strings":["Department of Computer Engineering, Khoja Akhmet Yassawi International Kazakh-Turkish University, Turkistan, Kazakhstan"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Computer Engineering, Khoja Akhmet Yassawi International Kazakh-Turkish University, Turkistan, Kazakhstan","institution_ids":["https://openalex.org/I2801707353"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":{"value":1850,"currency":"USD","value_usd":1850},"apc_paid":{"value":1850,"currency":"USD","value_usd":1850},"fwci":0.9349,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.82497606,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":98},"biblio":{"volume":"14","issue":null,"first_page":"28041","last_page":"28053"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9664000272750854,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9664000272750854,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10824","display_name":"Image Retrieval and Classification Techniques","score":0.007799999788403511,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.004399999976158142,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.6401000022888184},{"id":"https://openalex.org/keywords/component","display_name":"Component (thermodynamics)","score":0.4909000098705292},{"id":"https://openalex.org/keywords/optical-character-recognition","display_name":"Optical character recognition","score":0.43479999899864197},{"id":"https://openalex.org/keywords/feature-extraction","display_name":"Feature extraction","score":0.4341000020503998},{"id":"https://openalex.org/keywords/precision-and-recall","display_name":"Precision and recall","score":0.4278999865055084},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.41920000314712524},{"id":"https://openalex.org/keywords/architecture","display_name":"Architecture","score":0.4097999930381775},{"id":"https://openalex.org/keywords/automation","display_name":"Automation","score":0.3582000136375427},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.3515999913215637}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8414000272750854},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6614999771118164},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.6401000022888184},{"id":"https://openalex.org/C168167062","wikidata":"https://www.wikidata.org/wiki/Q1117970","display_name":"Component (thermodynamics)","level":2,"score":0.4909000098705292},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.436599999666214},{"id":"https://openalex.org/C546480517","wikidata":"https://www.wikidata.org/wiki/Q167555","display_name":"Optical character recognition","level":3,"score":0.43479999899864197},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.4341000020503998},{"id":"https://openalex.org/C81669768","wikidata":"https://www.wikidata.org/wiki/Q2359161","display_name":"Precision and recall","level":2,"score":0.4278999865055084},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.41920000314712524},{"id":"https://openalex.org/C123657996","wikidata":"https://www.wikidata.org/wiki/Q12271","display_name":"Architecture","level":2,"score":0.4097999930381775},{"id":"https://openalex.org/C115901376","wikidata":"https://www.wikidata.org/wiki/Q184199","display_name":"Automation","level":2,"score":0.3582000136375427},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.3515999913215637},{"id":"https://openalex.org/C75294576","wikidata":"https://www.wikidata.org/wiki/Q5165192","display_name":"Contextual image classification","level":3,"score":0.34290000796318054},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.33649998903274536},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.33640000224113464},{"id":"https://openalex.org/C2780479914","wikidata":"https://www.wikidata.org/wiki/Q302088","display_name":"Document classification","level":2,"score":0.33009999990463257},{"id":"https://openalex.org/C12267149","wikidata":"https://www.wikidata.org/wiki/Q282453","display_name":"Support vector machine","level":2,"score":0.32760000228881836},{"id":"https://openalex.org/C150899416","wikidata":"https://www.wikidata.org/wiki/Q1820378","display_name":"Transfer of learning","level":2,"score":0.31209999322891235},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.29829999804496765},{"id":"https://openalex.org/C136389625","wikidata":"https://www.wikidata.org/wiki/Q334384","display_name":"Supervised learning","level":3,"score":0.27790001034736633},{"id":"https://openalex.org/C2778827112","wikidata":"https://www.wikidata.org/wiki/Q22245680","display_name":"Feature engineering","level":3,"score":0.2727000117301941},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.2712000012397766},{"id":"https://openalex.org/C67905146","wikidata":"https://www.wikidata.org/wiki/Q5287646","display_name":"Document processing","level":2,"score":0.2709999978542328},{"id":"https://openalex.org/C2776151529","wikidata":"https://www.wikidata.org/wiki/Q3045304","display_name":"Object detection","level":3,"score":0.2702000141143799},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.26269999146461487},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.260699987411499},{"id":"https://openalex.org/C113843644","wikidata":"https://www.wikidata.org/wiki/Q901882","display_name":"Interface (matter)","level":4,"score":0.2551000118255615}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/access.2025.3646481","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2025.3646481","pdf_url":null,"source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1109/access.2025.3646481","is_oa":true,"landing_page_url":"https://doi.org/10.1109/access.2025.3646481","pdf_url":null,"source":{"id":"https://openalex.org/S2485537415","display_name":"IEEE Access","issn_l":"2169-3536","issn":["2169-3536"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Access","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"The":[0,66],"rapid":[1],"expansion":[2],"of":[3,14,101,111,129,168,179,218],"digital":[4,183],"archives":[5],"and":[6,16,117,125,132,140,185,221,232],"scanned":[7,210],"document":[8,18,180,203],"collections":[9],"has":[10],"underscored":[11],"the":[12,74,104,133,151,166,177],"importance":[13],"reliable":[15],"efficient":[17],"classification":[19,95,152,181],"techniques.":[20],"Traditional":[21],"methods":[22,195],"that":[23,54,196],"combine":[24],"optical":[25],"character":[26],"recognition":[27],"(OCR)":[28],"with":[29,60],"classical":[30],"machine":[31],"learning":[32,52],"often":[33],"fall":[34],"short":[35],"when":[36],"processing":[37],"diverse,":[38],"low-quality,":[39],"or":[40,201],"unstructured":[41],"archival":[42,102,190,211,237],"documents.":[43,65,212],"In":[44],"response,":[45],"this":[46,205],"study":[47],"introduces":[48],"a":[49,56,88,99,155,160,215,224],"hybrid":[50,169],"deep":[51],"framework":[53],"merges":[55],"Vision":[57],"Transformer":[58],"(ViT)":[59],"EfficientNet":[61,67],"for":[62],"classifying":[63],"visual":[64],"component":[68,76],"captures":[69],"detailed":[70],"local":[71],"features,":[72],"while":[73],"ViT":[75,119],"focuses":[77],"on":[78,98,198,235],"broader":[79],"contextual":[80],"information.":[81],"These":[82,163],"complementary":[83],"insights":[84],"are":[85],"unified":[86,225],"through":[87],"feature":[89],"fusion":[90],"mechanism,":[91],"resulting":[92],"in":[93,174,182],"improved":[94],"accuracy.":[96],"Tested":[97],"dataset":[100],"materials,":[103],"HybridViT":[105],"model":[106,134],"reached":[107],"an":[108],"overall":[109],"accuracy":[110],"98.2%,":[112],"surpassing":[113],"standard":[114],"CNN":[115,172],"(92.3%)":[116],"standalone":[118],"(94.1%)":[120],"models.":[121],"Additionally,":[122],"both":[123,229],"precision":[124],"recall":[126],"saw":[127],"gains":[128],"around":[130],"3\u20135%,":[131],"demonstrated":[135],"enhanced":[136],"resilience":[137],"to":[138,149,188],"noise":[139],"distortions.":[141],"A":[142],"prototype":[143],"information":[144],"system":[145],"was":[146],"also":[147],"created":[148],"incorporate":[150],"engine":[153],"into":[154],"user-friendly":[156],"interface":[157],"backed":[158],"by":[159],"structured":[161,216],"database.":[162],"findings":[164],"highlight":[165],"promise":[167],"transformer":[170],"-":[171],"models":[173],"pushing":[175],"forward":[176],"automation":[178],"repositories":[184],"enhancing":[186],"access":[187],"extensive":[189],"datasets.":[191],"Unlike":[192],"earlier":[193],"YOLO-based":[194],"concentrated":[197],"natural":[199],"imagery":[200],"artificial":[202],"datasets,":[204],"research":[206],"specifically":[207],"addresses":[208],"manually":[209],"It":[213],"conducts":[214],"comparison":[217],"YOLOv4,":[219],"YOLOv5,":[220],"YOLOv8":[222],"using":[223],"training":[226],"setup,":[227],"evaluating":[228],"detection":[230],"metrics":[231],"deployment-relevant":[233],"factors":[234],"real":[236],"scan":[238],"data.":[239]},"counts_by_year":[{"year":2026,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-12-21T00:00:00"}
