{"id":"https://openalex.org/W4416137237","doi":"https://doi.org/10.1109/ipta66025.2025.11222048","title":"MLP Fusion: Revisiting Convolutional Networks with Transformer-Based Insights","display_name":"MLP Fusion: Revisiting Convolutional Networks with Transformer-Based Insights","publication_year":2025,"publication_date":"2025-10-13","ids":{"openalex":"https://openalex.org/W4416137237","doi":"https://doi.org/10.1109/ipta66025.2025.11222048"},"language":null,"primary_location":{"id":"doi:10.1109/ipta66025.2025.11222048","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ipta66025.2025.11222048","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 Fourteenth International Conference on Image Processing, Theory, Tools &amp;amp; Applications (IPTA)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5014785461","display_name":"Erfan Alizadeh Noohi","orcid":null},"institutions":[{"id":"https://openalex.org/I128277893","display_name":"Bah\u00e7e\u015fehir University","ror":"https://ror.org/00yze4d93","country_code":"TR","type":"education","lineage":["https://openalex.org/I128277893"]}],"countries":["TR"],"is_corresponding":true,"raw_author_name":"Erfan Alizadeh Noohi","raw_affiliation_strings":["Bahcesehir University,Faculty of Computer Engineering,Istanbul,Turkiye"],"affiliations":[{"raw_affiliation_string":"Bahcesehir University,Faculty of Computer Engineering,Istanbul,Turkiye","institution_ids":["https://openalex.org/I128277893"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5023872482","display_name":"Hassan Imani","orcid":"https://orcid.org/0000-0003-1566-3897"},"institutions":[{"id":"https://openalex.org/I128277893","display_name":"Bah\u00e7e\u015fehir University","ror":"https://ror.org/00yze4d93","country_code":"TR","type":"education","lineage":["https://openalex.org/I128277893"]}],"countries":["TR"],"is_corresponding":false,"raw_author_name":"Hassan Imani","raw_affiliation_strings":["Bahcesehir University,Faculty of Computer Engineering,Istanbul,Turkiye"],"affiliations":[{"raw_affiliation_string":"Bahcesehir University,Faculty of Computer Engineering,Istanbul,Turkiye","institution_ids":["https://openalex.org/I128277893"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5116048766","display_name":"Md Baharul Islam","orcid":"https://orcid.org/0009-0001-0613-5366"},"institutions":[{"id":"https://openalex.org/I2801014300","display_name":"Florida Gulf Coast University","ror":"https://ror.org/05tc5bm31","country_code":"US","type":"education","lineage":["https://openalex.org/I2801014300"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Md Baharul Islam","raw_affiliation_strings":["Florida Gulf Coast University,Department of Computing &#x0026; Software Engineering,USA"],"affiliations":[{"raw_affiliation_string":"Florida Gulf Coast University,Department of Computing &#x0026; Software Engineering,USA","institution_ids":["https://openalex.org/I2801014300"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5014785461"],"corresponding_institution_ids":["https://openalex.org/I128277893"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.36574031,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.7477999925613403,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.7477999925613403,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12026","display_name":"Explainable Artificial Intelligence (XAI)","score":0.03970000147819519,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.03629999980330467,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/convolutional-neural-network","display_name":"Convolutional neural network","score":0.8205000162124634},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.5627999901771545},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.5321000218391418},{"id":"https://openalex.org/keywords/multilayer-perceptron","display_name":"Multilayer perceptron","score":0.42730000615119934},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.4262999892234802},{"id":"https://openalex.org/keywords/perceptron","display_name":"Perceptron","score":0.4000999927520752}],"concepts":[{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.8205000162124634},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7860999703407288},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.7139000296592712},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.5627999901771545},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.5321000218391418},{"id":"https://openalex.org/C179717631","wikidata":"https://www.wikidata.org/wiki/Q2991667","display_name":"Multilayer perceptron","level":3,"score":0.42730000615119934},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.4262999892234802},{"id":"https://openalex.org/C60908668","wikidata":"https://www.wikidata.org/wiki/Q690207","display_name":"Perceptron","level":3,"score":0.4000999927520752},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.38580000400543213},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.3418999910354614},{"id":"https://openalex.org/C45347329","wikidata":"https://www.wikidata.org/wiki/Q5166604","display_name":"Convolution (computer science)","level":3,"score":0.28450000286102295},{"id":"https://openalex.org/C157899210","wikidata":"https://www.wikidata.org/wiki/Q1395022","display_name":"Convolutional code","level":3,"score":0.2827000021934509},{"id":"https://openalex.org/C123657996","wikidata":"https://www.wikidata.org/wiki/Q12271","display_name":"Architecture","level":2,"score":0.26030001044273376}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/ipta66025.2025.11222048","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ipta66025.2025.11222048","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 Fourteenth International Conference on Image Processing, Theory, Tools &amp;amp; Applications (IPTA)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":12,"referenced_works":["https://openalex.org/W1901129140","https://openalex.org/W2112796928","https://openalex.org/W2183341477","https://openalex.org/W2194775991","https://openalex.org/W2895752198","https://openalex.org/W2963314614","https://openalex.org/W2998508940","https://openalex.org/W3138516171","https://openalex.org/W3202897123","https://openalex.org/W4385245566","https://openalex.org/W4396834786","https://openalex.org/W4404838659"],"related_works":[],"abstract_inverted_index":{"Transformer-based":[0],"architectures":[1,28],"have":[2],"become":[3],"the":[4,21,78,93,101,114,133],"dominant":[5],"approach":[6],"for":[7,29],"a":[8,45,58],"wide":[9],"array":[10],"of":[11,23,104],"machine":[12],"learning":[13],"tasks,":[14],"including":[15],"those":[16,68],"in":[17,32,70,132],"computer":[18],"vision.":[19],"Consequently,":[20],"prevalence":[22],"purely":[24],"convolutional":[25,90],"networks-particularly":[26],"shallow-depth":[27],"classification-has":[30],"been":[31],"decline.":[33],"In":[34],"this":[35],"work,":[36],"we":[37,54,82],"revisit":[38],"Convolutional":[39],"Neural":[40],"Networks":[41],"(CNNs)":[42],"and":[43,116],"propose":[44],"modern":[46],"hybrid":[47],"architecture":[48],"that":[49,60,120],"integrates":[50],"Transformer-inspired":[51],"components.":[52],"Specifically,":[53],"introduce":[55],"MLP":[56,121],"Fusion,":[57],"model":[59],"incorporates":[61],"Multi-Layer":[62],"Perceptron":[63],"(MLP)":[64],"blocks,":[65],"similar":[66],"to":[67,77,99,127],"used":[69],"Vision":[71],"Transformers,":[72],"into":[73],"CNN":[74,129],"backbones":[75],"prior":[76],"classification":[79],"stage.":[80],"Additionally,":[81],"include":[83],"intermediate":[84],"<tex":[85],"xmlns:mml=\"http://www.w3.org/1998/Math/MathML\"":[86],"xmlns:xlink=\"http://www.w3.org/1999/xlink\">$1":[87],"\\times":[88],"1$</tex>":[89],"layers":[91],"within":[92],"backbone.":[94],"This":[95],"fusion":[96],"is":[97],"intended":[98],"enhance":[100],"representational":[102],"capacity":[103],"CNNs":[105],"by":[106],"enriching":[107],"their":[108],"embedding":[109],"space.":[110],"Experimental":[111],"evaluations":[112],"on":[113],"CIFAR-10":[115],"CIFAR-100":[117],"datasets":[118],"show":[119],"Fusion":[122],"achieves":[123],"better":[124],"performance":[125],"compared":[126],"compact":[128],"models":[130],"reported":[131],"literature.":[134]},"counts_by_year":[],"updated_date":"2026-03-07T16:01:11.037858","created_date":"2025-11-10T00:00:00"}
