{"id":"https://openalex.org/W7125914583","doi":"https://doi.org/10.1109/smc58881.2025.11343599","title":"Multi-level Feature Masking Network for Fine-grained Visual Classification","display_name":"Multi-level Feature Masking Network for Fine-grained Visual Classification","publication_year":2025,"publication_date":"2025-10-05","ids":{"openalex":"https://openalex.org/W7125914583","doi":"https://doi.org/10.1109/smc58881.2025.11343599"},"language":null,"primary_location":{"id":"doi:10.1109/smc58881.2025.11343599","is_oa":false,"landing_page_url":"https://doi.org/10.1109/smc58881.2025.11343599","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Conference on Systems, Man, and Cybernetics (SMC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5112896797","display_name":"Sheng You","orcid":null},"institutions":[{"id":"https://openalex.org/I139759216","display_name":"Beijing University of Posts and Telecommunications","ror":"https://ror.org/04w9fbh59","country_code":"CN","type":"education","lineage":["https://openalex.org/I139759216"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Sheng You","raw_affiliation_strings":["Beijing University of Posts and Telecommunications,School of Cyber Security"],"affiliations":[{"raw_affiliation_string":"Beijing University of Posts and Telecommunications,School of Cyber Security","institution_ids":["https://openalex.org/I139759216"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5067617335","display_name":"Gang Wang","orcid":"https://orcid.org/0000-0002-6395-9409"},"institutions":[{"id":"https://openalex.org/I4210145669","display_name":"Shanghai Police College","ror":"https://ror.org/0479fds27","country_code":"CN","type":"education","lineage":["https://openalex.org/I4210145669"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wang Gang","raw_affiliation_strings":["Sichuan Police College,Police Integration Computing Key Laboratory of Sichuan Province"],"affiliations":[{"raw_affiliation_string":"Sichuan Police College,Police Integration Computing Key Laboratory of Sichuan Province","institution_ids":["https://openalex.org/I4210145669"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101798913","display_name":"Yang Yu","orcid":"https://orcid.org/0009-0000-4686-1170"},"institutions":[{"id":"https://openalex.org/I139759216","display_name":"Beijing University of Posts and Telecommunications","ror":"https://ror.org/04w9fbh59","country_code":"CN","type":"education","lineage":["https://openalex.org/I139759216"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yang Yu","raw_affiliation_strings":["Beijing University of Posts and Telecommunications,School of Cyber Security"],"affiliations":[{"raw_affiliation_string":"Beijing University of Posts and Telecommunications,School of Cyber Security","institution_ids":["https://openalex.org/I139759216"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5124129032","display_name":"Zhou Linna","orcid":null},"institutions":[{"id":"https://openalex.org/I139759216","display_name":"Beijing University of Posts and Telecommunications","ror":"https://ror.org/04w9fbh59","country_code":"CN","type":"education","lineage":["https://openalex.org/I139759216"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhou Linna","raw_affiliation_strings":["Beijing University of Posts and Telecommunications,School of Cyber Security"],"affiliations":[{"raw_affiliation_string":"Beijing University of Posts and Telecommunications,School of Cyber Security","institution_ids":["https://openalex.org/I139759216"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5124069549","display_name":"Meng Xiangli","orcid":null},"institutions":[{"id":"https://openalex.org/I139759216","display_name":"Beijing University of Posts and Telecommunications","ror":"https://ror.org/04w9fbh59","country_code":"CN","type":"education","lineage":["https://openalex.org/I139759216"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Meng Xiangli","raw_affiliation_strings":["Beijing University of Posts and Telecommunications,School of Cyber Security"],"affiliations":[{"raw_affiliation_string":"Beijing University of Posts and Telecommunications,School of Cyber Security","institution_ids":["https://openalex.org/I139759216"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5112896797"],"corresponding_institution_ids":["https://openalex.org/I139759216"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.71654906,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"2424","last_page":"2430"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.21739999949932098,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.21739999949932098,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.2143000066280365,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11448","display_name":"Face recognition and analysis","score":0.12710000574588776,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.7139999866485596},{"id":"https://openalex.org/keywords/salient","display_name":"Salient","score":0.6744999885559082},{"id":"https://openalex.org/keywords/masking","display_name":"Masking (illustration)","score":0.6736999750137329},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.555899977684021},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.5468999743461609},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.49309998750686646},{"id":"https://openalex.org/keywords/semantic-feature","display_name":"Semantic feature","score":0.47209998965263367},{"id":"https://openalex.org/keywords/bounding-overwatch","display_name":"Bounding overwatch","score":0.4357999861240387}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8026999831199646},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.7139999866485596},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6930000185966492},{"id":"https://openalex.org/C2780719617","wikidata":"https://www.wikidata.org/wiki/Q1030752","display_name":"Salient","level":2,"score":0.6744999885559082},{"id":"https://openalex.org/C2777402240","wikidata":"https://www.wikidata.org/wiki/Q6783436","display_name":"Masking (illustration)","level":2,"score":0.6736999750137329},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.555899977684021},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.5468999743461609},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.49309998750686646},{"id":"https://openalex.org/C2781122975","wikidata":"https://www.wikidata.org/wiki/Q16928266","display_name":"Semantic feature","level":2,"score":0.47209998965263367},{"id":"https://openalex.org/C63584917","wikidata":"https://www.wikidata.org/wiki/Q333286","display_name":"Bounding overwatch","level":2,"score":0.4357999861240387},{"id":"https://openalex.org/C64754055","wikidata":"https://www.wikidata.org/wiki/Q7574053","display_name":"Spatial contextual awareness","level":2,"score":0.43220001459121704},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.4041000008583069},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.35910001397132874},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.3441999852657318},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.3294000029563904},{"id":"https://openalex.org/C147037132","wikidata":"https://www.wikidata.org/wiki/Q6865426","display_name":"Minimum bounding box","level":3,"score":0.32019999623298645},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.31439998745918274},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.31310001015663147},{"id":"https://openalex.org/C2776151529","wikidata":"https://www.wikidata.org/wiki/Q3045304","display_name":"Object detection","level":3,"score":0.2921999990940094},{"id":"https://openalex.org/C12713177","wikidata":"https://www.wikidata.org/wiki/Q1900281","display_name":"Perspective (graphical)","level":2,"score":0.28610000014305115},{"id":"https://openalex.org/C2779200073","wikidata":"https://www.wikidata.org/wiki/Q18395575","display_name":"Visual masking","level":4,"score":0.2732999920845032},{"id":"https://openalex.org/C126422989","wikidata":"https://www.wikidata.org/wiki/Q93586","display_name":"Feature detection (computer vision)","level":4,"score":0.26019999384880066}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/smc58881.2025.11343599","is_oa":false,"landing_page_url":"https://doi.org/10.1109/smc58881.2025.11343599","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE International Conference on Systems, Man, and Cybernetics (SMC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Industry, innovation and infrastructure","score":0.46657219529151917,"id":"https://metadata.un.org/sdg/9"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":24,"referenced_works":["https://openalex.org/W1954152232","https://openalex.org/W2138011018","https://openalex.org/W2737725206","https://openalex.org/W2743567459","https://openalex.org/W2951464224","https://openalex.org/W2986821660","https://openalex.org/W2998619563","https://openalex.org/W3005359536","https://openalex.org/W3126558081","https://openalex.org/W3130788242","https://openalex.org/W3139434170","https://openalex.org/W3200659198","https://openalex.org/W3204826552","https://openalex.org/W3206734547","https://openalex.org/W4210771642","https://openalex.org/W4304084267","https://openalex.org/W4312361652","https://openalex.org/W4313886956","https://openalex.org/W4320008793","https://openalex.org/W4321021073","https://openalex.org/W4386832633","https://openalex.org/W4392896368","https://openalex.org/W4401633962","https://openalex.org/W4411244874"],"related_works":[],"abstract_inverted_index":{"Fine-grained":[0],"visual":[1],"classification":[2],"(FGVC)":[3],"requires":[4],"capturing":[5],"subtle":[6],"distinctions":[7],"between":[8],"sub-categories,":[9],"emphasizing":[10],"both":[11],"global":[12],"context":[13],"and":[14,104,113,125],"local":[15],"details.":[16],"Existing":[17],"methods":[18],"often":[19],"rely":[20],"on":[21,37,61,122,128],"bounding":[22],"boxes":[23],"or":[24,35],"attention-based":[25],"patch":[26,41],"selection,":[27],"but":[28],"these":[29,54],"approaches":[30],"either":[31],"introduce":[32],"background":[33],"noise":[34],"depend":[36],"a":[38,45,129],"manually":[39],"predefined":[40],"number.":[42],"We":[43],"propose":[44],"novel":[46],"Multi-level":[47],"Feature":[48,82,108],"Masking":[49],"(MFM)":[50],"architecture":[51],"to":[52,87],"address":[53],"issues":[55],"by":[56],"predicting":[57,85],"spatial":[58,112],"masks":[59,86],"directly":[60],"intermediate":[62],"feature":[63],"maps,":[64],"highlighting":[65],"salient":[66],"features":[67],"while":[68],"suppressing":[69],"irrelevant":[70],"regions":[71],"without":[72],"discarding":[73],"information.":[74],"MFM":[75],"comprises":[76],"three":[77],"key":[78],"modules:":[79],"(1)":[80],"Mask":[81,93],"Alignment":[83,95],"(MFA),":[84],"align":[88],"multi-level":[89],"features;":[90],"(2)":[91],"Cross-layer":[92,107],"Semantic":[94],"(CMSA),":[96],"leveraging":[97],"high-level":[98],"semantic":[99],"information":[100],"for":[101],"inter-layer":[102],"alignment;":[103],"(3)":[105],"Graph-based":[106],"Enhancement":[109],"(CFE),":[110],"enriching":[111],"structural":[114],"representations.":[115],"Extensive":[116],"experiments":[117],"confirm":[118],"MFM\u2019s":[119],"competitive":[120],"performance":[121],"FGVC":[123],"benchmarks":[124],"state-of-the-art":[126],"accuracy":[127],"luxury":[130],"goods":[131],"dataset.":[132],"Code":[133],"is":[134],"available":[135],"at":[136],"https://github.com/SylU0/MFM.":[137]},"counts_by_year":[],"updated_date":"2026-01-29T23:17:01.242718","created_date":"2026-01-29T00:00:00"}
