{"id":"https://openalex.org/W3206734547","doi":"https://doi.org/10.1145/3474085.3475561","title":"RAMS-Trans: Recurrent Attention Multi-scale Transformer for Fine-grained Image Recognition","display_name":"RAMS-Trans: Recurrent Attention Multi-scale Transformer for Fine-grained Image Recognition","publication_year":2021,"publication_date":"2021-10-17","ids":{"openalex":"https://openalex.org/W3206734547","doi":"https://doi.org/10.1145/3474085.3475561","mag":"3206734547"},"language":"en","primary_location":{"id":"doi:10.1145/3474085.3475561","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3474085.3475561","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 29th ACM International Conference on Multimedia","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5076364161","display_name":"Yunqing Hu","orcid":"https://orcid.org/0009-0000-0849-6475"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yunqing Hu","raw_affiliation_strings":["Zhejiang University, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Zhejiang University, Hangzhou, China","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5057169984","display_name":"Xuan Jin","orcid":null},"institutions":[{"id":"https://openalex.org/I45928872","display_name":"Alibaba Group (China)","ror":"https://ror.org/00k642b80","country_code":"CN","type":"company","lineage":["https://openalex.org/I45928872"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xuan Jin","raw_affiliation_strings":["Alibaba Group, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Alibaba Group, Hangzhou, China","institution_ids":["https://openalex.org/I45928872"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100343712","display_name":"Yin Zhang\u22c6","orcid":"https://orcid.org/0000-0002-1772-0763"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yin Zhang","raw_affiliation_strings":["Zhejiang University, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Zhejiang University, Hangzhou, China","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5045259909","display_name":"Haiwen Hong","orcid":null},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Haiwen Hong","raw_affiliation_strings":["Zhejiang University, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Zhejiang University, Hangzhou, China","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100749004","display_name":"Jingfeng Zhang","orcid":"https://orcid.org/0000-0003-3491-8074"},"institutions":[{"id":"https://openalex.org/I76130692","display_name":"Zhejiang University","ror":"https://ror.org/00a2xv884","country_code":"CN","type":"education","lineage":["https://openalex.org/I76130692"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jingfeng Zhang","raw_affiliation_strings":["Zhejiang University, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Zhejiang University, Hangzhou, China","institution_ids":["https://openalex.org/I76130692"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101802340","display_name":"Yuan He","orcid":"https://orcid.org/0000-0002-6885-1341"},"institutions":[{"id":"https://openalex.org/I45928872","display_name":"Alibaba Group (China)","ror":"https://ror.org/00k642b80","country_code":"CN","type":"company","lineage":["https://openalex.org/I45928872"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuan He","raw_affiliation_strings":["Alibaba Group, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Alibaba Group, Hangzhou, China","institution_ids":["https://openalex.org/I45928872"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5057436798","display_name":"Hui Xue","orcid":"https://orcid.org/0000-0002-5856-4445"},"institutions":[{"id":"https://openalex.org/I45928872","display_name":"Alibaba Group (China)","ror":"https://ror.org/00k642b80","country_code":"CN","type":"company","lineage":["https://openalex.org/I45928872"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hui Xue","raw_affiliation_strings":["Alibaba Group, Hangzhou, China"],"affiliations":[{"raw_affiliation_string":"Alibaba Group, Hangzhou, China","institution_ids":["https://openalex.org/I45928872"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":7,"corresponding_author_ids":["https://openalex.org/A5076364161"],"corresponding_institution_ids":["https://openalex.org/I76130692"],"apc_list":null,"apc_paid":null,"fwci":8.4592,"has_fulltext":false,"cited_by_count":128,"citation_normalized_percentile":{"value":0.98411117,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"4239","last_page":"4248"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9991000294685364,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.9980000257492065,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/discriminative-model","display_name":"Discriminative model","score":0.8755992650985718},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7221013903617859},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6958576440811157},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.6634308695793152},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.6355434656143188},{"id":"https://openalex.org/keywords/convolutional-neural-network","display_name":"Convolutional neural network","score":0.5961765050888062},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.332302987575531},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.06385341286659241}],"concepts":[{"id":"https://openalex.org/C97931131","wikidata":"https://www.wikidata.org/wiki/Q5282087","display_name":"Discriminative model","level":2,"score":0.8755992650985718},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7221013903617859},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6958576440811157},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.6634308695793152},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.6355434656143188},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.5961765050888062},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.332302987575531},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.06385341286659241},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.0},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3474085.3475561","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3474085.3475561","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 29th ACM International Conference on Multimedia","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Reduced inequalities","score":0.7400000095367432,"id":"https://metadata.un.org/sdg/10"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":39,"referenced_works":["https://openalex.org/W1616462885","https://openalex.org/W1797268635","https://openalex.org/W2104657103","https://openalex.org/W2194775991","https://openalex.org/W2274287116","https://openalex.org/W2295107390","https://openalex.org/W2403585668","https://openalex.org/W2737725206","https://openalex.org/W2740620254","https://openalex.org/W2741910023","https://openalex.org/W2787420051","https://openalex.org/W2797977484","https://openalex.org/W2798365843","https://openalex.org/W2889469641","https://openalex.org/W2891951760","https://openalex.org/W2892035828","https://openalex.org/W2961018736","https://openalex.org/W2963090248","https://openalex.org/W2963341956","https://openalex.org/W2963393555","https://openalex.org/W2963403868","https://openalex.org/W2963407932","https://openalex.org/W2964110616","https://openalex.org/W2970507129","https://openalex.org/W2970597249","https://openalex.org/W2986821660","https://openalex.org/W2990495699","https://openalex.org/W2997426000","https://openalex.org/W2998345525","https://openalex.org/W2998619563","https://openalex.org/W3034676907","https://openalex.org/W3035367622","https://openalex.org/W3044267316","https://openalex.org/W3081907075","https://openalex.org/W3092462694","https://openalex.org/W3094502228","https://openalex.org/W3108870912","https://openalex.org/W3139434170","https://openalex.org/W3170874841"],"related_works":["https://openalex.org/W2965546495","https://openalex.org/W4389116644","https://openalex.org/W2153315159","https://openalex.org/W3103844505","https://openalex.org/W259157601","https://openalex.org/W4205463238","https://openalex.org/W2110523656","https://openalex.org/W1482209366","https://openalex.org/W2404514746","https://openalex.org/W1652783584"],"abstract_inverted_index":{"In":[0],"fine-grained":[1],"image":[2,167,175],"recognition":[3],"(FGIR),":[4],"the":[5,69,89,100,103,108,111,116,121,129,144,150,163,173,181,193,196,210],"localization":[6],"and":[7,60,75,177,218],"amplification":[8,160],"of":[9,72,91,102,110,146,165,195],"region":[10,85,93,136,159,182],"attention":[11,64,94,104,123,137,183,197,211],"is":[12,46,53,76],"an":[13,204],"important":[14],"factor,":[15],"which":[16,127],"has":[17,33],"been":[18],"explored":[19],"extensively":[20],"convolutional":[21],"neural":[22],"networks":[23],"(CNNs)":[24],"based":[25],"approaches.":[26],"The":[27,169],"recently":[28],"developed":[29],"vision":[30,39],"transformer":[31,125],"(ViT)":[32],"achieved":[34],"promising":[35],"results":[36,242],"in":[37,55,138,234],"computer":[38],"tasks.":[40],"Compared":[41],"with":[42,172,215],"CNNs,":[43],"Image":[44],"sequentialization":[45],"a":[47,139],"brand":[48],"new":[49,186],"manner.":[50,141],"However,":[51],"ViT":[52,216],"limited":[54],"its":[56,73],"receptive":[57],"field":[58],"size":[59,71],"thus":[61],"lacks":[62],"local":[63,191],"like":[65],"CNNs":[66],"due":[67],"to":[68,78,82,106,115,132,161,184,190,236],"fixed":[70],"patches,":[74],"unable":[77],"generate":[79,185],"multi-scale":[80,124,140,166],"features":[81],"learn":[83,134],"discriminative":[84,92,135],"attention.":[86],"To":[87],"facilitate":[88],"learning":[90],"without":[95],"box/part":[96],"annotations,":[97],"we":[98],"use":[99],"strength":[101],"weights":[105,198,212],"measure":[107],"importance":[109],"patch":[112,152],"tokens":[113],"corresponding":[114],"raw":[117],"images.":[118],"We":[119],"propose":[120],"recurrent":[122],"(RAMS-Trans),":[126],"uses":[128],"transformer's":[130],"self-attention":[131],"recursively":[133],"Specifically,":[142],"at":[143,200],"core":[145],"our":[147],"approach":[148,207],"lies":[149],"dynamic":[151],"proposal":[153],"module":[154],"(DPPM)":[155],"responsible":[156],"for":[157],"guiding":[158],"complete":[162],"integration":[164],"patches.":[168],"DPPM":[170],"starts":[171],"full-size":[174],"patches":[176,187],"iteratively":[178],"scales":[179],"up":[180],"from":[188],"global":[189],"by":[192],"intensity":[194],"generated":[199],"each":[201],"scale":[202],"as":[203],"indicator.":[205],"Our":[206],"requires":[208],"only":[209],"that":[213,227],"come":[214],"itself":[217],"can":[219],"be":[220],"easily":[221],"trained":[222],"end-to-end.":[223],"Extensive":[224],"experiments":[225],"demonstrate":[226],"RAMS-Trans":[228],"performs":[229],"better":[230],"than":[231],"exising":[232],"works,":[233],"addition":[235],"efficient":[237],"CNN":[238],"models,":[239],"achieving":[240],"state-of-the-art":[241],"on":[243],"three":[244],"benchmark":[245],"datasets.":[246]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":38},{"year":2024,"cited_by_count":46},{"year":2023,"cited_by_count":27},{"year":2022,"cited_by_count":14},{"year":2021,"cited_by_count":1}],"updated_date":"2026-03-01T08:55:55.761014","created_date":"2025-10-10T00:00:00"}
