{"id":"https://openalex.org/W1928906481","doi":"https://doi.org/10.1109/cvpr.2015.7298685","title":"The application of two-level attention models in deep convolutional neural network for fine-grained image classification","display_name":"The application of two-level attention models in deep convolutional neural network for fine-grained image classification","publication_year":2015,"publication_date":"2015-06-01","ids":{"openalex":"https://openalex.org/W1928906481","doi":"https://doi.org/10.1109/cvpr.2015.7298685","mag":"1928906481"},"language":"en","primary_location":{"id":"doi:10.1109/cvpr.2015.7298685","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cvpr.2015.7298685","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2015 IEEE Conference on Computer Vision and Pattern Recognition (CVPR)","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5020757657","display_name":"Tianjun Xiao","orcid":"https://orcid.org/0000-0003-4705-1545"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]},{"id":"https://openalex.org/I80293557","display_name":"University of Computer Studies Yangon","ror":"https://ror.org/019ad8130","country_code":"MM","type":"education","lineage":["https://openalex.org/I80293557"]}],"countries":["CN","MM"],"is_corresponding":true,"raw_author_name":"Tianjun Xiao","raw_affiliation_strings":["Institute of Computer Science and Technology, Peking University","Institute of Computer Sci. & Tech., Peking University, China"],"affiliations":[{"raw_affiliation_string":"Institute of Computer Science and Technology, Peking University","institution_ids":["https://openalex.org/I20231570","https://openalex.org/I80293557"]},{"raw_affiliation_string":"Institute of Computer Sci. & Tech., Peking University, China","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5006654661","display_name":"Yichong Xu","orcid":"https://orcid.org/0000-0002-1302-738X"},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]},{"id":"https://openalex.org/I4210113369","display_name":"Microsoft Research Asia (China)","ror":"https://ror.org/0300m5276","country_code":"CN","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210113369"]}],"countries":["CN","US"],"is_corresponding":false,"raw_author_name":"Yichong Xu","raw_affiliation_strings":["Microsoft Research, Beijing","Microsoft Research, , Beijing, China"],"affiliations":[{"raw_affiliation_string":"Microsoft Research, Beijing","institution_ids":["https://openalex.org/I4210113369"]},{"raw_affiliation_string":"Microsoft Research, , Beijing, China","institution_ids":["https://openalex.org/I1290206253"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5023035795","display_name":"Kuiyuan Yang","orcid":"https://orcid.org/0000-0003-3063-2925"},"institutions":[{"id":"https://openalex.org/I4210113369","display_name":"Microsoft Research Asia (China)","ror":"https://ror.org/0300m5276","country_code":"CN","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210113369"]},{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]}],"countries":["CN","US"],"is_corresponding":false,"raw_author_name":"Kuiyuan Yang","raw_affiliation_strings":["Microsoft Research, Beijing","Microsoft Research, , Beijing, China"],"affiliations":[{"raw_affiliation_string":"Microsoft Research, Beijing","institution_ids":["https://openalex.org/I4210113369"]},{"raw_affiliation_string":"Microsoft Research, , Beijing, China","institution_ids":["https://openalex.org/I1290206253"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100628333","display_name":"Jiaxing Zhang","orcid":"https://orcid.org/0000-0002-1324-6486"},"institutions":[{"id":"https://openalex.org/I4210113369","display_name":"Microsoft Research Asia (China)","ror":"https://ror.org/0300m5276","country_code":"CN","type":"company","lineage":["https://openalex.org/I1290206253","https://openalex.org/I4210113369"]},{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]}],"countries":["CN","US"],"is_corresponding":false,"raw_author_name":"Jiaxing Zhang","raw_affiliation_strings":["Microsoft Research, Beijing","Microsoft Research, , Beijing, China"],"affiliations":[{"raw_affiliation_string":"Microsoft Research, Beijing","institution_ids":["https://openalex.org/I4210113369"]},{"raw_affiliation_string":"Microsoft Research, , Beijing, China","institution_ids":["https://openalex.org/I1290206253"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5047811387","display_name":"Yuxin Peng","orcid":"https://orcid.org/0000-0001-7658-3845"},"institutions":[{"id":"https://openalex.org/I80293557","display_name":"University of Computer Studies Yangon","ror":"https://ror.org/019ad8130","country_code":"MM","type":"education","lineage":["https://openalex.org/I80293557"]},{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN","MM"],"is_corresponding":false,"raw_author_name":"Yuxin Peng","raw_affiliation_strings":["Institute of Computer Science and Technology, Peking University","Institute of Computer Sci. & Tech., Peking University, China"],"affiliations":[{"raw_affiliation_string":"Institute of Computer Science and Technology, Peking University","institution_ids":["https://openalex.org/I20231570","https://openalex.org/I80293557"]},{"raw_affiliation_string":"Institute of Computer Sci. & Tech., Peking University, China","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100459168","display_name":"Zheng Zhang","orcid":"https://orcid.org/0000-0003-1470-6998"},"institutions":[{"id":"https://openalex.org/I258800397","display_name":"New York University Shanghai","ror":"https://ror.org/02vpsdb40","country_code":"CN","type":"education","lineage":["https://openalex.org/I258800397","https://openalex.org/I57206974"]},{"id":"https://openalex.org/I57206974","display_name":"New York University","ror":"https://ror.org/0190ak572","country_code":"US","type":"education","lineage":["https://openalex.org/I57206974"]}],"countries":["CN","US"],"is_corresponding":false,"raw_author_name":"Zheng Zhang","raw_affiliation_strings":["New York University Shanghai","New York University, Shanghai, China#TAB#"],"affiliations":[{"raw_affiliation_string":"New York University Shanghai","institution_ids":["https://openalex.org/I57206974"]},{"raw_affiliation_string":"New York University, Shanghai, China#TAB#","institution_ids":["https://openalex.org/I258800397"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5020757657"],"corresponding_institution_ids":["https://openalex.org/I20231570","https://openalex.org/I80293557"],"apc_list":null,"apc_paid":null,"fwci":41.0502,"has_fulltext":false,"cited_by_count":805,"citation_normalized_percentile":{"value":0.99809924,"is_in_top_1_percent":true,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":97,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"842","last_page":"850"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8313314318656921},{"id":"https://openalex.org/keywords/discriminative-model","display_name":"Discriminative model","score":0.8206518888473511},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.7665256261825562},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.7439815402030945},{"id":"https://openalex.org/keywords/minimum-bounding-box","display_name":"Minimum bounding box","score":0.7246156930923462},{"id":"https://openalex.org/keywords/convolutional-neural-network","display_name":"Convolutional neural network","score":0.7080873847007751},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.5813655257225037},{"id":"https://openalex.org/keywords/bounding-overwatch","display_name":"Bounding overwatch","score":0.5609533786773682},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5561563968658447},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.5340449213981628},{"id":"https://openalex.org/keywords/constraint","display_name":"Constraint (computer-aided design)","score":0.5001821517944336},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.4964922070503235},{"id":"https://openalex.org/keywords/contextual-image-classification","display_name":"Contextual image classification","score":0.4848271608352661},{"id":"https://openalex.org/keywords/deep-neural-networks","display_name":"Deep neural networks","score":0.47384148836135864},{"id":"https://openalex.org/keywords/object-detection","display_name":"Object detection","score":0.4660796821117401},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.4514349102973938},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.4330015778541565},{"id":"https://openalex.org/keywords/task-analysis","display_name":"Task analysis","score":0.4256584048271179},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.4176117181777954},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.26471146941185}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8313314318656921},{"id":"https://openalex.org/C97931131","wikidata":"https://www.wikidata.org/wiki/Q5282087","display_name":"Discriminative model","level":2,"score":0.8206518888473511},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.7665256261825562},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.7439815402030945},{"id":"https://openalex.org/C147037132","wikidata":"https://www.wikidata.org/wiki/Q6865426","display_name":"Minimum bounding box","level":3,"score":0.7246156930923462},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.7080873847007751},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.5813655257225037},{"id":"https://openalex.org/C63584917","wikidata":"https://www.wikidata.org/wiki/Q333286","display_name":"Bounding overwatch","level":2,"score":0.5609533786773682},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5561563968658447},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.5340449213981628},{"id":"https://openalex.org/C2776036281","wikidata":"https://www.wikidata.org/wiki/Q48769818","display_name":"Constraint (computer-aided design)","level":2,"score":0.5001821517944336},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.4964922070503235},{"id":"https://openalex.org/C75294576","wikidata":"https://www.wikidata.org/wiki/Q5165192","display_name":"Contextual image classification","level":3,"score":0.4848271608352661},{"id":"https://openalex.org/C2984842247","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep neural networks","level":3,"score":0.47384148836135864},{"id":"https://openalex.org/C2776151529","wikidata":"https://www.wikidata.org/wiki/Q3045304","display_name":"Object detection","level":3,"score":0.4660796821117401},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.4514349102973938},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4330015778541565},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.4256584048271179},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.4176117181777954},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.26471146941185},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C78519656","wikidata":"https://www.wikidata.org/wiki/Q101333","display_name":"Mechanical engineering","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/cvpr.2015.7298685","is_oa":false,"landing_page_url":"https://doi.org/10.1109/cvpr.2015.7298685","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2015 IEEE Conference on Computer Vision and Pattern Recognition (CVPR)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.7200000286102295,"id":"https://metadata.un.org/sdg/10","display_name":"Reduced inequalities"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":40,"referenced_works":["https://openalex.org/W56385144","https://openalex.org/W64897123","https://openalex.org/W166750225","https://openalex.org/W1616462885","https://openalex.org/W1686810756","https://openalex.org/W1797268635","https://openalex.org/W1846799578","https://openalex.org/W1995543189","https://openalex.org/W2010181071","https://openalex.org/W2062118960","https://openalex.org/W2079789819","https://openalex.org/W2087560337","https://openalex.org/W2088049833","https://openalex.org/W2102605133","https://openalex.org/W2103444992","https://openalex.org/W2118696714","https://openalex.org/W2119525058","https://openalex.org/W2135706578","https://openalex.org/W2152411181","https://openalex.org/W2163605009","https://openalex.org/W2168356304","https://openalex.org/W2169501191","https://openalex.org/W2206858481","https://openalex.org/W2533598788","https://openalex.org/W2949194058","https://openalex.org/W2953360861","https://openalex.org/W4294375521","https://openalex.org/W6602324145","https://openalex.org/W6602625923","https://openalex.org/W6606767076","https://openalex.org/W6636475194","https://openalex.org/W6637373629","https://openalex.org/W6638677478","https://openalex.org/W6653248861","https://openalex.org/W6675541922","https://openalex.org/W6677700107","https://openalex.org/W6682778277","https://openalex.org/W6684191040","https://openalex.org/W6684876274","https://openalex.org/W6688059459"],"related_works":["https://openalex.org/W4237171675","https://openalex.org/W3036286480","https://openalex.org/W4287027631","https://openalex.org/W3192357901","https://openalex.org/W2387360586","https://openalex.org/W2952736415","https://openalex.org/W3209723314","https://openalex.org/W3205398323","https://openalex.org/W2883297582","https://openalex.org/W4390524233"],"abstract_inverted_index":{"Fine-grained":[0],"classification":[1,30,59],"is":[2,179],"challenging":[3],"because":[4],"categories":[5],"can":[6],"only":[7],"be":[8],"discriminated":[9],"by":[10],"subtle":[11],"and":[12,91,117,158,167],"local":[13],"differences.":[14],"Variances":[15],"in":[16],"the":[17,24,33,72,79,92,115,147,150,153,169,173],"pose,":[18],"scale":[19],"or":[20,39,129],"rotation":[21],"usually":[22],"make":[23],"problem":[25],"more":[26],"difficult.":[27],"Most":[28],"fine-grained":[29,58],"systems":[31],"follow":[32],"pipeline":[34,66,163],"of":[35,70,149,155],"finding":[36],"foreground":[37],"object":[38,40],"parts":[41],"(where)":[42],"to":[43,53,57,87,104,112,142],"extract":[44],"discriminative":[45,98],"features":[46],"(what).":[47],"In":[48],"this":[49],"paper,":[50],"we":[51,121],"propose":[52,76],"apply":[54],"visual":[55],"attention":[56,74,82,95],"task":[60],"using":[61,123],"deep":[62,107],"neural":[63],"network.":[64],"Our":[65,162],"integrates":[67],"three":[68],"types":[69],"attention:":[71],"bottom-up":[73],"that":[75,83,96,184],"candidate":[77],"patches,":[78],"object-level":[80],"top-down":[81,94],"selects":[84],"relevant":[85],"patches":[86],"a":[88],"certain":[89],"object,":[90],"part-level":[93],"localizes":[97],"parts.":[99],"We":[100,144],"combine":[101],"these":[102],"attentions":[103],"train":[105],"domain-specific":[106],"nets,":[108],"then":[109],"use":[110],"it":[111],"improve":[113],"both":[114],"what":[116],"where":[118],"aspects.":[119],"Importantly,":[120],"avoid":[122],"expensive":[124],"annotations":[125],"like":[126],"bounding":[127],"box":[128],"part":[130],"information":[131],"from":[132],"end-to-end.":[133],"The":[134,177],"weak":[135],"supervision":[136,175],"constraint":[137],"makes":[138],"our":[139],"work":[140],"easier":[141],"generalize.":[143],"have":[145],"verified":[146],"effectiveness":[148],"method":[151],"on":[152,186],"subsets":[154],"ILSVRC2012":[156],"dataset":[157],"CUB200":[159],"2011":[160],"dataset.":[161],"delivered":[164],"significant":[165],"improvements":[166],"achieved":[168],"best":[170],"accuracy":[171],"under":[172],"weakest":[174],"condition.":[176],"performance":[178],"competitive":[180],"against":[181],"other":[182],"methods":[183],"rely":[185],"additional":[187],"annotations.":[188]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":24},{"year":2024,"cited_by_count":47},{"year":2023,"cited_by_count":64},{"year":2022,"cited_by_count":78},{"year":2021,"cited_by_count":112},{"year":2020,"cited_by_count":108},{"year":2019,"cited_by_count":151},{"year":2018,"cited_by_count":102},{"year":2017,"cited_by_count":58},{"year":2016,"cited_by_count":49},{"year":2015,"cited_by_count":11}],"updated_date":"2026-03-09T08:58:05.943551","created_date":"2025-10-10T00:00:00"}
