{"id":"https://openalex.org/W4392154824","doi":"https://doi.org/10.1007/s11263-024-01988-x","title":"Training Object Detectors from Scratch: An Empirical Study in the Era of Vision Transformer","display_name":"Training Object Detectors from Scratch: An Empirical Study in the Era of Vision Transformer","publication_year":2024,"publication_date":"2024-02-26","ids":{"openalex":"https://openalex.org/W4392154824","doi":"https://doi.org/10.1007/s11263-024-01988-x"},"language":"en","primary_location":{"id":"doi:10.1007/s11263-024-01988-x","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s11263-024-01988-x","pdf_url":"https://link.springer.com/content/pdf/10.1007/s11263-024-01988-x.pdf","source":{"id":"https://openalex.org/S25538012","display_name":"International Journal of Computer Vision","issn_l":"0920-5691","issn":["0920-5691","1573-1405"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"International Journal of Computer Vision","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://link.springer.com/content/pdf/10.1007/s11263-024-01988-x.pdf","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5054872468","display_name":"Weixiang Hong","orcid":"https://orcid.org/0000-0002-3794-3972"},"institutions":[{"id":"https://openalex.org/I63072094","display_name":"University of Portsmouth","ror":"https://ror.org/03ykbk197","country_code":"GB","type":"education","lineage":["https://openalex.org/I63072094"]}],"countries":["GB"],"is_corresponding":true,"raw_author_name":"Weixiang Hong","raw_affiliation_strings":["Ant Group, Hangzhou, China","University of Portsmouth, Portsmouth, UK"],"raw_orcid":"https://orcid.org/0000-0002-3794-3972","affiliations":[{"raw_affiliation_string":"Ant Group, Hangzhou, China","institution_ids":[]},{"raw_affiliation_string":"University of Portsmouth, Portsmouth, UK","institution_ids":["https://openalex.org/I63072094"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100952254","display_name":"Wang Ren","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Wang Ren","raw_affiliation_strings":["Ant Group, Hangzhou, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Ant Group, Hangzhou, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5076968944","display_name":"Jiangwei Lao","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Jiangwei Lao","raw_affiliation_strings":["Ant Group, Hangzhou, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Ant Group, Hangzhou, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5072757173","display_name":"Lele Xie","orcid":"https://orcid.org/0000-0001-7731-9341"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Lele Xie","raw_affiliation_strings":["Ant Group, Hangzhou, China"],"raw_orcid":"https://orcid.org/0000-0001-7731-9341","affiliations":[{"raw_affiliation_string":"Ant Group, Hangzhou, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5057033978","display_name":"Liheng Zhong","orcid":"https://orcid.org/0000-0002-8161-9168"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Liheng Zhong","raw_affiliation_strings":["Ant Group, Hangzhou, China"],"raw_orcid":"https://orcid.org/0000-0002-8161-9168","affiliations":[{"raw_affiliation_string":"Ant Group, Hangzhou, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100370428","display_name":"Jian Wang","orcid":"https://orcid.org/0000-0002-3164-0153"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Jian Wang","raw_affiliation_strings":["Ant Group, Hangzhou, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Ant Group, Hangzhou, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5014922345","display_name":"Jingdong Chen","orcid":"https://orcid.org/0000-0002-1872-2592"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Jingdong Chen","raw_affiliation_strings":["Ant Group, Hangzhou, China"],"raw_orcid":"https://orcid.org/0000-0002-1872-2592","affiliations":[{"raw_affiliation_string":"Ant Group, Hangzhou, China","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5085867312","display_name":"Honghai Liu","orcid":"https://orcid.org/0000-0002-2880-4698"},"institutions":[{"id":"https://openalex.org/I204983213","display_name":"Harbin Institute of Technology","ror":"https://ror.org/01yqg2h08","country_code":"CN","type":"education","lineage":["https://openalex.org/I204983213"]},{"id":"https://openalex.org/I63072094","display_name":"University of Portsmouth","ror":"https://ror.org/03ykbk197","country_code":"GB","type":"education","lineage":["https://openalex.org/I63072094"]}],"countries":["CN","GB"],"is_corresponding":true,"raw_author_name":"Honghai Liu","raw_affiliation_strings":["Harbin Institute of Technology, Shenzhen, China","University of Portsmouth, Portsmouth, UK"],"raw_orcid":"https://orcid.org/0000-0002-2880-4698","affiliations":[{"raw_affiliation_string":"Harbin Institute of Technology, Shenzhen, China","institution_ids":["https://openalex.org/I204983213"]},{"raw_affiliation_string":"University of Portsmouth, Portsmouth, UK","institution_ids":["https://openalex.org/I63072094"]}]},{"author_position":"last","author":{"id":null,"display_name":"Wei Chu","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Wei Chu","raw_affiliation_strings":["Ant Group, Hangzhou, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Ant Group, Hangzhou, China","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":9,"corresponding_author_ids":["https://openalex.org/A5014922345","https://openalex.org/A5054872468","https://openalex.org/A5057033978","https://openalex.org/A5072757173","https://openalex.org/A5076968944","https://openalex.org/A5085867312","https://openalex.org/A5100370428","https://openalex.org/A5100952254"],"corresponding_institution_ids":["https://openalex.org/I204983213","https://openalex.org/I63072094"],"apc_list":{"value":2890,"currency":"EUR","value_usd":3690},"apc_paid":{"value":2890,"currency":"EUR","value_usd":3690},"fwci":0.9523,"has_fulltext":true,"cited_by_count":4,"citation_normalized_percentile":{"value":0.74041748,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":97},"biblio":{"volume":"132","issue":"8","first_page":"2929","last_page":"2942"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.7599152326583862},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6786643862724304},{"id":"https://openalex.org/keywords/detector","display_name":"Detector","score":0.628192663192749},{"id":"https://openalex.org/keywords/scratch","display_name":"Scratch","score":0.6097267866134644},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5660173296928406},{"id":"https://openalex.org/keywords/convolutional-neural-network","display_name":"Convolutional neural network","score":0.5193182826042175},{"id":"https://openalex.org/keywords/object-detection","display_name":"Object detection","score":0.5058343410491943},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.4138178825378418},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.31503361463546753},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.1926424205303192},{"id":"https://openalex.org/keywords/electrical-engineering","display_name":"Electrical engineering","score":0.16060179471969604},{"id":"https://openalex.org/keywords/telecommunications","display_name":"Telecommunications","score":0.10912582278251648},{"id":"https://openalex.org/keywords/voltage","display_name":"Voltage","score":0.07740136981010437}],"concepts":[{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.7599152326583862},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6786643862724304},{"id":"https://openalex.org/C94915269","wikidata":"https://www.wikidata.org/wiki/Q1834857","display_name":"Detector","level":2,"score":0.628192663192749},{"id":"https://openalex.org/C2781235140","wikidata":"https://www.wikidata.org/wiki/Q275131","display_name":"Scratch","level":2,"score":0.6097267866134644},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5660173296928406},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.5193182826042175},{"id":"https://openalex.org/C2776151529","wikidata":"https://www.wikidata.org/wiki/Q3045304","display_name":"Object detection","level":3,"score":0.5058343410491943},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.4138178825378418},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.31503361463546753},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.1926424205303192},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.16060179471969604},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.10912582278251648},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.07740136981010437},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1007/s11263-024-01988-x","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s11263-024-01988-x","pdf_url":"https://link.springer.com/content/pdf/10.1007/s11263-024-01988-x.pdf","source":{"id":"https://openalex.org/S25538012","display_name":"International Journal of Computer Vision","issn_l":"0920-5691","issn":["0920-5691","1573-1405"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"International Journal of Computer Vision","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1007/s11263-024-01988-x","is_oa":true,"landing_page_url":"https://doi.org/10.1007/s11263-024-01988-x","pdf_url":"https://link.springer.com/content/pdf/10.1007/s11263-024-01988-x.pdf","source":{"id":"https://openalex.org/S25538012","display_name":"International Journal of Computer Vision","issn_l":"0920-5691","issn":["0920-5691","1573-1405"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319900","host_organization_name":"Springer Science+Business Media","host_organization_lineage":["https://openalex.org/P4310319900","https://openalex.org/P4310319965"],"host_organization_lineage_names":["Springer Science+Business Media","Springer Nature"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"International Journal of Computer Vision","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W4392154824.pdf"},"referenced_works_count":46,"referenced_works":["https://openalex.org/W753847829","https://openalex.org/W1536680647","https://openalex.org/W1677182931","https://openalex.org/W1861492603","https://openalex.org/W2100921332","https://openalex.org/W2102605133","https://openalex.org/W2117539524","https://openalex.org/W2163605009","https://openalex.org/W2183341477","https://openalex.org/W2193145675","https://openalex.org/W2194775991","https://openalex.org/W2601564443","https://openalex.org/W2613718673","https://openalex.org/W2894651257","https://openalex.org/W2896457183","https://openalex.org/W2963150697","https://openalex.org/W2963446712","https://openalex.org/W2963813458","https://openalex.org/W2964241181","https://openalex.org/W2982770724","https://openalex.org/W2991391304","https://openalex.org/W3012573144","https://openalex.org/W3035396860","https://openalex.org/W3042930119","https://openalex.org/W3094502228","https://openalex.org/W3096609285","https://openalex.org/W3106250896","https://openalex.org/W3116489684","https://openalex.org/W3131500599","https://openalex.org/W3137278571","https://openalex.org/W3138516171","https://openalex.org/W3160694286","https://openalex.org/W3172509117","https://openalex.org/W3172752666","https://openalex.org/W3180659539","https://openalex.org/W3199761064","https://openalex.org/W4214493665","https://openalex.org/W4312769131","https://openalex.org/W4312950730","https://openalex.org/W6678668498","https://openalex.org/W6679349572","https://openalex.org/W6684207430","https://openalex.org/W6739901393","https://openalex.org/W6757817989","https://openalex.org/W6790690058","https://openalex.org/W6797790494"],"related_works":["https://openalex.org/W2475116013","https://openalex.org/W2770018148","https://openalex.org/W2358308169","https://openalex.org/W2385135707","https://openalex.org/W2140315382","https://openalex.org/W2059109728","https://openalex.org/W322691623","https://openalex.org/W2494989134","https://openalex.org/W2969228573","https://openalex.org/W2963690996"],"abstract_inverted_index":{"Abstract":[0],"Modeling":[1],"in":[2,15,24,41,66,80,112,151,186,212],"computer":[3],"vision":[4,43,46,99,140,147,163,214,228],"has":[5],"long":[6],"been":[7],"dominated":[8],"by":[9],"convolutional":[10],"neural":[11],"networks":[12],"(CNNs).":[13],"Recently,":[14],"light":[16],"of":[17,21,63,92,98,143,161,197],"the":[18,25,61,81,93,113,132,159,198,222],"excellent":[19],"performance":[20,239],"self-attention":[22],"mechanism":[23],"language":[26],"field,":[27],"transformers":[28,47],"tailored":[29],"for":[30],"visual":[31],"data":[32],"have":[33,116],"drawn":[34],"significant":[35],"attention":[36],"and":[37,77,95,101,179,181,206],"triumphed":[38],"over":[39],"CNNs":[40,114,119,137],"various":[42],"tasks.":[44],"These":[45],"heavily":[48],"rely":[49],"on":[50,221],"large-scale":[51],"pre-training":[52],"to":[53,89,138,157,175,240],"achieve":[54,237],"competitive":[55],"accuracy,":[56],"which":[57],"not":[58,128],"only":[59],"hinders":[60],"freedom":[62],"architectural":[64,204],"design":[65],"downstream":[67],"tasks":[68],"like":[69],"object":[70,105],"detection,":[71],"but":[72],"also":[73,236],"causes":[74],"learning":[75],"bias":[76],"domain":[78],"mismatch":[79],"fine-tuning":[82],"stages.":[83],"To":[84],"this":[85,152],"end,":[86],"we":[87,171],"aim":[88],"get":[90],"rid":[91],"\u201cpre-train":[94],"fine-tune\u201d":[96],"paradigm":[97],"transformer":[100,103,148,164,215,229],"train":[102],"based":[104,120,149,165,216,230],"detector":[106],"from":[107,136,167,218,233],"scratch.":[108,168,219],"Some":[109],"earlier":[110],"works":[111],"era":[115],"successfully":[117],"trained":[118,232],"detectors":[121,166,217,231],"without":[122],"pre-training,":[123,194],"unfortunately,":[124],"their":[125,241],"findings":[126,200],"do":[127],"generalize":[129],"well":[130],"when":[131],"backbone":[133],"is":[134,156,201],"switched":[135],"a":[139,145],"transformer.":[141],"Instead":[142],"proposing":[144],"specific":[146],"detector,":[150],"work,":[153],"our":[154],"goal":[155],"reveal":[158],"insights":[160,174],"training":[162,213],"In":[169],"particular,":[170],"expect":[172],"those":[173],"help":[176],"other":[177,187],"researchers":[178],"practitioners,":[180],"inspire":[182],"more":[183,207],"interesting":[184],"research":[185],"fields,":[188],"such":[189],"as":[190],"remote":[191],"sensing,":[192],"visual-linguistic":[193],"etc.":[195],"One":[196],"key":[199],"that":[202,227],"both":[203],"changes":[205],"epochs":[208],"play":[209],"critical":[210],"roles":[211],"Experiments":[220],"MS":[223],"COCO":[224],"dataset":[225],"demonstrate":[226],"scratch":[234],"can":[235],"similar":[238],"counterparts":[242],"with":[243],"ImageNet":[244],"pre-training.":[245]},"counts_by_year":[{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":1}],"updated_date":"2025-12-31T23:11:33.660297","created_date":"2025-10-10T00:00:00"}
