{"id":"https://openalex.org/W4379984335","doi":"https://doi.org/10.1109/tcsvt.2023.3284453","title":"Enhancing Representation Learning With Spatial Transformation and Early Convolution for Reinforcement Learning-Based Small Object Detection","display_name":"Enhancing Representation Learning With Spatial Transformation and Early Convolution for Reinforcement Learning-Based Small Object Detection","publication_year":2023,"publication_date":"2023-06-09","ids":{"openalex":"https://openalex.org/W4379984335","doi":"https://doi.org/10.1109/tcsvt.2023.3284453"},"language":"en","primary_location":{"id":"doi:10.1109/tcsvt.2023.3284453","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcsvt.2023.3284453","pdf_url":null,"source":{"id":"https://openalex.org/S115173108","display_name":"IEEE Transactions on Circuits and Systems for Video Technology","issn_l":"1051-8215","issn":["1051-8215","1558-2205"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Circuits and Systems for Video Technology","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5030374805","display_name":"Fen Fang","orcid":"https://orcid.org/0000-0002-3834-4795"},"institutions":[{"id":"https://openalex.org/I115228651","display_name":"Agency for Science, Technology and Research","ror":"https://ror.org/036wvzt09","country_code":"SG","type":"government","lineage":["https://openalex.org/I115228651"]},{"id":"https://openalex.org/I3005327000","display_name":"Institute for Infocomm Research","ror":"https://ror.org/053rfa017","country_code":"SG","type":"facility","lineage":["https://openalex.org/I115228651","https://openalex.org/I3005327000","https://openalex.org/I91275662"]}],"countries":["SG"],"is_corresponding":true,"raw_author_name":"Fen Fang","raw_affiliation_strings":["Institute for Infocomm Research, A&#x002A;STAR, Fusionopolis, Singapore"],"affiliations":[{"raw_affiliation_string":"Institute for Infocomm Research, A&#x002A;STAR, Fusionopolis, Singapore","institution_ids":["https://openalex.org/I3005327000","https://openalex.org/I115228651"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5080546069","display_name":"Wenyu Liang","orcid":"https://orcid.org/0000-0003-0278-2723"},"institutions":[{"id":"https://openalex.org/I3005327000","display_name":"Institute for Infocomm Research","ror":"https://ror.org/053rfa017","country_code":"SG","type":"facility","lineage":["https://openalex.org/I115228651","https://openalex.org/I3005327000","https://openalex.org/I91275662"]},{"id":"https://openalex.org/I115228651","display_name":"Agency for Science, Technology and Research","ror":"https://ror.org/036wvzt09","country_code":"SG","type":"government","lineage":["https://openalex.org/I115228651"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Wenyu Liang","raw_affiliation_strings":["Institute for Infocomm Research, A&#x002A;STAR, Fusionopolis, Singapore"],"affiliations":[{"raw_affiliation_string":"Institute for Infocomm Research, A&#x002A;STAR, Fusionopolis, Singapore","institution_ids":["https://openalex.org/I3005327000","https://openalex.org/I115228651"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5001675146","display_name":"Yi Cheng","orcid":"https://orcid.org/0000-0002-5940-0581"},"institutions":[{"id":"https://openalex.org/I115228651","display_name":"Agency for Science, Technology and Research","ror":"https://ror.org/036wvzt09","country_code":"SG","type":"government","lineage":["https://openalex.org/I115228651"]},{"id":"https://openalex.org/I3005327000","display_name":"Institute for Infocomm Research","ror":"https://ror.org/053rfa017","country_code":"SG","type":"facility","lineage":["https://openalex.org/I115228651","https://openalex.org/I3005327000","https://openalex.org/I91275662"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Yi Cheng","raw_affiliation_strings":["Institute for Infocomm Research, A&#x002A;STAR, Fusionopolis, Singapore"],"affiliations":[{"raw_affiliation_string":"Institute for Infocomm Research, A&#x002A;STAR, Fusionopolis, Singapore","institution_ids":["https://openalex.org/I3005327000","https://openalex.org/I115228651"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5052143634","display_name":"Qianli Xu","orcid":"https://orcid.org/0000-0003-0105-5903"},"institutions":[{"id":"https://openalex.org/I115228651","display_name":"Agency for Science, Technology and Research","ror":"https://ror.org/036wvzt09","country_code":"SG","type":"government","lineage":["https://openalex.org/I115228651"]},{"id":"https://openalex.org/I3005327000","display_name":"Institute for Infocomm Research","ror":"https://ror.org/053rfa017","country_code":"SG","type":"facility","lineage":["https://openalex.org/I115228651","https://openalex.org/I3005327000","https://openalex.org/I91275662"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Qianli Xu","raw_affiliation_strings":["Institute for Infocomm Research, A&#x002A;STAR, Fusionopolis, Singapore"],"affiliations":[{"raw_affiliation_string":"Institute for Infocomm Research, A&#x002A;STAR, Fusionopolis, Singapore","institution_ids":["https://openalex.org/I3005327000","https://openalex.org/I115228651"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5077258552","display_name":"Joo\u2010Hwee Lim","orcid":"https://orcid.org/0000-0002-4103-3824"},"institutions":[{"id":"https://openalex.org/I115228651","display_name":"Agency for Science, Technology and Research","ror":"https://ror.org/036wvzt09","country_code":"SG","type":"government","lineage":["https://openalex.org/I115228651"]},{"id":"https://openalex.org/I172675005","display_name":"Nanyang Technological University","ror":"https://ror.org/02e7b5302","country_code":"SG","type":"education","lineage":["https://openalex.org/I172675005"]},{"id":"https://openalex.org/I3005327000","display_name":"Institute for Infocomm Research","ror":"https://ror.org/053rfa017","country_code":"SG","type":"facility","lineage":["https://openalex.org/I115228651","https://openalex.org/I3005327000","https://openalex.org/I91275662"]}],"countries":["SG"],"is_corresponding":false,"raw_author_name":"Joo-Hwee Lim","raw_affiliation_strings":["Institute for Infocomm Research, A&#x002A;STAR, Fusionopolis, Singapore","School of Computer Science and Engineering, Nanyang Technological University, Jurong West, Singapore"],"affiliations":[{"raw_affiliation_string":"Institute for Infocomm Research, A&#x002A;STAR, Fusionopolis, Singapore","institution_ids":["https://openalex.org/I3005327000","https://openalex.org/I115228651"]},{"raw_affiliation_string":"School of Computer Science and Engineering, Nanyang Technological University, Jurong West, Singapore","institution_ids":["https://openalex.org/I172675005"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5030374805"],"corresponding_institution_ids":["https://openalex.org/I115228651","https://openalex.org/I3005327000"],"apc_list":null,"apc_paid":null,"fwci":2.7057,"has_fulltext":false,"cited_by_count":22,"citation_normalized_percentile":{"value":0.92024442,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":99,"max":100},"biblio":{"volume":"34","issue":"1","first_page":"315","last_page":"328"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12111","display_name":"Industrial Vision Systems and Defect Detection","score":0.9866999983787537,"subfield":{"id":"https://openalex.org/subfields/2209","display_name":"Industrial and Manufacturing Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11099","display_name":"Autonomous Vehicle Technology and Safety","score":0.9861000180244446,"subfield":{"id":"https://openalex.org/subfields/2203","display_name":"Automotive Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7059237957000732},{"id":"https://openalex.org/keywords/reinforcement-learning","display_name":"Reinforcement learning","score":0.6014211773872375},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5925572514533997},{"id":"https://openalex.org/keywords/convolution","display_name":"Convolution (computer science)","score":0.5657989978790283},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.5294135808944702},{"id":"https://openalex.org/keywords/transformation","display_name":"Transformation (genetics)","score":0.48076245188713074},{"id":"https://openalex.org/keywords/feature-learning","display_name":"Feature learning","score":0.4564724862575531},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.4523783326148987},{"id":"https://openalex.org/keywords/object-detection","display_name":"Object detection","score":0.4500022530555725},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.43344929814338684},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.4243295192718506},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.1263490915298462}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7059237957000732},{"id":"https://openalex.org/C97541855","wikidata":"https://www.wikidata.org/wiki/Q830687","display_name":"Reinforcement learning","level":2,"score":0.6014211773872375},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5925572514533997},{"id":"https://openalex.org/C45347329","wikidata":"https://www.wikidata.org/wiki/Q5166604","display_name":"Convolution (computer science)","level":3,"score":0.5657989978790283},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.5294135808944702},{"id":"https://openalex.org/C204241405","wikidata":"https://www.wikidata.org/wiki/Q461499","display_name":"Transformation (genetics)","level":3,"score":0.48076245188713074},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.4564724862575531},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.4523783326148987},{"id":"https://openalex.org/C2776151529","wikidata":"https://www.wikidata.org/wiki/Q3045304","display_name":"Object detection","level":3,"score":0.4500022530555725},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.43344929814338684},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.4243295192718506},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.1263490915298462},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C104317684","wikidata":"https://www.wikidata.org/wiki/Q7187","display_name":"Gene","level":2,"score":0.0},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C55493867","wikidata":"https://www.wikidata.org/wiki/Q7094","display_name":"Biochemistry","level":1,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tcsvt.2023.3284453","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcsvt.2023.3284453","pdf_url":null,"source":{"id":"https://openalex.org/S115173108","display_name":"IEEE Transactions on Circuits and Systems for Video Technology","issn_l":"1051-8215","issn":["1051-8215","1558-2205"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Circuits and Systems for Video Technology","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G7777593022","display_name":null,"funder_award_id":"#A18A2b0046","funder_id":"https://openalex.org/F4320320696","funder_display_name":"Agency for Science, Technology and Research"}],"funders":[{"id":"https://openalex.org/F4320320696","display_name":"Agency for Science, Technology and Research","ror":"https://ror.org/036wvzt09"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":51,"referenced_works":["https://openalex.org/W639708223","https://openalex.org/W1861492603","https://openalex.org/W1885185971","https://openalex.org/W2031454541","https://openalex.org/W2031489346","https://openalex.org/W2041076459","https://openalex.org/W2056653303","https://openalex.org/W2193145675","https://openalex.org/W2242218935","https://openalex.org/W2469312016","https://openalex.org/W2768489488","https://openalex.org/W2798355657","https://openalex.org/W2799256316","https://openalex.org/W2800791174","https://openalex.org/W2804847616","https://openalex.org/W2883872876","https://openalex.org/W2885641989","https://openalex.org/W2903718489","https://openalex.org/W2924450865","https://openalex.org/W2924873663","https://openalex.org/W2958435308","https://openalex.org/W2963307811","https://openalex.org/W2963857746","https://openalex.org/W2963897760","https://openalex.org/W2988452521","https://openalex.org/W2989611864","https://openalex.org/W2990631821","https://openalex.org/W3008354258","https://openalex.org/W3009396058","https://openalex.org/W3009447447","https://openalex.org/W3011688396","https://openalex.org/W3034971973","https://openalex.org/W3036271496","https://openalex.org/W3094482290","https://openalex.org/W3106250896","https://openalex.org/W3138516171","https://openalex.org/W3158141244","https://openalex.org/W3175630421","https://openalex.org/W3201797941","https://openalex.org/W3210997132","https://openalex.org/W4214636423","https://openalex.org/W4226334005","https://openalex.org/W4285235844","https://openalex.org/W4289537802","https://openalex.org/W4293193211","https://openalex.org/W4312781996","https://openalex.org/W4312823573","https://openalex.org/W4386076325","https://openalex.org/W6687681856","https://openalex.org/W6751325469","https://openalex.org/W6797790494"],"related_works":["https://openalex.org/W4306904969","https://openalex.org/W2138720691","https://openalex.org/W4362501864","https://openalex.org/W4380318855","https://openalex.org/W3084456289","https://openalex.org/W2024136090","https://openalex.org/W4391331176","https://openalex.org/W2031695474","https://openalex.org/W2586732548","https://openalex.org/W2964765435"],"abstract_inverted_index":{"Although":[0],"object":[1,25,36,135,222,253],"detection":[2,37,136,169,206,223],"has":[3,103,184],"achieved":[4],"significant":[5],"progress":[6],"in":[7],"the":[8,21,55,86,120,147,193,201,205,212,218,226,233,237],"past":[9],"decade,":[10],"detecting":[11],"small":[12,35,123,160,172,252],"objects":[13,124,145,173],"is":[14,38,116],"still":[15],"far":[16],"from":[17],"satisfactory":[18],"due":[19],"to":[20,33,39,84,97,118,143],"high":[22,168],"variability":[23],"of":[24,57,79,122,130,214],"scales":[26],"and":[27,63,90,151,187,197,225,228,262],"complex":[28],"backgrounds.":[29],"The":[30,181],"common":[31],"way":[32],"enhance":[34,85],"use":[40],"high-resolution":[41],"(HR)":[42],"images.":[43,58],"However,":[44],"this":[45,163],"method":[46,102,183,203,235,247],"incurs":[47],"huge":[48],"computational":[49],"resources":[50],"which":[51],"grow":[52],"squarely":[53],"with":[54,94],"resolution":[56],"To":[59],"achieve":[60],"both":[61],"accuracy":[62,207],"efficiency,":[64],"we":[65,165],"propose":[66],"a":[67,80,91],"novel":[68],"reinforcement":[69],"learning":[70,89],"framework":[71],"that":[72],"employs":[73],"an":[74,113],"efficient":[75],"policy":[76],"network":[77],"consisting":[78],"Spatial":[81],"Transformation":[82],"Network":[83],"state":[87],"representation":[88],"Transformer":[92],"model":[93],"early":[95],"convolution":[96],"improve":[98],"feature":[99],"extraction.":[100],"Our":[101],"two":[104],"main":[105],"steps:":[106],"(1)":[107],"coarse":[108,149],"location":[109],"query":[110],"(CLQ),":[111],"where":[112,137],"RL":[114],"agent":[115],"trained":[117],"predict":[119],"locations":[121,150],"on":[125,146,155,171,178,189,251,260],"low-resolution":[126],"(LR)":[127],"(down-sampled":[128],"version":[129],"HR)":[131],"images;":[132],"(2)":[133],"context-sensitive":[134],"HR":[138],"image":[139,153],"patches":[140,154],"are":[141],"used":[142],"detect":[144],"selected":[148],"LR":[152],"background":[156,179],"areas":[157],"(containing":[158],"no":[159],"objects).":[161],"In":[162],"way,":[164],"can":[166],"obtain":[167],"performance":[170,259],"while":[174,210,255],"avoiding":[175],"unnecessary":[176],"computation":[177],"areas.":[180],"proposed":[182,202,234],"been":[185],"tested":[186],"benchmarked":[188],"various":[190],"datasets.":[191],"On":[192,217,241],"Caltech":[194],"Pedestrians":[195,199],"Detection":[196],"Web":[198],"datasets,":[200],"improves":[204],"by":[208],"2%,":[209],"reducing":[211],"number":[213],"processed":[215],"pixels.":[216],"Vision":[219],"meets":[220],"Drone":[221],"dataset":[224],"Oil":[227],"Gas":[229],"Storage":[230],"Tank":[231],"dataset,":[232],"outperforms":[236,248],"state-of-the-art":[238],"(SotA)":[239],"methods.":[240],"MS":[242],"COCO":[243],"mini-val":[244],"set,":[245],"our":[246],"SotA":[249],"methods":[250],"detection,":[254],"also":[256],"achieving":[257],"comparable":[258],"medium":[261],"large":[263],"objects.":[264]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":12},{"year":2024,"cited_by_count":8}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
