{"id":"https://openalex.org/W7164836340","doi":"https://doi.org/10.1145/3805622.3810593","title":"Learning Where to Embed: Noise-Aware Positional Embedding for Query Retrieval in Small-Object Detection","display_name":"Learning Where to Embed: Noise-Aware Positional Embedding for Query Retrieval in Small-Object Detection","publication_year":2026,"publication_date":"2026-06-15","ids":{"openalex":"https://openalex.org/W7164836340","doi":"https://doi.org/10.1145/3805622.3810593"},"language":null,"primary_location":{"id":"doi:10.1145/3805622.3810593","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3805622.3810593","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2026 International Conference on Multimedia Retrieval","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1145/3805622.3810593","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5037374299","display_name":"Yi Zeng","orcid":"https://orcid.org/0000-0002-0899-3786"},"institutions":[{"id":"https://openalex.org/I76569877","display_name":"Southeast University","ror":"https://ror.org/04ct4d772","country_code":"CN","type":"education","lineage":["https://openalex.org/I76569877"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yangchen Zeng","raw_affiliation_strings":["Southeast University, Nanjing, China"],"raw_orcid":"https://orcid.org/0009-0009-1956-4029","affiliations":[{"raw_affiliation_string":"Southeast University, Nanjing, China","institution_ids":["https://openalex.org/I76569877"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100433332","display_name":"Zhenyu Yu","orcid":"https://orcid.org/0000-0002-9985-0165"},"institutions":[{"id":"https://openalex.org/I24943067","display_name":"Fudan University","ror":"https://ror.org/013q1eq08","country_code":"CN","type":"education","lineage":["https://openalex.org/I24943067"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhenyu Yu","raw_affiliation_strings":["Fudan University, Shanghai, China"],"raw_orcid":"https://orcid.org/0000-0002-9985-0165","affiliations":[{"raw_affiliation_string":"Fudan University, Shanghai, China","institution_ids":["https://openalex.org/I24943067"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5134149018","display_name":"Dongming Jiang","orcid":null},"institutions":[{"id":"https://openalex.org/I162577319","display_name":"The University of Texas at Dallas","ror":"https://ror.org/049emcs32","country_code":"US","type":"education","lineage":["https://openalex.org/I162577319"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Dongming Jiang","raw_affiliation_strings":["The University of Texas at Dallas, Dallas, USA"],"raw_orcid":"https://orcid.org/0009-0004-2691-5046","affiliations":[{"raw_affiliation_string":"The University of Texas at Dallas, Dallas, USA","institution_ids":["https://openalex.org/I162577319"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100441502","display_name":"Wei Zhang","orcid":"https://orcid.org/0000-0001-7800-3189"},"institutions":[{"id":"https://openalex.org/I163151501","display_name":"Hangzhou Normal University","ror":"https://ror.org/014v1mr15","country_code":"CN","type":"education","lineage":["https://openalex.org/I163151501"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wenbo Zhang","raw_affiliation_strings":["Zhejiang Normal university, Hangzhou, China"],"raw_orcid":"https://orcid.org/0009-0002-6269-707X","affiliations":[{"raw_affiliation_string":"Zhejiang Normal university, Hangzhou, China","institution_ids":["https://openalex.org/I163151501"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102798661","display_name":"Yifan Hong","orcid":"https://orcid.org/0000-0002-4702-0987"},"institutions":[{"id":"https://openalex.org/I4210137491","display_name":"National Science Centre","ror":"https://ror.org/03ha2q922","country_code":"PL","type":"government","lineage":["https://openalex.org/I4210137491"]}],"countries":["PL"],"is_corresponding":false,"raw_author_name":"Yifan Hong","raw_affiliation_strings":["Data Space Research Institute, Hefei Comprehensive National Science Center, Hefei, China"],"raw_orcid":"https://orcid.org/0000-0002-4702-0987","affiliations":[{"raw_affiliation_string":"Data Space Research Institute, Hefei Comprehensive National Science Center, Hefei, China","institution_ids":["https://openalex.org/I4210137491"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133853564","display_name":"Zhanhua Hu","orcid":null},"institutions":[{"id":"https://openalex.org/I74775410","display_name":"Rice University","ror":"https://ror.org/008zs3103","country_code":"US","type":"education","lineage":["https://openalex.org/I74775410"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Zhanhua Hu","raw_affiliation_strings":["Rice university, Houston, USA"],"raw_orcid":"https://orcid.org/0009-0007-6913-0224","affiliations":[{"raw_affiliation_string":"Rice university, Houston, USA","institution_ids":["https://openalex.org/I74775410"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5138542946","display_name":"Jiao Luo","orcid":"https://orcid.org/0009-0002-5786-7468"},"institutions":[{"id":"https://openalex.org/I204823248","display_name":"Huazhong Agricultural University","ror":"https://ror.org/023b72294","country_code":"CN","type":"education","lineage":["https://openalex.org/I204823248"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jiao Luo","raw_affiliation_strings":["College of Informatics, Huazhong Agricultural University, Wuhan, China"],"raw_orcid":"https://orcid.org/0009-0002-5786-7468","affiliations":[{"raw_affiliation_string":"College of Informatics, Huazhong Agricultural University, Wuhan, China","institution_ids":["https://openalex.org/I204823248"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5080932443","display_name":"Kangning Cui","orcid":"https://orcid.org/0000-0002-1856-5064"},"institutions":[{"id":"https://openalex.org/I168719708","display_name":"City University of Hong Kong","ror":"https://ror.org/03q8dnn23","country_code":"HK","type":"education","lineage":["https://openalex.org/I168719708"]},{"id":"https://openalex.org/I2799850029","display_name":"Dongguan University of Technology","ror":"https://ror.org/01m8p7q42","country_code":"CN","type":"education","lineage":["https://openalex.org/I2799850029"]},{"id":"https://openalex.org/I47251452","display_name":"Wake Forest University","ror":"https://ror.org/0207ad724","country_code":"US","type":"education","lineage":["https://openalex.org/I47251452"]}],"countries":["CN","HK","US"],"is_corresponding":false,"raw_author_name":"Kangning Cui","raw_affiliation_strings":["Wake Forest University, Winston-Salem, USA and City University of Hong Kong (Dongguan), Dongguan, China"],"raw_orcid":"https://orcid.org/0000-0002-1856-5064","affiliations":[{"raw_affiliation_string":"Wake Forest University, Winston-Salem, USA and City University of Hong Kong (Dongguan), Dongguan, China","institution_ids":["https://openalex.org/I168719708","https://openalex.org/I2799850029","https://openalex.org/I47251452"]}]}],"institutions":[],"countries_distinct_count":4,"institutions_distinct_count":8,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.93723873,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1260","last_page":"1269"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.6894000172615051,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10036","display_name":"Advanced Neural Network Applications","score":0.6894000172615051,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.10220000147819519,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.08590000122785568,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.7717999815940857},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.6504999995231628},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.5942000150680542},{"id":"https://openalex.org/keywords/filter","display_name":"Filter (signal processing)","score":0.5828999876976013},{"id":"https://openalex.org/keywords/encoding","display_name":"Encoding (memory)","score":0.571399986743927},{"id":"https://openalex.org/keywords/convolution","display_name":"Convolution (computer science)","score":0.5507000088691711},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.4726000130176544},{"id":"https://openalex.org/keywords/decoding-methods","display_name":"Decoding methods","score":0.43639999628067017},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.4300999939441681}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8101999759674072},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.7717999815940857},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.6504999995231628},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.5942000150680542},{"id":"https://openalex.org/C106131492","wikidata":"https://www.wikidata.org/wiki/Q3072260","display_name":"Filter (signal processing)","level":2,"score":0.5828999876976013},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5737000107765198},{"id":"https://openalex.org/C125411270","wikidata":"https://www.wikidata.org/wiki/Q18653","display_name":"Encoding (memory)","level":2,"score":0.571399986743927},{"id":"https://openalex.org/C45347329","wikidata":"https://www.wikidata.org/wiki/Q5166604","display_name":"Convolution (computer science)","level":3,"score":0.5507000088691711},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.4726000130176544},{"id":"https://openalex.org/C57273362","wikidata":"https://www.wikidata.org/wiki/Q576722","display_name":"Decoding methods","level":2,"score":0.43639999628067017},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.4300999939441681},{"id":"https://openalex.org/C45374587","wikidata":"https://www.wikidata.org/wiki/Q12525525","display_name":"Computation","level":2,"score":0.42559999227523804},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.42340001463890076},{"id":"https://openalex.org/C94915269","wikidata":"https://www.wikidata.org/wiki/Q1834857","display_name":"Detector","level":2,"score":0.38370001316070557},{"id":"https://openalex.org/C1667742","wikidata":"https://www.wikidata.org/wiki/Q10927554","display_name":"Image retrieval","level":3,"score":0.376800000667572},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.3720000088214874},{"id":"https://openalex.org/C111335779","wikidata":"https://www.wikidata.org/wiki/Q3454686","display_name":"Reduction (mathematics)","level":2,"score":0.3158999979496002},{"id":"https://openalex.org/C2164484","wikidata":"https://www.wikidata.org/wiki/Q5170150","display_name":"Core (optical fiber)","level":2,"score":0.3089999854564667},{"id":"https://openalex.org/C99016210","wikidata":"https://www.wikidata.org/wiki/Q5488129","display_name":"Query expansion","level":2,"score":0.3043999969959259},{"id":"https://openalex.org/C43126263","wikidata":"https://www.wikidata.org/wiki/Q128751","display_name":"Source code","level":2,"score":0.29910001158714294},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.2944999933242798},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.2896000146865845},{"id":"https://openalex.org/C2776151529","wikidata":"https://www.wikidata.org/wiki/Q3045304","display_name":"Object detection","level":3,"score":0.2833000123500824},{"id":"https://openalex.org/C2777735758","wikidata":"https://www.wikidata.org/wiki/Q817765","display_name":"Path (computing)","level":2,"score":0.2799000144004822},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.27630001306533813},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.257999986410141},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.25690001249313354},{"id":"https://openalex.org/C83665646","wikidata":"https://www.wikidata.org/wiki/Q42139305","display_name":"Feature vector","level":2,"score":0.2547000050544739},{"id":"https://openalex.org/C70518039","wikidata":"https://www.wikidata.org/wiki/Q16000077","display_name":"Dimensionality reduction","level":2,"score":0.25440001487731934},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.2531999945640564}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3805622.3810593","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3805622.3810593","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2026 International Conference on Multimedia Retrieval","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3805622.3810593","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3805622.3810593","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2026 International Conference on Multimedia Retrieval","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":48,"referenced_works":["https://openalex.org/W2031489346","https://openalex.org/W2193145675","https://openalex.org/W2194775991","https://openalex.org/W2512351403","https://openalex.org/W2612624696","https://openalex.org/W2963150697","https://openalex.org/W2963351448","https://openalex.org/W2964241181","https://openalex.org/W2966926453","https://openalex.org/W2982083293","https://openalex.org/W2982770724","https://openalex.org/W2989604896","https://openalex.org/W3022917557","https://openalex.org/W3088748863","https://openalex.org/W3096609285","https://openalex.org/W3131500599","https://openalex.org/W3138516171","https://openalex.org/W3216569295","https://openalex.org/W4214493665","https://openalex.org/W4214627427","https://openalex.org/W4307411363","https://openalex.org/W4312312588","https://openalex.org/W4319166312","https://openalex.org/W4327785494","https://openalex.org/W4386076325","https://openalex.org/W4390873988","https://openalex.org/W4391841967","https://openalex.org/W4394595727","https://openalex.org/W4395703098","https://openalex.org/W4399749893","https://openalex.org/W4401749621","https://openalex.org/W4401750691","https://openalex.org/W4402754006","https://openalex.org/W4402856397","https://openalex.org/W4403780796","https://openalex.org/W4403791737","https://openalex.org/W4411824119","https://openalex.org/W4412751299","https://openalex.org/W4414360319","https://openalex.org/W4414865988","https://openalex.org/W4417302572","https://openalex.org/W7125934538","https://openalex.org/W7128696615","https://openalex.org/W7137806425","https://openalex.org/W7138906118","https://openalex.org/W7151791908","https://openalex.org/W7155085120","https://openalex.org/W7159797400"],"related_works":[],"abstract_inverted_index":{"Transformer-based":[0],"detectors":[1],"have":[2],"advanced":[3],"small-object":[4],"detection,":[5],"but":[6],"they":[7],"often":[8],"remain":[9],"inefficient":[10],"and":[11,69,78,87,99,160],"vulnerable":[12],"to":[13,21,40,128,158],"background-induced":[14],"query":[15,103],"noise,":[16],"which":[17],"motivates":[18],"deep":[19],"decoders":[20],"refine":[22],"low-quality":[23],"queries.":[24],"We":[25],"present":[26],"HELP":[27],"(Heatmap-guided":[28],"Embedding":[29,62],"Learning":[30],"Paradigm),":[31],"a":[32,73,110,149,162,175],"noise-aware":[33],"positional-semantic":[34],"fusion":[35],"framework":[36],"that":[37],"studies":[38],"where":[39],"embed":[41],"positional":[42,47,97],"information":[43],"by":[44,94,105],"selectively":[45],"preserving":[46],"encodings":[48],"in":[49,120],"foreground-salient":[50],"regions":[51],"while":[52,169],"suppressing":[53],"background":[54],"clutter.":[55],"Within":[56],"HELP,":[57],"we":[58,124],"introduce":[59],"Heatmap-guided":[60],"Positional":[61],"(HPE)":[63],"as":[64],"the":[65,85],"core":[66],"embedding":[67],"mechanism":[68],"visualize":[70],"it":[71,89,100],"with":[72],"heatbar":[74],"for":[75],"interpretable":[76],"diagnosis":[77],"fine-tuning.":[79],"HPE":[80],"is":[81,136],"integrated":[82],"into":[83],"both":[84],"encoder":[86],"decoder:":[88],"guides":[90],"noise-suppressed":[91],"feature":[92,118],"encoding":[93],"injecting":[95],"heatmap-aware":[96],"encoding,":[98],"enables":[101],"high-quality":[102],"retrieval":[104],"filtering":[106],"background-dominant":[107],"embeddings":[108],"via":[109],"gradient-based":[111,133],"mask":[112],"filter":[113],"before":[114],"decoding.":[115],"To":[116],"address":[117],"sparsity":[119],"complex":[121],"small":[122],"targets,":[123],"integrate":[125],"Linear-Snake":[126],"Convolution":[127],"enrich":[129],"retrieval-relevant":[130],"representations.":[131],"The":[132],"heatmap":[134],"supervision":[135],"used":[137],"during":[138],"training":[139],"only,":[140],"incurring":[141],"no":[142],"additional":[143],"gradient":[144],"computation":[145],"at":[146],"inference.":[147],"As":[148],"result,":[150],"our":[151],"design":[152],"reduces":[153],"decoder":[154],"layers":[155],"from":[156],"eight":[157],"three":[159],"achieves":[161],"59.4%":[163],"parameter":[164],"reduction":[165],"(66.3M":[166],"vs.":[167],"163M)":[168],"maintaining":[170],"consistent":[171],"accuracy":[172],"gains":[173],"under":[174],"reduced":[176],"compute":[177],"budget":[178],"across":[179],"benchmarks.":[180],"Code":[181],"Repository:":[182],"https://github.com/yidimopozhibai/Noise-Suppressed-Query-Retrieval.":[183]},"counts_by_year":[],"updated_date":"2026-06-16T07:37:23.134862","created_date":"2026-06-16T00:00:00"}
