{"id":"https://openalex.org/W4390523442","doi":"https://doi.org/10.1145/3595916.3626394","title":"Adapting Hierarchical Transformer for Scene-Level Sketch-Based Image Retrieval","display_name":"Adapting Hierarchical Transformer for Scene-Level Sketch-Based Image Retrieval","publication_year":2023,"publication_date":"2023-12-06","ids":{"openalex":"https://openalex.org/W4390523442","doi":"https://doi.org/10.1145/3595916.3626394"},"language":"en","primary_location":{"id":"doi:10.1145/3595916.3626394","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3595916.3626394","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3595916.3626394","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Multimedia Asia 2023","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3595916.3626394","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101457381","display_name":"Jie Yang","orcid":"https://orcid.org/0000-0002-8386-5012"},"institutions":[{"id":"https://openalex.org/I37461747","display_name":"Wuhan University","ror":"https://ror.org/033vjfk17","country_code":"CN","type":"education","lineage":["https://openalex.org/I37461747"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Jie Yang","raw_affiliation_strings":["Key Laboratory of Aerospace Information Security and Trusted Computing, Ministry of Education, School of Cyber Science and Engineering, Wuhan University, CN"],"raw_orcid":"https://orcid.org/0000-0002-8386-5012","affiliations":[{"raw_affiliation_string":"Key Laboratory of Aerospace Information Security and Trusted Computing, Ministry of Education, School of Cyber Science and Engineering, Wuhan University, CN","institution_ids":["https://openalex.org/I37461747"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5030651973","display_name":"Aihua Ke","orcid":"https://orcid.org/0000-0003-3638-7983"},"institutions":[{"id":"https://openalex.org/I37461747","display_name":"Wuhan University","ror":"https://ror.org/033vjfk17","country_code":"CN","type":"education","lineage":["https://openalex.org/I37461747"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Aihua Ke","raw_affiliation_strings":["Key Laboratory of Aerospace Information Security and Trusted Computing, Ministry of Education, School of Cyber Science and Engineering, Wuhan University, CN"],"raw_orcid":"https://orcid.org/0000-0003-3638-7983","affiliations":[{"raw_affiliation_string":"Key Laboratory of Aerospace Information Security and Trusted Computing, Ministry of Education, School of Cyber Science and Engineering, Wuhan University, CN","institution_ids":["https://openalex.org/I37461747"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5024856110","display_name":"Bo Cai","orcid":"https://orcid.org/0000-0001-5261-0191"},"institutions":[{"id":"https://openalex.org/I37461747","display_name":"Wuhan University","ror":"https://ror.org/033vjfk17","country_code":"CN","type":"education","lineage":["https://openalex.org/I37461747"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Bo Cai","raw_affiliation_strings":["Key Laboratory of Aerospace Information Security and Trusted Computing, Ministry of Education, School of Cyber Science and Engineering, Wuhan University, CN"],"raw_orcid":"https://orcid.org/0000-0001-5261-0191","affiliations":[{"raw_affiliation_string":"Key Laboratory of Aerospace Information Security and Trusted Computing, Ministry of Education, School of Cyber Science and Engineering, Wuhan University, CN","institution_ids":["https://openalex.org/I37461747"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5101457381"],"corresponding_institution_ids":["https://openalex.org/I37461747"],"apc_list":null,"apc_paid":null,"fwci":0.1177,"has_fulltext":true,"cited_by_count":1,"citation_normalized_percentile":{"value":0.45150218,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"7"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10824","display_name":"Image Retrieval and Classification Techniques","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.804803729057312},{"id":"https://openalex.org/keywords/sketch","display_name":"Sketch","score":0.6240185499191284},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.6101606488227844},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5682588219642639},{"id":"https://openalex.org/keywords/overfitting","display_name":"Overfitting","score":0.553298830986023},{"id":"https://openalex.org/keywords/image-retrieval","display_name":"Image retrieval","score":0.48084840178489685},{"id":"https://openalex.org/keywords/feature-vector","display_name":"Feature vector","score":0.4401479959487915},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.43742096424102783},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.43553924560546875},{"id":"https://openalex.org/keywords/feature-extraction","display_name":"Feature extraction","score":0.4343366324901581},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.4260128140449524},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.3772585391998291},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.3160032629966736},{"id":"https://openalex.org/keywords/algorithm","display_name":"Algorithm","score":0.10724806785583496},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.07080239057540894}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.804803729057312},{"id":"https://openalex.org/C2779231336","wikidata":"https://www.wikidata.org/wiki/Q7534724","display_name":"Sketch","level":2,"score":0.6240185499191284},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.6101606488227844},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5682588219642639},{"id":"https://openalex.org/C22019652","wikidata":"https://www.wikidata.org/wiki/Q331309","display_name":"Overfitting","level":3,"score":0.553298830986023},{"id":"https://openalex.org/C1667742","wikidata":"https://www.wikidata.org/wiki/Q10927554","display_name":"Image retrieval","level":3,"score":0.48084840178489685},{"id":"https://openalex.org/C83665646","wikidata":"https://www.wikidata.org/wiki/Q42139305","display_name":"Feature vector","level":2,"score":0.4401479959487915},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.43742096424102783},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.43553924560546875},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.4343366324901581},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.4260128140449524},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.3772585391998291},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.3160032629966736},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.10724806785583496},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.07080239057540894},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.0},{"id":"https://openalex.org/C119599485","wikidata":"https://www.wikidata.org/wiki/Q43035","display_name":"Electrical engineering","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3595916.3626394","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3595916.3626394","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3595916.3626394","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Multimedia Asia 2023","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3595916.3626394","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3595916.3626394","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3595916.3626394","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Multimedia Asia 2023","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4390523442.pdf","grobid_xml":"https://content.openalex.org/works/W4390523442.grobid-xml"},"referenced_works_count":20,"referenced_works":["https://openalex.org/W1975771248","https://openalex.org/W2048546747","https://openalex.org/W2054508433","https://openalex.org/W2098807270","https://openalex.org/W2117539524","https://openalex.org/W2128543433","https://openalex.org/W2151103935","https://openalex.org/W2153404544","https://openalex.org/W2161969291","https://openalex.org/W2183341477","https://openalex.org/W2466618734","https://openalex.org/W2467281799","https://openalex.org/W2507296351","https://openalex.org/W2511925527","https://openalex.org/W2561196672","https://openalex.org/W2776402438","https://openalex.org/W3035124078","https://openalex.org/W3170544306","https://openalex.org/W3211490618","https://openalex.org/W4281251718"],"related_works":["https://openalex.org/W4362597605","https://openalex.org/W1574414179","https://openalex.org/W4297676672","https://openalex.org/W3009056573","https://openalex.org/W2922073769","https://openalex.org/W4281702477","https://openalex.org/W2490526372","https://openalex.org/W4376166922","https://openalex.org/W2185495922","https://openalex.org/W2123991572"],"abstract_inverted_index":{"Sketch-based":[0],"image":[1],"retrieval":[2],"(SBIR)":[3],"is":[4,14,25],"an":[5],"essential":[6],"application":[7],"of":[8,20,49,59,93,117,145],"sketches.":[9],"Research":[10],"on":[11,162],"object-level":[12],"SBIR":[13,24,81],"relatively":[15],"mature,":[16],"but":[17,148],"the":[18,64,86,109,114,122,128,135,143],"study":[19],"more":[21],"complex":[22],"scene-level":[23,80],"still":[26],"in":[27],"its":[28],"early":[29],"stages.":[30],"In":[31,83],"order":[32],"to":[33,62],"advance":[34],"this":[35],"research,":[36],"we":[37,74],"investigate":[38],"previous":[39],"works":[40],"and":[41,54,56,89,101,108,119],"identify":[42],"two":[43,163],"main":[44],"shortcomings:":[45],"(1)":[46],"insufficient":[47],"utilization":[48],"multi-scale":[50],"features":[51],"from":[52],"sketches":[53,118],"images,":[55],"(2)":[57],"lack":[58],"effective":[60],"modules":[61],"eliminate":[63],"substantial":[65],"domain":[66,129],"gap":[67,130],"between":[68,131],"them.":[69,132],"To":[70],"address":[71],"these":[72],"issues,":[73],"propose":[75],"SketchRetriever,":[76],"a":[77],"hierarchical":[78,87],"Transformer-based":[79],"model.":[82],"our":[84],"model,":[85],"Transformer":[88],"compressors":[90],"are":[91],"capable":[92],"efficiently":[94],"capturing":[95],"feature":[96,106,115,124],"maps":[97],"at":[98],"various":[99],"granularities":[100],"compressing":[102],"them":[103],"into":[104,121],"corresponding":[105],"vectors,":[107],"modality-specific":[110],"Adapters":[111],"can":[112],"project":[113],"embeddings":[116],"images":[120],"same":[123],"space,":[125],"thereby":[126],"closing":[127],"We":[133],"adopt":[134],"adapter-tuning":[136],"strategy,":[137],"which":[138],"not":[139],"only":[140],"considerably":[141],"reduces":[142],"number":[144],"tunable":[146],"parameters":[147],"also":[149],"effectively":[150],"avoids":[151],"overfitting.":[152],"Extensive":[153],"experiments":[154],"demonstrate":[155],"that":[156],"SketchRetriever":[157],"significantly":[158],"outperforms":[159],"state-of-the-art":[160],"methods":[161],"benchmark":[164],"datasets":[165],"with":[166],"lower":[167],"fine-tuning":[168],"overhead.":[169]},"counts_by_year":[{"year":2025,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
