{"id":"https://openalex.org/W4392864334","doi":"https://doi.org/10.1145/3649896","title":"Realizing Efficient On-Device Language-based Image Retrieval","display_name":"Realizing Efficient On-Device Language-based Image Retrieval","publication_year":2024,"publication_date":"2024-03-15","ids":{"openalex":"https://openalex.org/W4392864334","doi":"https://doi.org/10.1145/3649896"},"language":"en","primary_location":{"id":"doi:10.1145/3649896","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3649896","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3649896","source":{"id":"https://openalex.org/S19610489","display_name":"ACM Transactions on Multimedia Computing Communications and Applications","issn_l":"1551-6857","issn":["1551-6857","1551-6865"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Multimedia Computing, Communications, and Applications","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"bronze","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3649896","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102825299","display_name":"Zhiming Hu","orcid":"https://orcid.org/0000-0002-5465-2819"},"institutions":[{"id":"https://openalex.org/I4210123574","display_name":"Centre for Social Innovation","ror":"https://ror.org/03trz9s27","country_code":"CA","type":"nonprofit","lineage":["https://openalex.org/I4210123574"]}],"countries":["CA"],"is_corresponding":true,"raw_author_name":"Zhiming Hu","raw_affiliation_strings":["Samsung AI Centre, Toronto, Canada"],"raw_orcid":"https://orcid.org/0000-0002-5465-2819","affiliations":[{"raw_affiliation_string":"Samsung AI Centre, Toronto, Canada","institution_ids":["https://openalex.org/I4210123574"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5043133820","display_name":"Mete Kemertas","orcid":"https://orcid.org/0009-0007-1314-1602"},"institutions":[{"id":"https://openalex.org/I185261750","display_name":"University of Toronto","ror":"https://ror.org/03dbr7087","country_code":"CA","type":"education","lineage":["https://openalex.org/I185261750"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Mete Kemertas","raw_affiliation_strings":["University of Toronto, Toronto, Canada"],"raw_orcid":"https://orcid.org/0009-0007-1314-1602","affiliations":[{"raw_affiliation_string":"University of Toronto, Toronto, Canada","institution_ids":["https://openalex.org/I185261750"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5115393939","display_name":"Lan Xiao","orcid":"https://orcid.org/0009-0007-0265-0727"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lan Xiao","raw_affiliation_strings":["Meta, Toronto, Canada"],"raw_orcid":"https://orcid.org/0009-0007-0265-0727","affiliations":[{"raw_affiliation_string":"Meta, Toronto, Canada","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5115393943","display_name":"Caleb Phillips","orcid":"https://orcid.org/0009-0008-9612-8821"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Caleb Phillips","raw_affiliation_strings":["Recursion, Toronto, Canada"],"raw_orcid":"https://orcid.org/0009-0008-9612-8821","affiliations":[{"raw_affiliation_string":"Recursion, Toronto, Canada","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5025164458","display_name":"Iqbal Mohomed","orcid":"https://orcid.org/0009-0008-0598-8966"},"institutions":[{"id":"https://openalex.org/I4210123574","display_name":"Centre for Social Innovation","ror":"https://ror.org/03trz9s27","country_code":"CA","type":"nonprofit","lineage":["https://openalex.org/I4210123574"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Iqbal Mohomed","raw_affiliation_strings":["Samsung AI Centre, Toronto, Canada"],"raw_orcid":"https://orcid.org/0009-0008-0598-8966","affiliations":[{"raw_affiliation_string":"Samsung AI Centre, Toronto, Canada","institution_ids":["https://openalex.org/I4210123574"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5017305009","display_name":"Afsaneh Fazly","orcid":"https://orcid.org/0000-0002-4479-4901"},"institutions":[{"id":"https://openalex.org/I4210123574","display_name":"Centre for Social Innovation","ror":"https://ror.org/03trz9s27","country_code":"CA","type":"nonprofit","lineage":["https://openalex.org/I4210123574"]}],"countries":["CA"],"is_corresponding":false,"raw_author_name":"Afsaneh Fazly","raw_affiliation_strings":["Samsung AI Centre, Toronto, Canada"],"raw_orcid":"https://orcid.org/0000-0002-4479-4901","affiliations":[{"raw_affiliation_string":"Samsung AI Centre, Toronto, Canada","institution_ids":["https://openalex.org/I4210123574"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5102825299"],"corresponding_institution_ids":["https://openalex.org/I4210123574"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.02419434,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"20","issue":"9","first_page":"1","last_page":"18"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9983999729156494,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9972000122070312,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8773491382598877},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.5703369379043579},{"id":"https://openalex.org/keywords/image-retrieval","display_name":"Image retrieval","score":0.5678647756576538},{"id":"https://openalex.org/keywords/latency","display_name":"Latency (audio)","score":0.5309309959411621},{"id":"https://openalex.org/keywords/ranking","display_name":"Ranking (information retrieval)","score":0.520719587802887},{"id":"https://openalex.org/keywords/modal","display_name":"Modal","score":0.44873130321502686},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.44793686270713806},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.41547682881355286},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.4112367033958435},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.38494986295700073},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.25668132305145264}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8773491382598877},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.5703369379043579},{"id":"https://openalex.org/C1667742","wikidata":"https://www.wikidata.org/wiki/Q10927554","display_name":"Image retrieval","level":3,"score":0.5678647756576538},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.5309309959411621},{"id":"https://openalex.org/C189430467","wikidata":"https://www.wikidata.org/wiki/Q7293293","display_name":"Ranking (information retrieval)","level":2,"score":0.520719587802887},{"id":"https://openalex.org/C71139939","wikidata":"https://www.wikidata.org/wiki/Q910194","display_name":"Modal","level":2,"score":0.44873130321502686},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.44793686270713806},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.41547682881355286},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.4112367033958435},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.38494986295700073},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.25668132305145264},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C188027245","wikidata":"https://www.wikidata.org/wiki/Q750446","display_name":"Polymer chemistry","level":1,"score":0.0},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3649896","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3649896","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3649896","source":{"id":"https://openalex.org/S19610489","display_name":"ACM Transactions on Multimedia Computing Communications and Applications","issn_l":"1551-6857","issn":["1551-6857","1551-6865"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Multimedia Computing, Communications, and Applications","raw_type":"journal-article"}],"best_oa_location":{"id":"doi:10.1145/3649896","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3649896","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3649896","source":{"id":"https://openalex.org/S19610489","display_name":"ACM Transactions on Multimedia Computing Communications and Applications","issn_l":"1551-6857","issn":["1551-6857","1551-6865"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Multimedia Computing, Communications, and Applications","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4392864334.pdf","grobid_xml":"https://content.openalex.org/works/W4392864334.grobid-xml"},"referenced_works_count":25,"referenced_works":["https://openalex.org/W1861492603","https://openalex.org/W1905882502","https://openalex.org/W2014987111","https://openalex.org/W2164598857","https://openalex.org/W2185175083","https://openalex.org/W2250539671","https://openalex.org/W2508429489","https://openalex.org/W2559655401","https://openalex.org/W2604272474","https://openalex.org/W2612690371","https://openalex.org/W2745461083","https://openalex.org/W2752236330","https://openalex.org/W2962964995","https://openalex.org/W2963341956","https://openalex.org/W2988823324","https://openalex.org/W3035588244","https://openalex.org/W3090449556","https://openalex.org/W3091588028","https://openalex.org/W3139017368","https://openalex.org/W3166304536","https://openalex.org/W3171668871","https://openalex.org/W3213100861","https://openalex.org/W4212774754","https://openalex.org/W4290097518","https://openalex.org/W4387968399"],"related_works":["https://openalex.org/W2188500270","https://openalex.org/W2303858293","https://openalex.org/W2915512527","https://openalex.org/W51364034","https://openalex.org/W2793336762","https://openalex.org/W2091548507","https://openalex.org/W2368816706","https://openalex.org/W3159414774","https://openalex.org/W4385728102","https://openalex.org/W2378093739"],"abstract_inverted_index":{"Advances":[0],"in":[1,15,25,127,168,251],"deep":[2],"learning":[3,56],"have":[4],"enabled":[5],"accurate":[6],"language-based":[7,173],"search":[8,43,285],"and":[9,68,104,186,223,286],"retrieval":[10,50,80,149,163],"(e.g.,":[11],"over":[12,259],"user":[13],"photos)":[14],"the":[16,26,73,137,162,199,207,239,260],"cloud.":[17],"Many":[18],"users":[19],"prefer":[20],"to":[21,29,125,182,225,256,283],"store":[22],"their":[23],"photos":[24],"home":[27],"due":[28],"privacy":[30],"concerns.":[31],"As":[32],"such,":[33],"a":[34,65,77,85,99,128,132,156,184,191,196,215,226,252,280],"need":[35],"arises":[36],"for":[37,142,146,171,229,238],"models":[38,51,237],"that":[39,59,90,159,248],"can":[40],"perform":[41],"cross-modal":[42,49,148],"on":[44,150,243],"resource-limited":[45],"devices.":[46,152],"State-of-the-art":[47],"(SOTA)":[48],"achieve":[52],"high":[53,79],"accuracy":[54,170],"through":[55],"entangled":[57],"representations":[58],"enable":[60],"fine-grained":[61],"similarity":[62],"calculation":[63],"between":[64],"language":[66,197],"query":[67],"an":[69,105],"image,":[70],"but":[71,97],"at":[72],"expense":[74],"of":[75,88,107,117,119,136,206,217,254,275],"having":[76],"prohibitively":[78],"latency.":[81],"Alternatively,":[82],"there":[83],"is":[84,181],"new":[86],"class":[87],"methods":[89,139,263],"exhibits":[91],"good":[92],"performance":[93,266],"with":[94,165,190,234,264],"low":[95],"latency":[96,164],"requires":[98],"lot":[100],"more":[101,109],"computational":[102],"resources":[103],"order":[106],"magnitude":[108],"training":[110,276],"data":[111],"(i.e.,":[112],"large":[113],"web-scraped":[114],"datasets":[115,246],"consisting":[116],"millions":[118,274],"image\u2013caption":[120],"pairs),":[121],"making":[122,278],"them":[123],"infeasible":[124],"use":[126],"commercial":[129,144],"context.":[130],"From":[131],"pragmatic":[133,281],"perspective,":[134],"none":[135],"existing":[138],"are":[140],"suitable":[141],"developing":[143],"applications":[145],"low-latency":[147],"low-resource":[151],"We":[153],"propose":[154],"CrispSearch,":[155],"cascaded":[157],"approach":[158,180],"greatly":[160],"reduces":[161],"minimal":[166],"loss":[167],"ranking":[169],"on-device":[172,284],"image":[174,209],"retrieval.":[175,287],"The":[176],"idea":[177],"behind":[178],"our":[179,269],"combine":[183],"light-weight":[185],"runtime-efficient":[187],"coarse":[188,200],"model":[189,201,228],"fine":[192,227,240,262],"re-ranking":[193,241],"stage.":[194],"Given":[195],"query,":[198],"effectively":[202],"filters":[203],"out":[204],"many":[205],"irrelevant":[208],"candidates.":[210],"After":[211],"this":[212],"filtering,":[213],"only":[214],"handful":[216],"strong":[218],"candidates":[219],"will":[220],"be":[221],"selected":[222],"sent":[224],"re-ranking.":[230],"Extensive":[231],"experimental":[232],"results":[233,250],"two":[235],"SOTA":[236,261],"stage":[242],"standard":[244],"benchmark":[245],"show":[247],"CrispSearch":[249],"speedup":[253],"up":[255],"38":[257],"times":[258],"negligible":[265],"degradation.":[267],"Moreover,":[268],"method":[270],"does":[271],"not":[272],"require":[273],"instances,":[277],"it":[279],"solution":[282]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
