{"id":"https://openalex.org/W4401536805","doi":"https://doi.org/10.1109/tcsvt.2024.3443097","title":"A Lightweight Multi-Grained Image-Text Retrieval Paradigm via Cascaded Representation Learning and Parameter-Free Feature Aggregation","display_name":"A Lightweight Multi-Grained Image-Text Retrieval Paradigm via Cascaded Representation Learning and Parameter-Free Feature Aggregation","publication_year":2024,"publication_date":"2024-08-13","ids":{"openalex":"https://openalex.org/W4401536805","doi":"https://doi.org/10.1109/tcsvt.2024.3443097"},"language":"en","primary_location":{"id":"doi:10.1109/tcsvt.2024.3443097","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcsvt.2024.3443097","pdf_url":null,"source":{"id":"https://openalex.org/S115173108","display_name":"IEEE Transactions on Circuits and Systems for Video Technology","issn_l":"1051-8215","issn":["1051-8215","1558-2205"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Circuits and Systems for Video Technology","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Chenyu Lu","orcid":"https://orcid.org/0009-0000-1644-2733"},"institutions":[{"id":"https://openalex.org/I66867065","display_name":"East China Normal University","ror":"https://ror.org/02n96ep67","country_code":"CN","type":"education","lineage":["https://openalex.org/I66867065"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Chenyu Lu","raw_affiliation_strings":["School of Computer Science and Technology, East China Normal University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, East China Normal University, Shanghai, China","institution_ids":["https://openalex.org/I66867065"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100641132","display_name":"Nan Zhang","orcid":"https://orcid.org/0000-0001-9620-5665"},"institutions":[{"id":"https://openalex.org/I146620803","display_name":"Wenzhou University","ror":"https://ror.org/020hxh324","country_code":"CN","type":"education","lineage":["https://openalex.org/I146620803"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Nan Zhang","raw_affiliation_strings":["College of Computer Science and Artificial Intelligence, Wenzhou University, Wenzhou, China"],"affiliations":[{"raw_affiliation_string":"College of Computer Science and Artificial Intelligence, Wenzhou University, Wenzhou, China","institution_ids":["https://openalex.org/I146620803"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5047846625","display_name":"Shiliang Sun","orcid":"https://orcid.org/0000-0001-7069-3752"},"institutions":[{"id":"https://openalex.org/I66867065","display_name":"East China Normal University","ror":"https://ror.org/02n96ep67","country_code":"CN","type":"education","lineage":["https://openalex.org/I66867065"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shiliang Sun","raw_affiliation_strings":["School of Computer Science and Technology, East China Normal University, Shanghai, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Technology, East China Normal University, Shanghai, China","institution_ids":["https://openalex.org/I66867065"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I66867065"],"apc_list":null,"apc_paid":null,"fwci":0.49,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.63666663,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":96},"biblio":{"volume":"34","issue":"12","first_page":"13584","last_page":"13595"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10824","display_name":"Image Retrieval and Classification Techniques","score":0.9969000220298767,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10824","display_name":"Image Retrieval and Classification Techniques","score":0.9969000220298767,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9887999892234802,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9182000160217285,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7665300369262695},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.6395038962364197},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.6020540595054626},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.5404869318008423},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.5312416553497314},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5308462381362915},{"id":"https://openalex.org/keywords/feature-learning","display_name":"Feature learning","score":0.5301809310913086},{"id":"https://openalex.org/keywords/image-retrieval","display_name":"Image retrieval","score":0.45321860909461975},{"id":"https://openalex.org/keywords/feature-extraction","display_name":"Feature extraction","score":0.41011083126068115}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7665300369262695},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.6395038962364197},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.6020540595054626},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.5404869318008423},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.5312416553497314},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5308462381362915},{"id":"https://openalex.org/C59404180","wikidata":"https://www.wikidata.org/wiki/Q17013334","display_name":"Feature learning","level":2,"score":0.5301809310913086},{"id":"https://openalex.org/C1667742","wikidata":"https://www.wikidata.org/wiki/Q10927554","display_name":"Image retrieval","level":3,"score":0.45321860909461975},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.41011083126068115},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tcsvt.2024.3443097","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcsvt.2024.3443097","pdf_url":null,"source":{"id":"https://openalex.org/S115173108","display_name":"IEEE Transactions on Circuits and Systems for Video Technology","issn_l":"1051-8215","issn":["1051-8215","1558-2205"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Circuits and Systems for Video Technology","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G3322949638","display_name":null,"funder_award_id":"LY23F020002","funder_id":"https://openalex.org/F4320336614","funder_display_name":"Science and Technology Plan Project of Taizhou"},{"id":"https://openalex.org/G5796687318","display_name":null,"funder_award_id":"62076096","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320336614","display_name":"Science and Technology Plan Project of Taizhou","ror":null}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":55,"referenced_works":["https://openalex.org/W657714098","https://openalex.org/W1861492603","https://openalex.org/W1916445035","https://openalex.org/W1933349210","https://openalex.org/W2064675550","https://openalex.org/W2096733369","https://openalex.org/W2102605133","https://openalex.org/W2138621090","https://openalex.org/W2143017621","https://openalex.org/W2185175083","https://openalex.org/W2250539671","https://openalex.org/W2277195237","https://openalex.org/W2302086703","https://openalex.org/W2342543219","https://openalex.org/W2962964995","https://openalex.org/W2964216321","https://openalex.org/W2981165461","https://openalex.org/W2982078236","https://openalex.org/W2988823324","https://openalex.org/W2996478685","https://openalex.org/W2998215884","https://openalex.org/W3005881764","https://openalex.org/W3019124755","https://openalex.org/W3035454331","https://openalex.org/W3035552787","https://openalex.org/W3087975588","https://openalex.org/W3088460296","https://openalex.org/W3096612875","https://openalex.org/W3099614098","https://openalex.org/W3118694826","https://openalex.org/W3162090017","https://openalex.org/W3168433561","https://openalex.org/W3169472988","https://openalex.org/W3175888430","https://openalex.org/W3190055600","https://openalex.org/W4214819138","https://openalex.org/W4231896027","https://openalex.org/W4249013746","https://openalex.org/W4283812943","https://openalex.org/W4285200267","https://openalex.org/W4296079526","https://openalex.org/W4312761738","https://openalex.org/W4382998936","https://openalex.org/W4385245566","https://openalex.org/W4386025847","https://openalex.org/W4386072307","https://openalex.org/W4386710209","https://openalex.org/W4391825130","https://openalex.org/W4392189981","https://openalex.org/W6620707391","https://openalex.org/W6631190155","https://openalex.org/W6631516269","https://openalex.org/W6676647902","https://openalex.org/W6747225742","https://openalex.org/W6757817989"],"related_works":["https://openalex.org/W2062195135","https://openalex.org/W2795079307","https://openalex.org/W3147584709","https://openalex.org/W2905271011","https://openalex.org/W2793270624","https://openalex.org/W3164948662","https://openalex.org/W4289536128","https://openalex.org/W3153597579","https://openalex.org/W4298151006","https://openalex.org/W4309346246"],"abstract_inverted_index":{"Multi-grained":[0],"cross-modal":[1],"image-text":[2,51],"retrieval":[3,52,67],"models":[4],"have":[5],"demonstrated":[6],"promising":[7],"outcomes":[8],"through":[9],"the":[10,34,66,134,138,146,161],"alignment":[11],"of":[12,141,148],"local":[13],"and":[14,26,38,111,131,133,155,163],"global":[15],"features.":[16],"However,":[17],"this":[18,42],"advancement":[19],"often":[20],"results":[21,136],"in":[22,60],"larger":[23],"model":[24,57,118],"sizes":[25],"higher":[27],"computational":[28,87],"requirements,":[29],"which":[30],"raises":[31],"concerns":[32],"regarding":[33],"balance":[35],"between":[36],"performance":[37,140],"efficiency.":[39,58],"To":[40],"address":[41],"challenge,":[43],"we":[44,63,93,144],"introduce":[45],"a":[46,70,157],"novel":[47],"lightweight":[48],"multi-grained":[49],"(LMG)":[50],"paradigm":[53],"aimed":[54],"at":[55],"enhancing":[56],"Specifically,":[59],"our":[61],"approach,":[62],"first":[64],"re-frame":[65],"problem":[68],"as":[69],"cascaded":[71],"representation":[72],"learning":[73],"task.":[74],"This":[75],"involves":[76],"leveraging":[77],"only":[78],"fine-grained":[79,164],"features":[80],"to":[81],"capture":[82],"coarse-grained":[83],"constraints,":[84],"thereby":[85],"reducing":[86],"burden":[88],"while":[89],"maintaining":[90],"accuracy.":[91],"Furthermore,":[92],"replace":[94],"computationally":[95],"expensive":[96],"parametric":[97],"feature":[98,150],"aggregation":[99,151],"methods":[100,152],"with":[101,122],"three":[102],"efficient":[103],"parameter-free":[104],"alternatives:":[105],"auto-correlation":[106],"matrix,":[107],"discrete":[108,112],"linear":[109],"convolution,":[110],"Fourier":[113],"transform.":[114],"The":[115],"proposed":[116],"LMG":[117,154],"is":[119],"extensively":[120],"compared":[121],"state-of-the-art":[123],"approaches":[124],"on":[125,153,160],"two":[126],"benchmark":[127],"datasets,":[128],"i.e.,":[129],"Flickr30K":[130],"MSCOCO,":[132],"experimental":[135],"highlight":[137],"superior":[139],"LMG.":[142],"Additionally,":[143],"explore":[145],"impact":[147],"different":[149],"conduct":[156],"sensitivity":[158],"analysis":[159],"coarse":[162],"constraints":[165],"ratio":[166],"hyper-parameter.":[167]},"counts_by_year":[{"year":2025,"cited_by_count":2}],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-10-10T00:00:00"}
