{"id":"https://openalex.org/W4402769251","doi":"https://doi.org/10.1109/tcsvt.2024.3462100","title":"MCCE-REC: MLLM-Driven Cross-Modal Contrastive Entropy Model for Zero-Shot Referring Expression Comprehension","display_name":"MCCE-REC: MLLM-Driven Cross-Modal Contrastive Entropy Model for Zero-Shot Referring Expression Comprehension","publication_year":2024,"publication_date":"2024-09-16","ids":{"openalex":"https://openalex.org/W4402769251","doi":"https://doi.org/10.1109/tcsvt.2024.3462100"},"language":"en","primary_location":{"id":"doi:10.1109/tcsvt.2024.3462100","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcsvt.2024.3462100","pdf_url":null,"source":{"id":"https://openalex.org/S115173108","display_name":"IEEE Transactions on Circuits and Systems for Video Technology","issn_l":"1051-8215","issn":["1051-8215","1558-2205"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Circuits and Systems for Video Technology","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5066434602","display_name":"Heqian Qiu","orcid":"https://orcid.org/0000-0002-0963-0311"},"institutions":[{"id":"https://openalex.org/I150229711","display_name":"University of Electronic Science and Technology of China","ror":"https://ror.org/04qr3zq92","country_code":"CN","type":"education","lineage":["https://openalex.org/I150229711"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Heqian Qiu","raw_affiliation_strings":["School of Information and Communication Engineering, University of Electronic Science and Technology of China, Chengdu, China"],"affiliations":[{"raw_affiliation_string":"School of Information and Communication Engineering, University of Electronic Science and Technology of China, Chengdu, China","institution_ids":["https://openalex.org/I150229711"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5067691968","display_name":"Lanxiao Wang","orcid":"https://orcid.org/0000-0002-3745-0262"},"institutions":[{"id":"https://openalex.org/I150229711","display_name":"University of Electronic Science and Technology of China","ror":"https://ror.org/04qr3zq92","country_code":"CN","type":"education","lineage":["https://openalex.org/I150229711"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Lanxiao Wang","raw_affiliation_strings":["School of Information and Communication Engineering, University of Electronic Science and Technology of China, Chengdu, China"],"affiliations":[{"raw_affiliation_string":"School of Information and Communication Engineering, University of Electronic Science and Technology of China, Chengdu, China","institution_ids":["https://openalex.org/I150229711"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5072221658","display_name":"Taijin Zhao","orcid":"https://orcid.org/0009-0008-5449-900X"},"institutions":[{"id":"https://openalex.org/I150229711","display_name":"University of Electronic Science and Technology of China","ror":"https://ror.org/04qr3zq92","country_code":"CN","type":"education","lineage":["https://openalex.org/I150229711"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Taijin Zhao","raw_affiliation_strings":["School of Information and Communication Engineering, University of Electronic Science and Technology of China, Chengdu, China"],"affiliations":[{"raw_affiliation_string":"School of Information and Communication Engineering, University of Electronic Science and Technology of China, Chengdu, China","institution_ids":["https://openalex.org/I150229711"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100617043","display_name":"Fanman Meng","orcid":"https://orcid.org/0000-0002-3016-2567"},"institutions":[{"id":"https://openalex.org/I150229711","display_name":"University of Electronic Science and Technology of China","ror":"https://ror.org/04qr3zq92","country_code":"CN","type":"education","lineage":["https://openalex.org/I150229711"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Fanman Meng","raw_affiliation_strings":["School of Information and Communication Engineering, University of Electronic Science and Technology of China, Chengdu, China"],"affiliations":[{"raw_affiliation_string":"School of Information and Communication Engineering, University of Electronic Science and Technology of China, Chengdu, China","institution_ids":["https://openalex.org/I150229711"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5075737786","display_name":"Qingbo Wu","orcid":"https://orcid.org/0000-0003-2936-6340"},"institutions":[{"id":"https://openalex.org/I150229711","display_name":"University of Electronic Science and Technology of China","ror":"https://ror.org/04qr3zq92","country_code":"CN","type":"education","lineage":["https://openalex.org/I150229711"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qingbo Wu","raw_affiliation_strings":["School of Information and Communication Engineering, University of Electronic Science and Technology of China, Chengdu, China"],"affiliations":[{"raw_affiliation_string":"School of Information and Communication Engineering, University of Electronic Science and Technology of China, Chengdu, China","institution_ids":["https://openalex.org/I150229711"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5114378292","display_name":"Hongliang Li","orcid":"https://orcid.org/0000-0002-7481-095X"},"institutions":[{"id":"https://openalex.org/I150229711","display_name":"University of Electronic Science and Technology of China","ror":"https://ror.org/04qr3zq92","country_code":"CN","type":"education","lineage":["https://openalex.org/I150229711"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hongliang Li","raw_affiliation_strings":["School of Information and Communication Engineering, University of Electronic Science and Technology of China, Chengdu, China"],"affiliations":[{"raw_affiliation_string":"School of Information and Communication Engineering, University of Electronic Science and Technology of China, Chengdu, China","institution_ids":["https://openalex.org/I150229711"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5066434602"],"corresponding_institution_ids":["https://openalex.org/I150229711"],"apc_list":null,"apc_paid":null,"fwci":2.3905,"has_fulltext":false,"cited_by_count":7,"citation_normalized_percentile":{"value":0.9034517,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":98,"max":99},"biblio":{"volume":"35","issue":"1","first_page":"754","last_page":"768"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9961000084877014,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9961000084877014,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9939000010490417,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.9700999855995178,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/modal","display_name":"Modal","score":0.6308097243309021},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.508141279220581},{"id":"https://openalex.org/keywords/zero","display_name":"Zero (linguistics)","score":0.4619465470314026},{"id":"https://openalex.org/keywords/expression","display_name":"Expression (computer science)","score":0.4561329483985901},{"id":"https://openalex.org/keywords/entropy","display_name":"Entropy (arrow of time)","score":0.45275449752807617},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.37425696849823},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.37421369552612305},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3332255780696869},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.25677502155303955},{"id":"https://openalex.org/keywords/physics","display_name":"Physics","score":0.14403700828552246}],"concepts":[{"id":"https://openalex.org/C71139939","wikidata":"https://www.wikidata.org/wiki/Q910194","display_name":"Modal","level":2,"score":0.6308097243309021},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.508141279220581},{"id":"https://openalex.org/C2780813799","wikidata":"https://www.wikidata.org/wiki/Q3274237","display_name":"Zero (linguistics)","level":2,"score":0.4619465470314026},{"id":"https://openalex.org/C90559484","wikidata":"https://www.wikidata.org/wiki/Q778379","display_name":"Expression (computer science)","level":2,"score":0.4561329483985901},{"id":"https://openalex.org/C106301342","wikidata":"https://www.wikidata.org/wiki/Q4117933","display_name":"Entropy (arrow of time)","level":2,"score":0.45275449752807617},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.37425696849823},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.37421369552612305},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3332255780696869},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.25677502155303955},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.14403700828552246},{"id":"https://openalex.org/C188027245","wikidata":"https://www.wikidata.org/wiki/Q750446","display_name":"Polymer chemistry","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tcsvt.2024.3462100","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcsvt.2024.3462100","pdf_url":null,"source":{"id":"https://openalex.org/S115173108","display_name":"IEEE Transactions on Circuits and Systems for Video Technology","issn_l":"1051-8215","issn":["1051-8215","1558-2205"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Circuits and Systems for Video Technology","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/10","display_name":"Reduced inequalities","score":0.5799999833106995}],"awards":[{"id":"https://openalex.org/G2459130074","display_name":null,"funder_award_id":"U23A20286","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G423321230","display_name":null,"funder_award_id":"2023M740529","funder_id":"https://openalex.org/F4320321543","funder_display_name":"China Postdoctoral Science Foundation"},{"id":"https://openalex.org/G4745741018","display_name":null,"funder_award_id":"2023TQ0046","funder_id":"https://openalex.org/F4320321543","funder_display_name":"China Postdoctoral Science Foundation"},{"id":"https://openalex.org/G7105383762","display_name":null,"funder_award_id":"62301121","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320321543","display_name":"China Postdoctoral Science Foundation","ror":"https://ror.org/0426zh255"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":67,"referenced_works":["https://openalex.org/W1773149199","https://openalex.org/W1861492603","https://openalex.org/W2194775991","https://openalex.org/W2251512949","https://openalex.org/W2571175805","https://openalex.org/W2770129969","https://openalex.org/W2799263800","https://openalex.org/W2894964039","https://openalex.org/W2904910963","https://openalex.org/W2952524542","https://openalex.org/W2955014993","https://openalex.org/W2962858109","https://openalex.org/W2963109634","https://openalex.org/W2963150697","https://openalex.org/W2964284374","https://openalex.org/W2964345792","https://openalex.org/W2984121207","https://openalex.org/W2986755220","https://openalex.org/W2986803748","https://openalex.org/W2987734933","https://openalex.org/W2989604896","https://openalex.org/W3034772468","https://openalex.org/W3034896527","https://openalex.org/W3035524453","https://openalex.org/W3093314701","https://openalex.org/W3096609285","https://openalex.org/W3110435696","https://openalex.org/W3174004334","https://openalex.org/W4214490042","https://openalex.org/W4283075546","https://openalex.org/W4283805152","https://openalex.org/W4285192809","https://openalex.org/W4312351586","https://openalex.org/W4312599396","https://openalex.org/W4313145013","https://openalex.org/W4313427326","https://openalex.org/W4321021726","https://openalex.org/W4367191035","https://openalex.org/W4379984073","https://openalex.org/W4385245566","https://openalex.org/W4385569741","https://openalex.org/W4386075561","https://openalex.org/W4387967913","https://openalex.org/W4390873481","https://openalex.org/W4390874575","https://openalex.org/W4392543666","https://openalex.org/W4392939995","https://openalex.org/W4396594886","https://openalex.org/W4396941553","https://openalex.org/W4398202893","https://openalex.org/W4402727764","https://openalex.org/W6620707391","https://openalex.org/W6750227808","https://openalex.org/W6778883912","https://openalex.org/W6791353385","https://openalex.org/W6798805250","https://openalex.org/W6802347785","https://openalex.org/W6810334672","https://openalex.org/W6811013733","https://openalex.org/W6849177959","https://openalex.org/W6850787431","https://openalex.org/W6851592950","https://openalex.org/W6851950068","https://openalex.org/W6853116092","https://openalex.org/W6853242345","https://openalex.org/W6856224812","https://openalex.org/W6858620312"],"related_works":["https://openalex.org/W3013650182","https://openalex.org/W2989283631","https://openalex.org/W4249605382","https://openalex.org/W4313491656","https://openalex.org/W2392243736","https://openalex.org/W3279617","https://openalex.org/W86652014","https://openalex.org/W1991183963","https://openalex.org/W2379392295","https://openalex.org/W3160965418"],"abstract_inverted_index":{"Zero-shot":[0],"referring":[1,27,49,110,124,147,202],"expression":[2,28],"comprehension":[3,96],"(zero-shot":[4],"REC)":[5],"is":[6],"a":[7,26,39,107,136,161,175,214],"crucial":[8],"yet":[9],"challenging":[10],"task":[11],"in":[12,160],"the":[13,93,99,131,143,170,183],"field":[14],"of":[15,38,98,109],"multi-modal":[16,100],"understanding,":[17],"which":[18,141],"aims":[19],"to":[20,43,67,114,123,188],"locate":[21],"an":[22,81],"object":[23,148,158],"described":[24],"by":[25,213],"without":[29],"training":[30],"on":[31,70,127,130,217],"task-specific":[32],"datasets.":[33,220],"Existing":[34],"methods":[35,212],"take":[36],"advantage":[37],"pre-trained":[40],"CLIP":[41,68],"model":[42,86,103],"align":[44],"cropped":[45],"proposal":[46],"regions":[47,65,191,199],"with":[48,192,201],"expressions.":[50],"However,":[51],"our":[52,207],"analysis":[53],"reveals":[54],"that":[55,181],"this":[56,76],"aligning":[57],"way":[58],"heavily":[59],"biases":[60],"toward":[61],"certain":[62],"salient":[63,167],"visual":[64,144],"due":[66],"focusing":[69],"global-level":[71],"image-text":[72],"matching.":[73],"To":[74],"mitigate":[75],"bias,":[77],"we":[78,105,134,173],"propose":[79,135],"MCCE-REC,":[80],"MLLM-driven":[82],"cross-modal":[83,138],"contrastive":[84,120,176],"entropy":[85,178],"for":[87,112],"training-free":[88],"zero-shot":[89,211],"REC.":[90],"Benefiting":[91],"from":[92,151],"remarkable":[94],"in-context":[95],"ability":[97],"large":[101],"language":[102],"(MLLM),":[104],"design":[106],"set":[108],"prompts":[111],"MLLM":[113],"generate":[115],"diverse":[116],"detailed":[117],"informative,":[118],"and":[119,146,154,185,196],"cues":[121,187],"related":[122],"objects.":[125],"Based":[126],"these":[128],"cues,":[129],"one":[132],"hand,":[133,172],"multi-cues":[137],"interaction":[139],"network,":[140],"associates":[142],"features":[145,150],"textual":[149],"multiple":[152],"perspectives":[153],"perceives":[155],"surrounding":[156],"context":[157],"information":[159],"parameter-free":[162],"manner,":[163],"avoiding":[164],"bias":[165],"towards":[166],"features.":[168],"On":[169],"other":[171],"introduce":[174],"similarity":[177,194],"selection":[179],"mechanism":[180],"compares":[182],"positive":[184],"negative":[186],"suppress":[189],"biased":[190],"high":[193],"scores":[195],"emphasizes":[197],"accurate":[198],"correlating":[200],"descriptions.":[203],"Extensive":[204],"experiments":[205],"demonstrate":[206],"MCCE-REC":[208],"outperforms":[209],"existing":[210],"significant":[215],"margin":[216],"various":[218],"REC":[219]},"counts_by_year":[{"year":2025,"cited_by_count":7}],"updated_date":"2026-04-09T08:11:56.329763","created_date":"2025-10-10T00:00:00"}
