{"id":"https://openalex.org/W3035635363","doi":"https://doi.org/10.1109/icme46284.2020.9102714","title":"Mutatt: Visual-Textual Mutual Guidance For Referring Expression Comprehension","display_name":"Mutatt: Visual-Textual Mutual Guidance For Referring Expression Comprehension","publication_year":2020,"publication_date":"2020-06-09","ids":{"openalex":"https://openalex.org/W3035635363","doi":"https://doi.org/10.1109/icme46284.2020.9102714","mag":"3035635363"},"language":"en","primary_location":{"id":"doi:10.1109/icme46284.2020.9102714","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icme46284.2020.9102714","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2020 IEEE International Conference on Multimedia and Expo (ICME)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100328272","display_name":"Shuai Wang","orcid":"https://orcid.org/0000-0002-1595-3619"},"institutions":[{"id":"https://openalex.org/I162868743","display_name":"Tianjin University","ror":"https://ror.org/012tb2g32","country_code":"CN","type":"education","lineage":["https://openalex.org/I162868743"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Shuai Wang","raw_affiliation_strings":["College of Intelligence and Computing, Tianjin University, China"],"affiliations":[{"raw_affiliation_string":"College of Intelligence and Computing, Tianjin University, China","institution_ids":["https://openalex.org/I162868743"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101573269","display_name":"Fan Lyu","orcid":"https://orcid.org/0000-0002-0878-5485"},"institutions":[{"id":"https://openalex.org/I162868743","display_name":"Tianjin University","ror":"https://ror.org/012tb2g32","country_code":"CN","type":"education","lineage":["https://openalex.org/I162868743"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Fan Lyu","raw_affiliation_strings":["College of Intelligence and Computing, Tianjin University, China"],"affiliations":[{"raw_affiliation_string":"College of Intelligence and Computing, Tianjin University, China","institution_ids":["https://openalex.org/I162868743"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100721881","display_name":"Wei Feng","orcid":"https://orcid.org/0000-0003-3809-1086"},"institutions":[{"id":"https://openalex.org/I162868743","display_name":"Tianjin University","ror":"https://ror.org/012tb2g32","country_code":"CN","type":"education","lineage":["https://openalex.org/I162868743"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wei Feng","raw_affiliation_strings":["College of Intelligence and Computing, Tianjin University, China"],"affiliations":[{"raw_affiliation_string":"College of Intelligence and Computing, Tianjin University, China","institution_ids":["https://openalex.org/I162868743"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5082259804","display_name":"Song Wang","orcid":"https://orcid.org/0000-0003-4152-5295"},"institutions":[{"id":"https://openalex.org/I162868743","display_name":"Tianjin University","ror":"https://ror.org/012tb2g32","country_code":"CN","type":"education","lineage":["https://openalex.org/I162868743"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Song Wang","raw_affiliation_strings":["College of Intelligence and Computing, Tianjin University, China"],"affiliations":[{"raw_affiliation_string":"College of Intelligence and Computing, Tianjin University, China","institution_ids":["https://openalex.org/I162868743"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5100328272"],"corresponding_institution_ids":["https://openalex.org/I162868743"],"apc_list":null,"apc_paid":null,"fwci":0.2931,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.55528668,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9925000071525574,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9897000193595886,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8031706213951111},{"id":"https://openalex.org/keywords/expression","display_name":"Expression (computer science)","score":0.6742163896560669},{"id":"https://openalex.org/keywords/consistency","display_name":"Consistency (knowledge bases)","score":0.6276717782020569},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6085574626922607},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.5849643349647522},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5835719108581543},{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.5763538479804993},{"id":"https://openalex.org/keywords/construct","display_name":"Construct (python library)","score":0.5659187436103821},{"id":"https://openalex.org/keywords/natural-language","display_name":"Natural language","score":0.5570802092552185},{"id":"https://openalex.org/keywords/comprehension","display_name":"Comprehension","score":0.5309849381446838},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.473819375038147},{"id":"https://openalex.org/keywords/mutual-information","display_name":"Mutual information","score":0.4461383819580078},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.08427935838699341},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.07145673036575317}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8031706213951111},{"id":"https://openalex.org/C90559484","wikidata":"https://www.wikidata.org/wiki/Q778379","display_name":"Expression (computer science)","level":2,"score":0.6742163896560669},{"id":"https://openalex.org/C2776436953","wikidata":"https://www.wikidata.org/wiki/Q5163215","display_name":"Consistency (knowledge bases)","level":2,"score":0.6276717782020569},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6085574626922607},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.5849643349647522},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5835719108581543},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.5763538479804993},{"id":"https://openalex.org/C2780801425","wikidata":"https://www.wikidata.org/wiki/Q5164392","display_name":"Construct (python library)","level":2,"score":0.5659187436103821},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.5570802092552185},{"id":"https://openalex.org/C511192102","wikidata":"https://www.wikidata.org/wiki/Q5156948","display_name":"Comprehension","level":2,"score":0.5309849381446838},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.473819375038147},{"id":"https://openalex.org/C152139883","wikidata":"https://www.wikidata.org/wiki/Q252973","display_name":"Mutual information","level":2,"score":0.4461383819580078},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.08427935838699341},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.07145673036575317},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C120665830","wikidata":"https://www.wikidata.org/wiki/Q14620","display_name":"Optics","level":1,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icme46284.2020.9102714","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icme46284.2020.9102714","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2020 IEEE International Conference on Multimedia and Expo (ICME)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.7400000095367432,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":29,"referenced_works":["https://openalex.org/W1527575280","https://openalex.org/W2123024445","https://openalex.org/W2489434015","https://openalex.org/W2558535589","https://openalex.org/W2571175805","https://openalex.org/W2583360688","https://openalex.org/W2754927243","https://openalex.org/W2770129969","https://openalex.org/W2779827764","https://openalex.org/W2799263800","https://openalex.org/W2886247548","https://openalex.org/W2891424527","https://openalex.org/W2904910963","https://openalex.org/W2912423076","https://openalex.org/W2962760898","https://openalex.org/W2962764817","https://openalex.org/W2963109634","https://openalex.org/W2963449390","https://openalex.org/W2963467339","https://openalex.org/W2963882743","https://openalex.org/W2964022527","https://openalex.org/W2964345792","https://openalex.org/W2984121207","https://openalex.org/W2986755220","https://openalex.org/W2988823324","https://openalex.org/W2990069284","https://openalex.org/W6631516269","https://openalex.org/W6678470764","https://openalex.org/W6754456203"],"related_works":["https://openalex.org/W2466816617","https://openalex.org/W2081900870","https://openalex.org/W1970834875","https://openalex.org/W842936808","https://openalex.org/W3174028392","https://openalex.org/W2000517284","https://openalex.org/W2365318811","https://openalex.org/W2136503713","https://openalex.org/W2375330620","https://openalex.org/W2363755581"],"abstract_inverted_index":{"Referring":[0],"expression":[1,17,52],"comprehension":[2],"(REC)":[3],"aims":[4],"to":[5,26,82,122,142],"localize":[6],"a":[7,11,15,77],"text-related":[8],"region":[9,56],"in":[10,18,157],"given":[12],"image":[13],"by":[14],"referring":[16,51],"natural":[19],"language.":[20,70],"Existing":[21],"methods":[22],"focus":[23],"on":[24,161],"how":[25],"build":[27],"convincing":[28],"visual":[29,38,133,140],"and":[30,39,53,60,63,69,88,93,108],"language":[31,40,94,125,145],"representations":[32],"independently,":[33],"which":[34,90],"may":[35],"significantly":[36],"isolate":[37],"information.":[41],"In":[42],"this":[43],"paper,":[44],"we":[45,75],"argue":[46],"that":[47,167],"for":[48,102,127],"REC":[49,164],"the":[50,54,128,154,168,172],"target":[55],"are":[57],"semantically":[58],"correlated":[59],"subject,":[61,106],"location":[62,107],"relationship":[64],"consistency":[65,156],"exist":[66],"between":[67,86],"vision":[68,87,92],"On":[71],"top":[72],"of":[73,105,114,130],"this,":[74],"propose":[76],"novel":[78],"approach":[79,170],"called":[80],"MutAtt":[81,110],"construct":[83],"mutual":[84,116,148],"guidance":[85,117,149],"language,":[89],"treats":[91],"equally":[95],"thus":[96],"yields":[97],"compact":[98],"information":[99],"matching.":[100],"Specifically,":[101],"each":[103],"module":[104],"relationship,":[109],"builds":[111],"two":[112],"kinds":[113],"attention-based":[115],"strategies.":[118],"One":[119],"strategy":[120,150],"is":[121],"generate":[123],"vision-guided":[124],"embedding":[126],"sake":[129],"matching":[131],"relevant":[132,144],"features.":[134],"The":[135],"other":[136],"reversely":[137],"generates":[138],"language-guided":[139],"features":[141],"match":[143],"embedding.":[146],"This":[147],"can":[151],"effectively":[152],"enforce":[153],"vision-language":[155],"three":[158,162],"modules.":[159],"Experiments":[160],"popular":[163],"datasets":[165],"demonstrate":[166],"proposed":[169],"outperforms":[171],"current":[173],"state-of-the-art":[174],"methods.":[175]},"counts_by_year":[{"year":2024,"cited_by_count":1},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
