{"id":"https://openalex.org/W2991539277","doi":"https://doi.org/10.1109/tip.2019.2952085","title":"MAVA: Multi-Level Adaptive Visual-Textual Alignment by Cross-Media Bi-Attention Mechanism","display_name":"MAVA: Multi-Level Adaptive Visual-Textual Alignment by Cross-Media Bi-Attention Mechanism","publication_year":2019,"publication_date":"2019-11-22","ids":{"openalex":"https://openalex.org/W2991539277","doi":"https://doi.org/10.1109/tip.2019.2952085","mag":"2991539277","pmid":"https://pubmed.ncbi.nlm.nih.gov/31765311"},"language":"en","primary_location":{"id":"doi:10.1109/tip.2019.2952085","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tip.2019.2952085","pdf_url":null,"source":{"id":"https://openalex.org/S4210173141","display_name":"IEEE Transactions on Image Processing","issn_l":"1057-7149","issn":["1057-7149","1941-0042"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Image Processing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5047811387","display_name":"Yuxin Peng","orcid":"https://orcid.org/0000-0001-7658-3845"},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Yuxin Peng","raw_affiliation_strings":["Wangxuan Institute of Computer Technology, Peking University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Wangxuan Institute of Computer Technology, Peking University, Beijing, China","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5026142528","display_name":"Jinwei Qi","orcid":null},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jinwei Qi","raw_affiliation_strings":["Wangxuan Institute of Computer Technology, Peking University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Wangxuan Institute of Computer Technology, Peking University, Beijing, China","institution_ids":["https://openalex.org/I20231570"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5021725678","display_name":"Yunkan Zhuo","orcid":null},"institutions":[{"id":"https://openalex.org/I20231570","display_name":"Peking University","ror":"https://ror.org/02v51f717","country_code":"CN","type":"education","lineage":["https://openalex.org/I20231570"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yunkan Zhuo","raw_affiliation_strings":["Wangxuan Institute of Computer Technology, Peking University, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Wangxuan Institute of Computer Technology, Peking University, Beijing, China","institution_ids":["https://openalex.org/I20231570"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5047811387"],"corresponding_institution_ids":["https://openalex.org/I20231570"],"apc_list":null,"apc_paid":null,"fwci":2.3281,"has_fulltext":false,"cited_by_count":38,"citation_normalized_percentile":{"value":0.91190659,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":97,"max":99},"biblio":{"volume":"29","issue":null,"first_page":"2728","last_page":"2741"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9962000250816345,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9926000237464905,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/discriminative-model","display_name":"Discriminative model","score":0.8971171379089355},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7801322937011719},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.6424258947372437},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6422878503799438},{"id":"https://openalex.org/keywords/relation","display_name":"Relation (database)","score":0.6066151857376099},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.5932963490486145},{"id":"https://openalex.org/keywords/exploit","display_name":"Exploit","score":0.5020773410797119},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.4746999144554138},{"id":"https://openalex.org/keywords/spatial-relation","display_name":"Spatial relation","score":0.441400945186615},{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.4303339123725891},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.41793274879455566},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.13404062390327454}],"concepts":[{"id":"https://openalex.org/C97931131","wikidata":"https://www.wikidata.org/wiki/Q5282087","display_name":"Discriminative model","level":2,"score":0.8971171379089355},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7801322937011719},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.6424258947372437},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6422878503799438},{"id":"https://openalex.org/C25343380","wikidata":"https://www.wikidata.org/wiki/Q277521","display_name":"Relation (database)","level":2,"score":0.6066151857376099},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.5932963490486145},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.5020773410797119},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.4746999144554138},{"id":"https://openalex.org/C27511587","wikidata":"https://www.wikidata.org/wiki/Q2178623","display_name":"Spatial relation","level":2,"score":0.441400945186615},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.4303339123725891},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.41793274879455566},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.13404062390327454},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C120665830","wikidata":"https://www.wikidata.org/wiki/Q14620","display_name":"Optics","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/tip.2019.2952085","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tip.2019.2952085","pdf_url":null,"source":{"id":"https://openalex.org/S4210173141","display_name":"IEEE Transactions on Image Processing","issn_l":"1057-7149","issn":["1057-7149","1941-0042"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Image Processing","raw_type":"journal-article"},{"id":"pmid:31765311","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/31765311","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE transactions on image processing : a publication of the IEEE Signal Processing Society","raw_type":null}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/10","score":0.7200000286102295,"display_name":"Reduced inequalities"}],"awards":[{"id":"https://openalex.org/G7427494650","display_name":null,"funder_award_id":"61925201","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G8355516938","display_name":null,"funder_award_id":"61771025","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":55,"referenced_works":["https://openalex.org/W639708223","https://openalex.org/W1123427201","https://openalex.org/W1523385540","https://openalex.org/W1861492603","https://openalex.org/W1905882502","https://openalex.org/W1925193522","https://openalex.org/W1964073652","https://openalex.org/W2013535308","https://openalex.org/W2015394094","https://openalex.org/W2025341678","https://openalex.org/W2052727801","https://openalex.org/W2070753207","https://openalex.org/W2099471712","https://openalex.org/W2100235303","https://openalex.org/W2106277773","https://openalex.org/W2108598243","https://openalex.org/W2157331557","https://openalex.org/W2163605009","https://openalex.org/W2170240176","https://openalex.org/W2185175083","https://openalex.org/W2194775991","https://openalex.org/W2211092169","https://openalex.org/W2277195237","https://openalex.org/W2326180695","https://openalex.org/W2476624367","https://openalex.org/W2574447816","https://openalex.org/W2591669147","https://openalex.org/W2605649771","https://openalex.org/W2606965845","https://openalex.org/W2613718673","https://openalex.org/W2745461083","https://openalex.org/W2765440071","https://openalex.org/W2765977864","https://openalex.org/W2766910785","https://openalex.org/W2774267535","https://openalex.org/W2883311563","https://openalex.org/W2888166343","https://openalex.org/W2896798564","https://openalex.org/W2962964995","https://openalex.org/W2963000732","https://openalex.org/W2963012544","https://openalex.org/W2963499204","https://openalex.org/W2964081303","https://openalex.org/W2964120214","https://openalex.org/W2964216321","https://openalex.org/W4320013936","https://openalex.org/W6620707391","https://openalex.org/W6631216910","https://openalex.org/W6639102338","https://openalex.org/W6684191040","https://openalex.org/W6685053522","https://openalex.org/W6732292492","https://openalex.org/W6745578188","https://openalex.org/W6747225742","https://openalex.org/W6749537441"],"related_works":["https://openalex.org/W17155033","https://openalex.org/W3207760230","https://openalex.org/W2363753014","https://openalex.org/W2385567678","https://openalex.org/W2355121635","https://openalex.org/W1967100394","https://openalex.org/W4312476862","https://openalex.org/W2016995243","https://openalex.org/W1495086659","https://openalex.org/W2382163390"],"abstract_inverted_index":{"The":[0],"rapidly":[1],"developing":[2],"information":[3,64,165,184],"technology":[4],"leads":[5],"to":[6,21,130,166,180,210,224,238,248],"a":[7],"fast":[8],"growth":[9],"of":[10,160,229,272],"visual":[11,103,147],"and":[12,15,24,30,46,57,93,104,142,191,214,233,259],"textual":[13,105,155],"contents,":[14],"it":[16],"comes":[17],"with":[18,119,185,262],"huge":[19],"challenges":[20],"make":[22],"correlation":[23,37,201],"perform":[25,249],"crossmedia":[26,80],"retrieval":[27],"between":[28,68,89,149],"images":[29,45],"sentences.":[31],"Existing":[32],"methods":[33],"mainly":[34],"explore":[35,211],"cross-media":[36,126,206,242,255],"from":[38,65,157,188],"either":[39],"global-level":[40],"instances":[41],"as":[42,52,138,152,154],"the":[43,53,62,66,87,102,120,134,158,182,226,270],"whole":[44],"sentences,":[47,161],"or":[48],"local-level":[49,69],"fine-grained":[50,70,128,136,168,183],"patches":[51,137],"discriminative":[54,90,139,198],"image":[55,91,140,150],"regions":[56,92,141,151],"key":[58,94,143],"words,":[59,95,144],"which":[60,162,194,266],"ignore":[61],"complementary":[63,164],"relation":[67,73,192,215],"patches.":[71],"Naturally,":[72],"understanding":[74],"is":[75,221],"highly":[76],"important":[77],"for":[78,200],"learning":[79],"correlation.":[81,243],"People":[82],"focus":[83],"on":[84,252],"not":[85,132],"only":[86,133],"alignment":[88,209,219],"but":[96,145],"also":[97,146],"their":[98],"relations":[99,148,156],"lying":[100],"in":[101,108],"context.":[106],"Therefore,":[107],"this":[109],"paper,":[110],"we":[111,124,175,204],"propose":[112,125,176,205],"Multi-level":[113],"Adaptive":[114],"Visual-textual":[115],"Alignment":[116],"(MAVA)":[117],"approach":[118],"following":[121],"contributions.":[122],"First,":[123],"multi-pathway":[127],"network":[129],"extract":[131],"local":[135,190,213],"well":[153],"context":[159],"contain":[163],"exploit":[167],"characteristics":[169],"within":[170],"different":[171,186,230],"media":[172,231],"types.":[173],"Second,":[174],"visual-textual":[177],"bi-attention":[178],"mechanism":[179],"distinguish":[181],"saliency":[187],"both":[189],"levels,":[193],"can":[195,267],"provide":[196],"more":[197,240],"hints":[199],"learning.":[202],"Third,":[203],"multi-level":[207],"adaptive":[208,218],"global,":[212],"alignments.":[216],"An":[217],"strategy":[220],"further":[222],"proposed":[223,274],"enhance":[225],"matched":[227],"pairs":[228],"types,":[232],"discard":[234],"those":[235],"misalignments":[236],"adaptively":[237],"learn":[239],"precise":[241],"Extensive":[244],"experiments":[245],"are":[246],"conducted":[247],"image-sentence":[250],"matching":[251],"2":[253],"widely-used":[254],"datasets,":[256],"namely":[257],"Flickr-30K":[258],"MS-COCO,":[260],"comparing":[261],"10":[263],"state-of-the-art":[264],"methods,":[265],"fully":[268],"verify":[269],"effectiveness":[271],"our":[273],"MAVA":[275],"approach.":[276]},"counts_by_year":[{"year":2025,"cited_by_count":4},{"year":2024,"cited_by_count":4},{"year":2023,"cited_by_count":7},{"year":2022,"cited_by_count":9},{"year":2021,"cited_by_count":10},{"year":2020,"cited_by_count":4}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
