{"id":"https://openalex.org/W3033733952","doi":"https://doi.org/10.1145/3372278.3390674","title":"Forward and Backward Multimodal NMT for Improved Monolingual and Multilingual Cross-Modal Retrieval","display_name":"Forward and Backward Multimodal NMT for Improved Monolingual and Multilingual Cross-Modal Retrieval","publication_year":2020,"publication_date":"2020-06-02","ids":{"openalex":"https://openalex.org/W3033733952","doi":"https://doi.org/10.1145/3372278.3390674","mag":"3033733952"},"language":"en","primary_location":{"id":"doi:10.1145/3372278.3390674","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3372278.3390674","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3372278.3390674","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2020 International Conference on Multimedia Retrieval","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3372278.3390674","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5063149046","display_name":"Po-Yao Huang","orcid":"https://orcid.org/0000-0002-3319-5145"},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Po-Yao Huang","raw_affiliation_strings":["Carnegie Mellon University, Pittsburgh, PA, USA"],"affiliations":[{"raw_affiliation_string":"Carnegie Mellon University, Pittsburgh, PA, USA","institution_ids":["https://openalex.org/I74973139"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5034967388","display_name":"Xiaojun Chang","orcid":"https://orcid.org/0000-0002-7778-8807"},"institutions":[{"id":"https://openalex.org/I56590836","display_name":"Monash University","ror":"https://ror.org/02bfwt286","country_code":"AU","type":"education","lineage":["https://openalex.org/I56590836"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Xiaojun Chang","raw_affiliation_strings":["Monash University, Melbourne, Australia"],"affiliations":[{"raw_affiliation_string":"Monash University, Melbourne, Australia","institution_ids":["https://openalex.org/I56590836"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5107836252","display_name":"Alexander G. Hauptmann","orcid":null},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Alexander Hauptmann","raw_affiliation_strings":["Carnegie Mellon University, PITTSBURGH, PA, USA"],"affiliations":[{"raw_affiliation_string":"Carnegie Mellon University, PITTSBURGH, PA, USA","institution_ids":["https://openalex.org/I74973139"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5060225743","display_name":"Eduard Hovy","orcid":"https://orcid.org/0000-0002-3270-7903"},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Eduard Hovy","raw_affiliation_strings":["Carnegie Mellon University, PITTSBURGH, PA, USA"],"affiliations":[{"raw_affiliation_string":"Carnegie Mellon University, PITTSBURGH, PA, USA","institution_ids":["https://openalex.org/I74973139"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5063149046"],"corresponding_institution_ids":["https://openalex.org/I74973139"],"apc_list":null,"apc_paid":null,"fwci":0.5889,"has_fulltext":true,"cited_by_count":7,"citation_normalized_percentile":{"value":0.68837973,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":91,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"53","last_page":"62"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9846000075340271,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9810000061988831,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8400437831878662},{"id":"https://openalex.org/keywords/machine-translation","display_name":"Machine translation","score":0.7244552373886108},{"id":"https://openalex.org/keywords/modal","display_name":"Modal","score":0.7014782428741455},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5940778255462646},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5926288366317749},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.47517484426498413},{"id":"https://openalex.org/keywords/image-retrieval","display_name":"Image retrieval","score":0.42473772168159485},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.35435932874679565},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.3390420079231262}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8400437831878662},{"id":"https://openalex.org/C203005215","wikidata":"https://www.wikidata.org/wiki/Q79798","display_name":"Machine translation","level":2,"score":0.7244552373886108},{"id":"https://openalex.org/C71139939","wikidata":"https://www.wikidata.org/wiki/Q910194","display_name":"Modal","level":2,"score":0.7014782428741455},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5940778255462646},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5926288366317749},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.47517484426498413},{"id":"https://openalex.org/C1667742","wikidata":"https://www.wikidata.org/wiki/Q10927554","display_name":"Image retrieval","level":3,"score":0.42473772168159485},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.35435932874679565},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.3390420079231262},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C188027245","wikidata":"https://www.wikidata.org/wiki/Q750446","display_name":"Polymer chemistry","level":1,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.1145/3372278.3390674","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3372278.3390674","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3372278.3390674","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2020 International Conference on Multimedia Retrieval","raw_type":"proceedings-article"},{"id":"pmh:oai:figshare.com:article/27592704","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4377196282","display_name":"Figshare","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4210132348","host_organization_name":"Figshare (United Kingdom)","host_organization_lineage":["https://openalex.org/I4210132348"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Text"},{"id":"pmh:oai:monash.edu:openaire/3a877e1e-7751-4bcf-8327-b0f47f868457","is_oa":true,"landing_page_url":"https://research.monash.edu/en/publications/3a877e1e-7751-4bcf-8327-b0f47f868457","pdf_url":null,"source":{"id":"https://openalex.org/S4306402625","display_name":"Monash University Research Portal (Monash University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I56590836","host_organization_name":"Monash University","host_organization_lineage":["https://openalex.org/I56590836"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Huang, P-Y, Chang, X, Hauptmann, A & Hovy, E 2020, Forward and backward multimodal nmt for improved monolingual and multilingual cross-modal retrieval. in K Schoeffmann, P Chen & N E. O\u2019Connor (eds), Proceedings of the 2020 International Conference on Multimedia Retrieval. ICMR 2020 - Proceedings of the 2020 International Conference on Multimedia Retrieval, Association for Computing Machinery (ACM), New York NY USA, pp. 53-62, ACM International Conference on Multimedia Retrieval 2020, Dublin, Ireland, 26/10/20. https://doi.org/10.1145/3372278.3390674","raw_type":"contributionToPeriodical"},{"id":"pmh:oai:monash.edu:publications/3a877e1e-7751-4bcf-8327-b0f47f868457","is_oa":true,"landing_page_url":"http://www.scopus.com/inward/record.url?scp=85086886811&partnerID=8YFLogxK","pdf_url":null,"source":{"id":"https://openalex.org/S4306402625","display_name":"Monash University Research Portal (Monash University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I56590836","host_organization_name":"Monash University","host_organization_lineage":["https://openalex.org/I56590836"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Huang , P-Y , Chang , X , Hauptmann , A &amp; Hovy , E 2020 , Forward and backward multimodal nmt for improved monolingual and multilingual cross-modal retrieval . in K Schoeffmann , P Chen &amp; N E. O\u2019Connor (eds) , Proceedings of the 2020 International Conference on Multimedia Retrieval . ICMR 2020 - Proceedings of the 2020 International Conference on Multimedia Retrieval , Association for Computing Machinery (ACM) , New York NY USA , pp. 53-62 , ACM International Conference on Multimedia Retrieval 2020 , Dublin , Ireland , 26/10/20 . https://doi.org/10.1145/3372278.3390674","raw_type":"contributionToPeriodical"}],"best_oa_location":{"id":"doi:10.1145/3372278.3390674","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3372278.3390674","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3372278.3390674","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2020 International Conference on Multimedia Retrieval","raw_type":"proceedings-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education","score":0.7799999713897705}],"awards":[{"id":"https://openalex.org/G4480723317","display_name":null,"funder_award_id":"FA87501820018","funder_id":"https://openalex.org/F4320337531","funder_display_name":"Defense Sciences Office, DARPA"},{"id":"https://openalex.org/G4713059963","display_name":null,"funder_award_id":"FA8750","funder_id":"https://openalex.org/F4320332180","funder_display_name":"Defense Advanced Research Projects Agency"},{"id":"https://openalex.org/G6950192420","display_name":null,"funder_award_id":"HR001119900","funder_id":"https://openalex.org/F4320332180","funder_display_name":"Defense Advanced Research Projects Agency"},{"id":"https://openalex.org/G740863221","display_name":null,"funder_award_id":"FA8750-18-2-0018","funder_id":"https://openalex.org/F4320332180","funder_display_name":"Defense Advanced Research Projects Agency"},{"id":"https://openalex.org/G8444588739","display_name":null,"funder_award_id":"HR00111990063","funder_id":"https://openalex.org/F4320332180","funder_display_name":"Defense Advanced Research Projects Agency"}],"funders":[{"id":"https://openalex.org/F4320320720","display_name":"Indian Council of Medical Research","ror":"https://ror.org/0492wrx28"},{"id":"https://openalex.org/F4320332180","display_name":"Defense Advanced Research Projects Agency","ror":"https://ror.org/02caytj08"},{"id":"https://openalex.org/F4320337531","display_name":"Defense Sciences Office, DARPA","ror":"https://ror.org/0447fe631"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3033733952.pdf","grobid_xml":"https://content.openalex.org/works/W3033733952.grobid-xml"},"referenced_works_count":39,"referenced_works":["https://openalex.org/W1527575280","https://openalex.org/W1861492603","https://openalex.org/W1905882502","https://openalex.org/W2124807415","https://openalex.org/W2185175083","https://openalex.org/W2194775991","https://openalex.org/W2483327705","https://openalex.org/W2513263213","https://openalex.org/W2545625743","https://openalex.org/W2546696630","https://openalex.org/W2552579943","https://openalex.org/W2581101319","https://openalex.org/W2606473278","https://openalex.org/W2613718673","https://openalex.org/W2745461083","https://openalex.org/W2808084195","https://openalex.org/W2949335953","https://openalex.org/W2949888546","https://openalex.org/W2950162424","https://openalex.org/W2950359962","https://openalex.org/W2950512756","https://openalex.org/W2950848235","https://openalex.org/W2951174316","https://openalex.org/W2956018683","https://openalex.org/W2962784628","https://openalex.org/W2962964995","https://openalex.org/W2962968835","https://openalex.org/W2963040148","https://openalex.org/W2963216553","https://openalex.org/W2963360726","https://openalex.org/W2963389687","https://openalex.org/W2963403868","https://openalex.org/W2963496089","https://openalex.org/W2963499204","https://openalex.org/W2963909453","https://openalex.org/W2964192290","https://openalex.org/W2964308564","https://openalex.org/W2970632400","https://openalex.org/W2981473723"],"related_works":["https://openalex.org/W3011059803","https://openalex.org/W4390516098","https://openalex.org/W2181948922","https://openalex.org/W2384362569","https://openalex.org/W3151736118","https://openalex.org/W4362495644","https://openalex.org/W4205302943","https://openalex.org/W2119949815","https://openalex.org/W2561132942","https://openalex.org/W2142795561"],"abstract_inverted_index":{"We":[0],"explore":[1],"methods":[2],"to":[3,38,51,102,121,132],"enrich":[4,103],"the":[5,22,64,75,83,88,91,100,157,165,178,184],"diversity":[6,68],"of":[7,24,36,67,167],"captions":[8],"associated":[9],"with":[10,180,186],"pictures":[11],"for":[12],"learning":[13],"improved":[14,140],"visual-semantic":[15],"embeddings":[16],"(VSE)":[17],"in":[18,45,82,94],"cross-modal":[19,62,142,147],"retrieval.":[20],"In":[21],"spirit":[23],"\"A":[25],"picture":[26],"is":[27],"worth":[28],"a":[29,54,112],"thousand":[30],"words\",":[31],"it":[32],"would":[33],"take":[34],"dozens":[35],"sentences":[37],"parallel":[39],"each":[40],"picture's":[41],"content":[42],"adequately.":[43],"But":[44],"fact,":[46],"real-world":[47],"multimodal":[48,116],"datasets":[49],"tend":[50],"provide":[52],"only":[53],"few":[55],"(typically,":[56],"five)":[57],"descriptions":[58],"per":[59],"image.":[60],"For":[61],"retrieval,":[63],"resulting":[65],"lack":[66],"and":[69,79,106,124,145,150,162],"coverage":[70],"prevents":[71],"systems":[72],"from":[73],"capturing":[74],"fine-grained":[76],"inter-modal":[77],"dependencies":[78],"intra-modal":[80],"diversities":[81],"shared":[84],"VSE":[85,182],"space.":[86],"Using":[87],"fact":[89],"that":[90,156,177],"encoder-decoder":[92],"architectures":[93],"neural":[95,117],"machine":[96,118],"translation":[97,119],"(NMT)":[98],"have":[99],"capacity":[101],"both":[104],"monolingual":[105,141,187],"multilingual":[107,146,181],"textual":[108],"diversity,":[109],"we":[110],"propose":[111],"novel":[113],"framework":[114,159],"leveraging":[115],"(MMT)":[120],"perform":[122],"forward":[123],"backward":[125],"translations":[126],"based":[127],"on":[128,170],"salient":[129],"visual":[130],"objects":[131],"generate":[133],"additional":[134],"text-image":[135],"pairs":[136],"which":[137],"enables":[138],"training":[139],"retrieval":[143,148],"(English-Image)":[144],"(English-Image":[149],"German-Image)":[151],"models.":[152],"Experimental":[153],"results":[154,174],"show":[155],"proposed":[158],"can":[160],"substantially":[161],"consistently":[163],"improve":[164],"performance":[166],"state-of-the-art":[168],"models":[169,179,185],"multiple":[171],"datasets.":[172],"The":[173],"also":[175],"suggest":[176],"outperform":[183],"VSE.":[188]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":4}],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2025-10-10T00:00:00"}
