{"id":"https://openalex.org/W2912738862","doi":"https://doi.org/10.1145/3300939","title":"Modality-Invariant Image-Text Embedding for Image-Sentence Matching","display_name":"Modality-Invariant Image-Text Embedding for Image-Sentence Matching","publication_year":2019,"publication_date":"2019-02-07","ids":{"openalex":"https://openalex.org/W2912738862","doi":"https://doi.org/10.1145/3300939","mag":"2912738862"},"language":"en","primary_location":{"id":"doi:10.1145/3300939","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3300939","pdf_url":null,"source":{"id":"https://openalex.org/S19610489","display_name":"ACM Transactions on Multimedia Computing Communications and Applications","issn_l":"1551-6857","issn":["1551-6857","1551-6865"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Multimedia Computing, Communications, and Applications","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100716154","display_name":"Ruoyu Liu","orcid":"https://orcid.org/0000-0002-4679-3179"},"institutions":[{"id":"https://openalex.org/I21193070","display_name":"Beijing Jiaotong University","ror":"https://ror.org/01yj56c84","country_code":"CN","type":"education","lineage":["https://openalex.org/I21193070"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Ruoyu Liu","raw_affiliation_strings":["Beijing Jiaotong University, Beijing, P. R., China"],"affiliations":[{"raw_affiliation_string":"Beijing Jiaotong University, Beijing, P. R., China","institution_ids":["https://openalex.org/I21193070"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100362745","display_name":"Yao Zhao","orcid":"https://orcid.org/0000-0002-8581-9554"},"institutions":[{"id":"https://openalex.org/I21193070","display_name":"Beijing Jiaotong University","ror":"https://ror.org/01yj56c84","country_code":"CN","type":"education","lineage":["https://openalex.org/I21193070"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yao Zhao","raw_affiliation_strings":["Beijing Jiaotong University, Beijing, P. R., China"],"affiliations":[{"raw_affiliation_string":"Beijing Jiaotong University, Beijing, P. R., China","institution_ids":["https://openalex.org/I21193070"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5006854141","display_name":"Shikui Wei","orcid":"https://orcid.org/0000-0003-3803-9763"},"institutions":[{"id":"https://openalex.org/I21193070","display_name":"Beijing Jiaotong University","ror":"https://ror.org/01yj56c84","country_code":"CN","type":"education","lineage":["https://openalex.org/I21193070"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shikui Wei","raw_affiliation_strings":["Beijing Jiaotong University, Beijing, P. R., China"],"affiliations":[{"raw_affiliation_string":"Beijing Jiaotong University, Beijing, P. R., China","institution_ids":["https://openalex.org/I21193070"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100709340","display_name":"Liang Zheng","orcid":"https://orcid.org/0000-0002-1464-9500"},"institutions":[{"id":"https://openalex.org/I118347636","display_name":"Australian National University","ror":"https://ror.org/019wvm592","country_code":"AU","type":"education","lineage":["https://openalex.org/I118347636"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Liang Zheng","raw_affiliation_strings":["Australian National University, Australia"],"affiliations":[{"raw_affiliation_string":"Australian National University, Australia","institution_ids":["https://openalex.org/I118347636"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5005421447","display_name":"Yi Yang","orcid":"https://orcid.org/0000-0002-0512-880X"},"institutions":[{"id":"https://openalex.org/I114017466","display_name":"University of Technology Sydney","ror":"https://ror.org/03f0f6041","country_code":"AU","type":"education","lineage":["https://openalex.org/I114017466"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Yi Yang","raw_affiliation_strings":["University of Technology Sydney, Ultimo NSW, Australia"],"affiliations":[{"raw_affiliation_string":"University of Technology Sydney, Ultimo NSW, Australia","institution_ids":["https://openalex.org/I114017466"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5100716154"],"corresponding_institution_ids":["https://openalex.org/I21193070"],"apc_list":null,"apc_paid":null,"fwci":2.5542,"has_fulltext":false,"cited_by_count":37,"citation_normalized_percentile":{"value":0.9173839,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":"15","issue":"1","first_page":"1","last_page":"19"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9980000257492065,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7913992404937744},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.698874831199646},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6572574377059937},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.5596212148666382},{"id":"https://openalex.org/keywords/sentence","display_name":"Sentence","score":0.557278573513031},{"id":"https://openalex.org/keywords/modality","display_name":"Modality (human\u2013computer interaction)","score":0.5569507479667664},{"id":"https://openalex.org/keywords/invariant","display_name":"Invariant (physics)","score":0.4655490815639496},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.43970388174057007},{"id":"https://openalex.org/keywords/image-retrieval","display_name":"Image retrieval","score":0.4356818199157715},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.43145060539245605},{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.42773517966270447},{"id":"https://openalex.org/keywords/ground-truth","display_name":"Ground truth","score":0.4216015934944153},{"id":"https://openalex.org/keywords/contextual-image-classification","display_name":"Contextual image classification","score":0.4152308702468872},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.41207054257392883},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.408063679933548},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.10556700825691223}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7913992404937744},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.698874831199646},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6572574377059937},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.5596212148666382},{"id":"https://openalex.org/C2777530160","wikidata":"https://www.wikidata.org/wiki/Q41796","display_name":"Sentence","level":2,"score":0.557278573513031},{"id":"https://openalex.org/C2780226545","wikidata":"https://www.wikidata.org/wiki/Q6888030","display_name":"Modality (human\u2013computer interaction)","level":2,"score":0.5569507479667664},{"id":"https://openalex.org/C190470478","wikidata":"https://www.wikidata.org/wiki/Q2370229","display_name":"Invariant (physics)","level":2,"score":0.4655490815639496},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.43970388174057007},{"id":"https://openalex.org/C1667742","wikidata":"https://www.wikidata.org/wiki/Q10927554","display_name":"Image retrieval","level":3,"score":0.4356818199157715},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.43145060539245605},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.42773517966270447},{"id":"https://openalex.org/C146849305","wikidata":"https://www.wikidata.org/wiki/Q370766","display_name":"Ground truth","level":2,"score":0.4216015934944153},{"id":"https://openalex.org/C75294576","wikidata":"https://www.wikidata.org/wiki/Q5165192","display_name":"Contextual image classification","level":3,"score":0.4152308702468872},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.41207054257392883},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.408063679933548},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.10556700825691223},{"id":"https://openalex.org/C120665830","wikidata":"https://www.wikidata.org/wiki/Q14620","display_name":"Optics","level":1,"score":0.0},{"id":"https://openalex.org/C37914503","wikidata":"https://www.wikidata.org/wiki/Q156495","display_name":"Mathematical physics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1145/3300939","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3300939","pdf_url":null,"source":{"id":"https://openalex.org/S19610489","display_name":"ACM Transactions on Multimedia Computing Communications and Applications","issn_l":"1551-6857","issn":["1551-6857","1551-6865"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Multimedia Computing, Communications, and Applications","raw_type":"journal-article"},{"id":"pmh:oai:openresearch-repository.anu.edu.au:1885/309730","is_oa":false,"landing_page_url":"http://hdl.handle.net/1885/309730","pdf_url":null,"source":{"id":"https://openalex.org/S4306402539","display_name":"ANU Open Research (Australian National University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I118347636","host_organization_name":"Australian National University","host_organization_lineage":["https://openalex.org/I118347636"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"ACM Transactions on Multimedia Computing, Communications and Applications","raw_type":"Journal article"},{"id":"pmh:oai:opus.lib.uts.edu.au:10453/140194","is_oa":false,"landing_page_url":"http://hdl.handle.net/10453/140194","pdf_url":null,"source":{"id":"https://openalex.org/S4306401357","display_name":"UTS ePRESS (University of Technology Sydney)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I114017466","host_organization_name":"University of Technology Sydney","host_organization_lineage":["https://openalex.org/I114017466"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Journal Article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education","score":0.7099999785423279}],"awards":[{"id":"https://openalex.org/G288830202","display_name":null,"funder_award_id":"2018JBZ001","funder_id":"https://openalex.org/F4320335787","funder_display_name":"Fundamental Research Funds for the Central Universities"},{"id":"https://openalex.org/G4678000616","display_name":null,"funder_award_id":"2016YFB0800404","funder_id":"https://openalex.org/F4320335777","funder_display_name":"National Key Research and Development Program of China"},{"id":"https://openalex.org/G8688312675","display_name":null,"funder_award_id":"61532005, 61332012, and 61572065","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"},{"id":"https://openalex.org/F4320335777","display_name":"National Key Research and Development Program of China","ror":null},{"id":"https://openalex.org/F4320335787","display_name":"Fundamental Research Funds for the Central Universities","ror":null}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":64,"referenced_works":["https://openalex.org/W68733909","https://openalex.org/W1522301498","https://openalex.org/W1527575280","https://openalex.org/W1571323871","https://openalex.org/W1686810756","https://openalex.org/W1731081199","https://openalex.org/W1861492603","https://openalex.org/W1895577753","https://openalex.org/W1895989618","https://openalex.org/W1905882502","https://openalex.org/W1916445035","https://openalex.org/W1924770834","https://openalex.org/W1947481528","https://openalex.org/W1949478088","https://openalex.org/W1957706851","https://openalex.org/W1999965501","https://openalex.org/W2025341678","https://openalex.org/W2032001302","https://openalex.org/W2070753207","https://openalex.org/W2071207147","https://openalex.org/W2095705004","https://openalex.org/W2106277773","https://openalex.org/W2112912048","https://openalex.org/W2117539524","https://openalex.org/W2123024445","https://openalex.org/W2149557440","https://openalex.org/W2159243025","https://openalex.org/W2172140247","https://openalex.org/W2185175083","https://openalex.org/W2187089797","https://openalex.org/W2194775991","https://openalex.org/W2284126738","https://openalex.org/W2326180695","https://openalex.org/W2346425926","https://openalex.org/W2508827254","https://openalex.org/W2525270349","https://openalex.org/W2562630055","https://openalex.org/W2584535601","https://openalex.org/W2591669147","https://openalex.org/W2600067905","https://openalex.org/W2604579046","https://openalex.org/W2606965845","https://openalex.org/W2618530766","https://openalex.org/W2739181657","https://openalex.org/W2752345995","https://openalex.org/W2765440071","https://openalex.org/W2768454054","https://openalex.org/W2770325561","https://openalex.org/W2772313889","https://openalex.org/W2772385709","https://openalex.org/W2778100917","https://openalex.org/W2778940641","https://openalex.org/W2782994636","https://openalex.org/W2949645788","https://openalex.org/W2951690276","https://openalex.org/W2963026686","https://openalex.org/W2963389687","https://openalex.org/W2963826681","https://openalex.org/W3000226596","https://openalex.org/W3099206234","https://openalex.org/W4297969478","https://openalex.org/W4299128829","https://openalex.org/W4320013936","https://openalex.org/W6637618735"],"related_works":["https://openalex.org/W2081900870","https://openalex.org/W2385859805","https://openalex.org/W2530972254","https://openalex.org/W4295532600","https://openalex.org/W2063823869","https://openalex.org/W2047973478","https://openalex.org/W2067569035","https://openalex.org/W2090985514","https://openalex.org/W2037549926","https://openalex.org/W2374013449"],"abstract_inverted_index":{"Performing":[0],"direct":[1],"matching":[2,44,98,155,182],"among":[3,66],"different":[4],"modalities":[5,118],"(like":[6],"image":[7,211,243],"and":[8,20,55,68,160,244,260,274],"text)":[9],"can":[10,72,95],"benefit":[11],"many":[12],"tasks":[13],"in":[14,99,120],"computer":[15],"vision,":[16],"multimedia,":[17],"information":[18,21],"retrieval,":[19],"fusion.":[22],"Most":[23],"of":[24,49,115,150,189],"existing":[25],"works":[26,130],"focus":[27],"on":[28,131,153,255],"class-level":[29],"image-text":[30,89,107,178,233],"matching,":[31,93],"called":[32],"cross-modal":[33],"retrieval":[34,61,134],",":[35],"which":[36,94,204],"attempts":[37],"to":[38,111,138,174,248,280],"propose":[39,85,173],"a":[40,75,86,176,196],"uniform":[41],"model":[42,273],"for":[43,51,106,180],"images":[45],"with":[46,200],"all":[47],"types":[48],"texts,":[50],"example,":[52],"tags,":[53],"sentences,":[54],"articles":[56],"(long":[57],"texts).":[58],"Although":[59],"cross-model":[60,133],"alleviates":[62],"the":[63,100,113,116,121,132,148,190,210,217,226,232,242,271,281],"heterogeneous":[64,97],"gap":[65],"visual":[67],"textual":[69],"information,":[70],"it":[71],"provide":[73,96],"only":[74,230],"rough":[76],"correspondence":[77],"between":[78],"two":[79,117,256],"modalities.":[80],"In":[81,215],"this":[82,126],"article,":[83],"we":[84,172,194],"more":[87],"precise":[88],"embedding":[90,108,122,179,207,246],"method,":[91],"image-sentence":[92,154,181],"instance":[101],"level.":[102],"The":[103],"key":[104],"issue":[105],"is":[109,162,221],"how":[110],"make":[112],"distributions":[114,142,247],"consistent":[119],"space.":[123],"To":[124],"address":[125],"problem,":[127],"some":[128],"previous":[129,170],"task":[135],"have":[136],"attempted":[137],"pull":[139],"close":[140],"their":[141],"by":[143,169,183,236,251],"employing":[144],"adversarial":[145,151,185,202,252],"learning.":[146,186,253],"However,":[147],"effectiveness":[149],"learning":[152],"has":[156],"not":[157,164,229],"been":[158],"proved":[159],"there":[161],"still":[163],"an":[165,201,206],"effective":[166],"method.":[167],"Inspired":[168],"works,":[171],"learn":[175],"modality-invariant":[177],"involving":[184],"On":[187],"top":[188],"triplet":[191],"loss--based":[192],"baseline,":[193],"design":[195],"modality":[197],"classification":[198],"network":[199,228],"loss,":[203],"classifies":[205],"into":[208],"either":[209],"or":[212],"text":[213,245],"modality.":[214],"addition,":[216],"multi-stage":[218],"training":[219],"procedure":[220],"carefully":[222],"designed":[223],"so":[224],"that":[225,263,275],"proposed":[227],"imposes":[231],"similarity":[234],"constraints":[235],"ground-truth":[237],"labels,":[238],"but":[239],"also":[240],"enforces":[241],"be":[249],"similar":[250],"Experiments":[254],"public":[257],"datasets":[258],"(Flickr30k":[259],"MSCOCO)":[261],"demonstrate":[262],"our":[264,276],"method":[265],"yields":[266],"stable":[267],"accuracy":[268],"improvement":[269],"over":[270],"baseline":[272],"results":[277],"compare":[278],"favorably":[279],"state-of-the-art":[282],"methods.":[283]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":6},{"year":2023,"cited_by_count":3},{"year":2022,"cited_by_count":7},{"year":2021,"cited_by_count":8},{"year":2020,"cited_by_count":9},{"year":2019,"cited_by_count":1}],"updated_date":"2026-04-05T17:49:38.594831","created_date":"2025-10-10T00:00:00"}
