{"id":"https://openalex.org/W2981473723","doi":"https://doi.org/10.1145/3343031.3350894","title":"Annotation Efficient Cross-Modal Retrieval with Adversarial Attentive Alignment","display_name":"Annotation Efficient Cross-Modal Retrieval with Adversarial Attentive Alignment","publication_year":2019,"publication_date":"2019-10-15","ids":{"openalex":"https://openalex.org/W2981473723","doi":"https://doi.org/10.1145/3343031.3350894","mag":"2981473723"},"language":"en","primary_location":{"id":"doi:10.1145/3343031.3350894","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3343031.3350894","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3343031.3350894","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 27th ACM International Conference on Multimedia","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3343031.3350894","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5063149046","display_name":"Po-Yao Huang","orcid":"https://orcid.org/0000-0002-3319-5145"},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Po-Yao Huang","raw_affiliation_strings":["Carnegie Mellon University, Pittsburgh, PA, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Carnegie Mellon University, Pittsburgh, PA, USA","institution_ids":["https://openalex.org/I74973139"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5011488839","display_name":"Guoliang Kang","orcid":"https://orcid.org/0000-0003-1978-2025"},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Guoliang Kang","raw_affiliation_strings":["Carnegie Mellon University, Pittsburgh, PA, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Carnegie Mellon University, Pittsburgh, PA, USA","institution_ids":["https://openalex.org/I74973139"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5045007670","display_name":"Wenhe Liu","orcid":"https://orcid.org/0000-0003-4679-2958"},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Wenhe Liu","raw_affiliation_strings":["Carnegie Mellon University, Pittsburgh, PA, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Carnegie Mellon University, Pittsburgh, PA, USA","institution_ids":["https://openalex.org/I74973139"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5034967388","display_name":"Xiaojun Chang","orcid":"https://orcid.org/0000-0002-7778-8807"},"institutions":[{"id":"https://openalex.org/I56590836","display_name":"Monash University","ror":"https://ror.org/02bfwt286","country_code":"AU","type":"education","lineage":["https://openalex.org/I56590836"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"Xiaojun Chang","raw_affiliation_strings":["Monash University, Melbourne, Australia"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Monash University, Melbourne, Australia","institution_ids":["https://openalex.org/I56590836"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5103099928","display_name":"Alexander G. Hauptmann","orcid":"https://orcid.org/0000-0003-2123-0684"},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Alexander G. Hauptmann","raw_affiliation_strings":["Carnegie Mellon University, Pittsburgh, PA, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Carnegie Mellon University, Pittsburgh, PA, USA","institution_ids":["https://openalex.org/I74973139"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5063149046"],"corresponding_institution_ids":["https://openalex.org/I74973139"],"apc_list":null,"apc_paid":null,"fwci":1.6338,"has_fulltext":true,"cited_by_count":21,"citation_normalized_percentile":{"value":0.87484827,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"1758","last_page":"1767"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10812","display_name":"Human Pose and Action Recognition","score":0.9987000226974487,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9975000023841858,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8604477643966675},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.6700659990310669},{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.642512857913971},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6420610547065735},{"id":"https://openalex.org/keywords/annotation","display_name":"Annotation","score":0.6193561553955078},{"id":"https://openalex.org/keywords/margin","display_name":"Margin (machine learning)","score":0.5808998346328735},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5692852139472961},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.4711657464504242},{"id":"https://openalex.org/keywords/semantics","display_name":"Semantics (computer science)","score":0.47004762291908264},{"id":"https://openalex.org/keywords/image-retrieval","display_name":"Image retrieval","score":0.43781858682632446},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.2912292182445526},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.22216391563415527}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8604477643966675},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.6700659990310669},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.642512857913971},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6420610547065735},{"id":"https://openalex.org/C2776321320","wikidata":"https://www.wikidata.org/wiki/Q857525","display_name":"Annotation","level":2,"score":0.6193561553955078},{"id":"https://openalex.org/C774472","wikidata":"https://www.wikidata.org/wiki/Q6760393","display_name":"Margin (machine learning)","level":2,"score":0.5808998346328735},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5692852139472961},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.4711657464504242},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.47004762291908264},{"id":"https://openalex.org/C1667742","wikidata":"https://www.wikidata.org/wiki/Q10927554","display_name":"Image retrieval","level":3,"score":0.43781858682632446},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.2912292182445526},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.22216391563415527},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1145/3343031.3350894","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3343031.3350894","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3343031.3350894","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 27th ACM International Conference on Multimedia","raw_type":"proceedings-article"},{"id":"pmh:oai:figshare.com:article/27592512","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4377196282","display_name":"Figshare","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4210132348","host_organization_name":"Figshare (United Kingdom)","host_organization_lineage":["https://openalex.org/I4210132348"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Text"}],"best_oa_location":{"id":"doi:10.1145/3343031.3350894","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3343031.3350894","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3343031.3350894","source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 27th ACM International Conference on Multimedia","raw_type":"proceedings-article"},"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.6299999952316284}],"awards":[{"id":"https://openalex.org/G1167955214","display_name":null,"funder_award_id":"FA8750-18-2-0018,FA8750-19-2-0501","funder_id":"https://openalex.org/F4320332180","funder_display_name":"Defense Advanced Research Projects Agency"},{"id":"https://openalex.org/G1732624909","display_name":null,"funder_award_id":"D17PC00340","funder_id":"https://openalex.org/F4320333051","funder_display_name":"Intelligence Advanced Research Projects Activity"}],"funders":[{"id":"https://openalex.org/F4320306116","display_name":"U.S. Department of the Interior","ror":"https://ror.org/03v0pmy70"},{"id":"https://openalex.org/F4320332180","display_name":"Defense Advanced Research Projects Agency","ror":"https://ror.org/02caytj08"},{"id":"https://openalex.org/F4320333051","display_name":"Intelligence Advanced Research Projects Activity","ror":"https://ror.org/01v3fsc55"},{"id":"https://openalex.org/F4320333452","display_name":"Interior Business Center","ror":null}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2981473723.pdf","grobid_xml":"https://content.openalex.org/works/W2981473723.grobid-xml"},"referenced_works_count":46,"referenced_works":["https://openalex.org/W92662927","https://openalex.org/W1527575280","https://openalex.org/W1533861849","https://openalex.org/W1686810756","https://openalex.org/W1861492603","https://openalex.org/W1905882502","https://openalex.org/W1957706851","https://openalex.org/W1978962787","https://openalex.org/W2016053056","https://openalex.org/W2033740597","https://openalex.org/W2038721957","https://openalex.org/W2053946370","https://openalex.org/W2064675550","https://openalex.org/W2112912048","https://openalex.org/W2123024445","https://openalex.org/W2134670479","https://openalex.org/W2147238549","https://openalex.org/W2153579005","https://openalex.org/W2185175083","https://openalex.org/W2194775991","https://openalex.org/W2250539671","https://openalex.org/W2280837843","https://openalex.org/W2546696630","https://openalex.org/W2552579943","https://openalex.org/W2603705233","https://openalex.org/W2606473278","https://openalex.org/W2613718673","https://openalex.org/W2742318416","https://openalex.org/W2745461083","https://openalex.org/W2765440071","https://openalex.org/W2768454054","https://openalex.org/W2795389793","https://openalex.org/W2808084195","https://openalex.org/W2883124384","https://openalex.org/W2899505139","https://openalex.org/W2949474740","https://openalex.org/W2950162424","https://openalex.org/W2950178297","https://openalex.org/W2950761309","https://openalex.org/W2951002515","https://openalex.org/W2962968835","https://openalex.org/W2963389687","https://openalex.org/W2963499204","https://openalex.org/W2963826681","https://openalex.org/W2990138404","https://openalex.org/W3103081334"],"related_works":["https://openalex.org/W2376984068","https://openalex.org/W2506386910","https://openalex.org/W2117928543","https://openalex.org/W2393699422","https://openalex.org/W2168037874","https://openalex.org/W2387268739","https://openalex.org/W4250138412","https://openalex.org/W2135728080","https://openalex.org/W2379546782","https://openalex.org/W2534443799"],"abstract_inverted_index":{"Visual-semantic":[0],"embeddings":[1],"are":[2],"central":[3],"to":[4,37,48,64,77,88,180],"many":[5],"multimedia":[6],"applications":[7],"such":[8],"as":[9,73],"cross-modal":[10,135],"retrieval":[11,136],"between":[12,94],"visual":[13,100],"data":[14],"and":[15,96,101,142,172],"natural":[16],"language":[17],"descriptions.":[18],"Conventionally,":[19],"learning":[20],"a":[21,41,61,131],"joint":[22],"embedding":[23,128],"space":[24],"relies":[25],"on":[26,112,138],"large":[27,51],"parallel":[28],"multimodal":[29,115],"corpora.":[30,116],"Since":[31],"massive":[32],"human":[33],"annotation":[34],"is":[35,40,146],"expensive":[36],"obtain,":[38],"there":[39],"strong":[42],"motivation":[43],"in":[44],"developing":[45],"versatile":[46],"algorithms":[47],"learn":[49,78],"from":[50,70],"corpora":[52],"with":[53,184],"fewer":[54],"annotations.":[55,187],"In":[56],"this":[57],"paper,":[58],"we":[59,108],"propose":[60],"novel":[62],"framework":[63],"leverage":[65],"automatically":[66],"extracted":[67],"regional":[68],"semantics":[69],"un-annotated":[71,97],"images":[72],"additional":[74],"weak":[75],"supervision":[76],"visual-semantic":[79,127],"embeddings.":[80],"The":[81,117],"proposed":[82,123,159],"model":[83,124,160],"employs":[84],"adversarial":[85],"attentive":[86],"alignments":[87],"close":[89],"the":[90,122,139,156,158,181,185],"inherent":[91],"heterogeneous":[92],"gaps":[93],"annotated":[95,114],"portions":[98],"of":[99,155],"textual":[102],"domains.":[103],"To":[104],"demonstrate":[105],"its":[106],"superiority,":[107],"conduct":[109],"extensive":[110],"experiments":[111],"sparsely":[113],"experimental":[118],"results":[119],"show":[120],"that":[121],"outperforms":[125],"state-of-the-art":[126],"models":[129],"by":[130],"significant":[132],"margin":[133],"for":[134,170,175],"tasks":[137],"sparse":[140],"Flickr30k":[141],"MS-COCO":[143],"datasets.":[144],"It":[145],"also":[147],"worth":[148],"noting":[149],"that,":[150],"despite":[151],"using":[152],"only":[153],"20%":[154],"annotations,":[157],"can":[161],"achieve":[162],"competitive":[163],"performance":[164],"(Recall":[165],"at":[166],"10":[167],">":[168,173],"80.0%":[169],"1K":[171],"70.0%":[174],"5K":[176],"text-to-image":[177],"retrieval)":[178],"compared":[179],"benchmarks":[182],"trained":[183],"complete":[186]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":3},{"year":2021,"cited_by_count":7},{"year":2020,"cited_by_count":5},{"year":2019,"cited_by_count":1}],"updated_date":"2026-04-28T14:05:53.105641","created_date":"2025-10-10T00:00:00"}
