{"id":"https://openalex.org/W4375869171","doi":"https://doi.org/10.1109/icassp49357.2023.10096661","title":"Semantic-Preserving Augmentation for Robust Image-Text Retrieval","display_name":"Semantic-Preserving Augmentation for Robust Image-Text Retrieval","publication_year":2023,"publication_date":"2023-05-05","ids":{"openalex":"https://openalex.org/W4375869171","doi":"https://doi.org/10.1109/icassp49357.2023.10096661"},"language":"en","primary_location":{"id":"doi:10.1109/icassp49357.2023.10096661","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49357.2023.10096661","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100733419","display_name":"Sunwoo Kim","orcid":"https://orcid.org/0000-0003-2622-4136"},"institutions":[{"id":"https://openalex.org/I139264467","display_name":"Seoul National University","ror":"https://ror.org/04h9pn542","country_code":"KR","type":"education","lineage":["https://openalex.org/I139264467"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Sunwoo Kim","raw_affiliation_strings":["Seoul National University,Department of Electrical and Computer Engineering,Seoul,Korea","Department of Electrical and Computer Engineering, Seoul National University, Seoul, Korea"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Seoul National University,Department of Electrical and Computer Engineering,Seoul,Korea","institution_ids":["https://openalex.org/I139264467"]},{"raw_affiliation_string":"Department of Electrical and Computer Engineering, Seoul National University, Seoul, Korea","institution_ids":["https://openalex.org/I139264467"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5064051041","display_name":"Kyuhong Shim","orcid":"https://orcid.org/0000-0002-0123-3100"},"institutions":[{"id":"https://openalex.org/I139264467","display_name":"Seoul National University","ror":"https://ror.org/04h9pn542","country_code":"KR","type":"education","lineage":["https://openalex.org/I139264467"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Kyuhong Shim","raw_affiliation_strings":["Seoul National University,Department of Electrical and Computer Engineering,Seoul,Korea","Department of Electrical and Computer Engineering, Seoul National University, Seoul, Korea"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Seoul National University,Department of Electrical and Computer Engineering,Seoul,Korea","institution_ids":["https://openalex.org/I139264467"]},{"raw_affiliation_string":"Department of Electrical and Computer Engineering, Seoul National University, Seoul, Korea","institution_ids":["https://openalex.org/I139264467"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5036831364","display_name":"Luong Trung Nguyen","orcid":"https://orcid.org/0000-0002-5279-4370"},"institutions":[{"id":"https://openalex.org/I139264467","display_name":"Seoul National University","ror":"https://ror.org/04h9pn542","country_code":"KR","type":"education","lineage":["https://openalex.org/I139264467"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Luong Trung Nguyen","raw_affiliation_strings":["Seoul National University,Department of Electrical and Computer Engineering,Seoul,Korea","Department of Electrical and Computer Engineering, Seoul National University, Seoul, Korea"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Seoul National University,Department of Electrical and Computer Engineering,Seoul,Korea","institution_ids":["https://openalex.org/I139264467"]},{"raw_affiliation_string":"Department of Electrical and Computer Engineering, Seoul National University, Seoul, Korea","institution_ids":["https://openalex.org/I139264467"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5076075267","display_name":"Byonghyo Shim","orcid":"https://orcid.org/0000-0001-5051-1763"},"institutions":[{"id":"https://openalex.org/I139264467","display_name":"Seoul National University","ror":"https://ror.org/04h9pn542","country_code":"KR","type":"education","lineage":["https://openalex.org/I139264467"]}],"countries":["KR"],"is_corresponding":false,"raw_author_name":"Byonghyo Shim","raw_affiliation_strings":["Seoul National University,Department of Electrical and Computer Engineering,Seoul,Korea","Department of Electrical and Computer Engineering, Seoul National University, Seoul, Korea"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Seoul National University,Department of Electrical and Computer Engineering,Seoul,Korea","institution_ids":["https://openalex.org/I139264467"]},{"raw_affiliation_string":"Department of Electrical and Computer Engineering, Seoul National University, Seoul, Korea","institution_ids":["https://openalex.org/I139264467"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":1,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I139264467"],"apc_list":null,"apc_paid":null,"fwci":0.5555,"has_fulltext":false,"cited_by_count":5,"citation_normalized_percentile":{"value":0.66682732,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":97},"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9983000159263611,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10824","display_name":"Image Retrieval and Classification Techniques","score":0.9968000054359436,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7520822286605835},{"id":"https://openalex.org/keywords/image-retrieval","display_name":"Image retrieval","score":0.6176795363426208},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5336794257164001},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.5013246536254883},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.4869133234024048},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.4026246964931488},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3579632639884949}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7520822286605835},{"id":"https://openalex.org/C1667742","wikidata":"https://www.wikidata.org/wiki/Q10927554","display_name":"Image retrieval","level":3,"score":0.6176795363426208},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5336794257164001},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.5013246536254883},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.4869133234024048},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.4026246964931488},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3579632639884949}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp49357.2023.10096661","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49357.2023.10096661","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/16","score":0.8199999928474426,"display_name":"Peace, Justice and strong institutions"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":25,"referenced_works":["https://openalex.org/W639708223","https://openalex.org/W1905882502","https://openalex.org/W2123024445","https://openalex.org/W2185175083","https://openalex.org/W2746314669","https://openalex.org/W2896457183","https://openalex.org/W2971296908","https://openalex.org/W2988823324","https://openalex.org/W2991118492","https://openalex.org/W3035497460","https://openalex.org/W3110042533","https://openalex.org/W3117235369","https://openalex.org/W3175888430","https://openalex.org/W3176923149","https://openalex.org/W3184005885","https://openalex.org/W4224916862","https://openalex.org/W4226221010","https://openalex.org/W4295727797","https://openalex.org/W4316021917","https://openalex.org/W6678470764","https://openalex.org/W6743428213","https://openalex.org/W6747225742","https://openalex.org/W6751795773","https://openalex.org/W6755207826","https://openalex.org/W6757555829"],"related_works":["https://openalex.org/W2087793626","https://openalex.org/W2755342338","https://openalex.org/W2779427294","https://openalex.org/W1986902711","https://openalex.org/W2396760013","https://openalex.org/W2148433556","https://openalex.org/W2171776552","https://openalex.org/W98391849","https://openalex.org/W1600907701","https://openalex.org/W2726741344"],"abstract_inverted_index":{"Image-text":[0],"retrieval":[1,42,55,131,137],"is":[2,24,98],"a":[3,52,92],"task":[4,23],"to":[5,27,58,105],"search":[6],"for":[7,77],"the":[8,13,25,37,41,88,102,112,115],"proper":[9],"textual":[10],"descriptions":[11],"of":[12,21,67,111,135],"visual":[14,61],"world":[15],"and":[16,39,70,80,85],"vice":[17],"versa.":[18],"One":[19],"challenge":[20],"this":[22,48],"vulnerability":[26],"input":[28],"image/text":[29],"corruptions.":[30],"Such":[31],"corruptions":[32],"are":[33],"often":[34],"unobserved":[35],"during":[36],"training,":[38],"degrade":[40],"model\u2019s":[43,116],"decision":[44],"quality":[45],"substantially.":[46],"In":[47],"paper,":[49],"we":[50,100,125],"propose":[51],"novel":[53,68],"image-text":[54,136],"technique,":[56],"referred":[57],"as":[59],"robust":[60],"semantic":[62,96],"embedding":[63,108],"(RVSE),":[64],"which":[65],"consists":[66],"image-based":[69],"text-based":[71],"augmentation":[72,76],"techniques":[73],"called":[74],"semantic-preserving":[75],"image":[78],"(SPAug-I)":[79],"text":[81],"(SPAug-T).":[82],"Since":[83],"SPAug-I":[84],"SPAug-T":[86],"change":[87],"original":[89],"data":[90],"in":[91,133],"way":[93],"that":[94,127],"its":[95],"information":[97],"preserved,":[99],"enforce":[101],"feature":[103],"extractors":[104],"generate":[106],"semantic-aware":[107],"vectors":[109],"regardless":[110],"corruption,":[113],"improving":[114],"robustness":[117],"significantly.":[118],"From":[119],"extensive":[120],"experiments":[121],"using":[122],"benchmark":[123],"datasets,":[124],"show":[126],"RVSE":[128],"outperforms":[129],"conventional":[130],"schemes":[132],"terms":[134],"performance.":[138]},"counts_by_year":[{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":2}],"updated_date":"2026-06-26T08:34:08.712188","created_date":"2025-10-10T00:00:00"}
