{"id":"https://openalex.org/W3135581355","doi":"https://doi.org/10.1145/3448823.3448825","title":"Generative Adversarial and Self-Attention Based Fine-Grained Cross-Media Retrieval","display_name":"Generative Adversarial and Self-Attention Based Fine-Grained Cross-Media Retrieval","publication_year":2020,"publication_date":"2020-12-09","ids":{"openalex":"https://openalex.org/W3135581355","doi":"https://doi.org/10.1145/3448823.3448825","mag":"3135581355"},"language":"en","primary_location":{"id":"doi:10.1145/3448823.3448825","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3448823.3448825","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2020 4th International Conference on Vision, Image and Signal Processing","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5062499214","display_name":"Jin Seong Hong","orcid":null},"institutions":[{"id":"https://openalex.org/I36399199","display_name":"Nanjing University of Science and Technology","ror":"https://ror.org/00xp9wg62","country_code":"CN","type":"education","lineage":["https://openalex.org/I36399199"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Jin Hong","raw_affiliation_strings":["School of Computer Science and Engineering, Nanjing University of Science and Technology, Nanjing, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Engineering, Nanjing University of Science and Technology, Nanjing, China","institution_ids":["https://openalex.org/I36399199"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5085348492","display_name":"Haonan Luo","orcid":"https://orcid.org/0000-0002-9121-2687"},"institutions":[{"id":"https://openalex.org/I36399199","display_name":"Nanjing University of Science and Technology","ror":"https://ror.org/00xp9wg62","country_code":"CN","type":"education","lineage":["https://openalex.org/I36399199"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Haonan Luo","raw_affiliation_strings":["School of Computer Science and Engineering, Nanjing University of Science and Technology, Nanjing, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Engineering, Nanjing University of Science and Technology, Nanjing, China","institution_ids":["https://openalex.org/I36399199"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5027545344","display_name":"Yazhou Yao","orcid":"https://orcid.org/0000-0002-0337-9410"},"institutions":[{"id":"https://openalex.org/I36399199","display_name":"Nanjing University of Science and Technology","ror":"https://ror.org/00xp9wg62","country_code":"CN","type":"education","lineage":["https://openalex.org/I36399199"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yazhou Yao","raw_affiliation_strings":["School of Computer Science and Engineering, Nanjing University of Science and Technology, Nanjing, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Engineering, Nanjing University of Science and Technology, Nanjing, China","institution_ids":["https://openalex.org/I36399199"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100735663","display_name":"Zhenmin Tang","orcid":"https://orcid.org/0000-0001-6708-2205"},"institutions":[{"id":"https://openalex.org/I36399199","display_name":"Nanjing University of Science and Technology","ror":"https://ror.org/00xp9wg62","country_code":"CN","type":"education","lineage":["https://openalex.org/I36399199"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhenmin Tang","raw_affiliation_strings":["School of Computer Science and Engineering, Nanjing University of Science and Technology, Nanjing, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Engineering, Nanjing University of Science and Technology, Nanjing, China","institution_ids":["https://openalex.org/I36399199"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5062499214"],"corresponding_institution_ids":["https://openalex.org/I36399199"],"apc_list":null,"apc_paid":null,"fwci":0.0977,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.4431337,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"8"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9986000061035156,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8646626472473145},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.7148864269256592},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.606225311756134},{"id":"https://openalex.org/keywords/modality","display_name":"Modality (human\u2013computer interaction)","score":0.5773671269416809},{"id":"https://openalex.org/keywords/convolutional-neural-network","display_name":"Convolutional neural network","score":0.565852165222168},{"id":"https://openalex.org/keywords/modalities","display_name":"Modalities","score":0.5464127063751221},{"id":"https://openalex.org/keywords/generative-grammar","display_name":"Generative grammar","score":0.5257095694541931},{"id":"https://openalex.org/keywords/adversarial-system","display_name":"Adversarial system","score":0.5067084431648254},{"id":"https://openalex.org/keywords/deep-learning","display_name":"Deep learning","score":0.4968424141407013},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.49118825793266296},{"id":"https://openalex.org/keywords/noise","display_name":"Noise (video)","score":0.4842486083507538},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.4109266400337219},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3461824655532837},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.2203979790210724}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8646626472473145},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.7148864269256592},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.606225311756134},{"id":"https://openalex.org/C2780226545","wikidata":"https://www.wikidata.org/wiki/Q6888030","display_name":"Modality (human\u2013computer interaction)","level":2,"score":0.5773671269416809},{"id":"https://openalex.org/C81363708","wikidata":"https://www.wikidata.org/wiki/Q17084460","display_name":"Convolutional neural network","level":2,"score":0.565852165222168},{"id":"https://openalex.org/C2779903281","wikidata":"https://www.wikidata.org/wiki/Q6888026","display_name":"Modalities","level":2,"score":0.5464127063751221},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.5257095694541931},{"id":"https://openalex.org/C37736160","wikidata":"https://www.wikidata.org/wiki/Q1801315","display_name":"Adversarial system","level":2,"score":0.5067084431648254},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.4968424141407013},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.49118825793266296},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.4842486083507538},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4109266400337219},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3461824655532837},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.2203979790210724},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0},{"id":"https://openalex.org/C36289849","wikidata":"https://www.wikidata.org/wiki/Q34749","display_name":"Social science","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3448823.3448825","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3448823.3448825","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2020 4th International Conference on Vision, Image and Signal Processing","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Reduced inequalities","score":0.6600000262260437,"id":"https://metadata.un.org/sdg/10"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":14,"referenced_works":["https://openalex.org/W1605417594","https://openalex.org/W2013535308","https://openalex.org/W2556951257","https://openalex.org/W2747647654","https://openalex.org/W2808711976","https://openalex.org/W2891355224","https://openalex.org/W2904347197","https://openalex.org/W2963090248","https://openalex.org/W2963403868","https://openalex.org/W2964168984","https://openalex.org/W2998115938","https://openalex.org/W2998825242","https://openalex.org/W3005324544","https://openalex.org/W3101225052"],"related_works":["https://openalex.org/W73545470","https://openalex.org/W4224266612","https://openalex.org/W2383394264","https://openalex.org/W4320153225","https://openalex.org/W4293261942","https://openalex.org/W3125968744","https://openalex.org/W2167701463","https://openalex.org/W2110287964","https://openalex.org/W4307407935","https://openalex.org/W649759291"],"abstract_inverted_index":{"Deep":[0],"convolutional":[1],"neural":[2],"networks":[3],"have":[4,196],"recently":[5],"demonstrated":[6],"an":[7,62],"impressive":[8],"ability":[9],"to":[10,32,128,163,186],"conduct":[11],"the":[12,50,67,74,78,81,93,106,120,165,182],"task":[13],"of":[14,37,52,69,77,83,138,171],"fine-grained":[15,20,99],"cross-media":[16,21,100],"retrieval.":[17],"However,":[18],"existing":[19,55],"retrieval":[22,27,89],"algorithms":[23,56],"offer":[24],"comparatively":[25],"low":[26],"accuracy":[28],"and":[29,110,194],"are":[30,148],"difficult":[31],"apply":[33],"in":[34,61,119],"practice":[35],"because":[36],"three":[38],"challenging":[39],"difficulties.":[40],"Firstly,":[41],"videos":[42,121],"contain":[43],"many":[44],"noise":[45,117],"frames":[46,118],"which":[47,65,102],"may":[48],"affect":[49],"extraction":[51],"features.":[53],"Secondly,":[54],"deal":[57],"with":[58],"different":[59],"modalities":[60],"indiscriminative":[63],"way,":[64],"ignore":[66],"characteristic":[68,76],"each":[70,139],"modality,":[71],"for":[72,169],"example,":[73],"sequence":[75],"text.":[79],"Thirdly,":[80],"lack":[82],"joint":[84],"semantic":[85,167],"space":[86,168],"learning":[87],"limits":[88],"accuracy.":[90],"To":[91],"overcome":[92],"drawbacks,":[94],"we":[95,135],"propose":[96],"a":[97,123,151,157,177],"novel":[98],"algorithm,":[101],"is":[103,161],"based":[104,153],"on":[105,176],"generative":[107,158],"adversarial":[108,159],"network":[109,160],"self-attention":[111,152],"mechanism.":[112],"Our":[113],"approach":[114],"firstly":[115],"removes":[116],"by":[122,150],"spatial":[124],"cluster":[125],"filtering":[126],"algorithm":[127],"obtain":[129],"more":[130],"pure":[131],"video":[132],"data.":[133],"Then":[134],"extract":[136],"features":[137,147,170],"modality.":[140],"It":[141],"should":[142],"be":[143],"noted":[144],"that":[145],"text":[146],"extracted":[149],"LSTM":[154],"structure.":[155],"Finally,":[156],"used":[162],"learn":[164],"common":[166],"all":[172],"modalities.":[173],"Experimental":[174],"evaluations":[175],"new":[178],"benchmark":[179],"FGCorssNet":[180],"demonstrate":[181],"improving":[183],"results":[184],"compared":[185],"other":[187],"counterpart":[188],"methods.":[189],"The":[190],"source":[191],"codes,":[192],"models,":[193],"data":[195],"been":[197],"made":[198],"anonymously":[199],"available":[200],"at":[201],"https://github.com/gasanet/GASA.":[202]},"counts_by_year":[{"year":2023,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
