{"id":"https://openalex.org/W3160431667","doi":"https://doi.org/10.1109/icpr48806.2021.9412890","title":"Cascade Attention Guided Residue Learning GAN for Cross-Modal Translation","display_name":"Cascade Attention Guided Residue Learning GAN for Cross-Modal Translation","publication_year":2021,"publication_date":"2021-01-10","ids":{"openalex":"https://openalex.org/W3160431667","doi":"https://doi.org/10.1109/icpr48806.2021.9412890","mag":"3160431667"},"language":"en","primary_location":{"id":"doi:10.1109/icpr48806.2021.9412890","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icpr48806.2021.9412890","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2020 25th International Conference on Pattern Recognition (ICPR)","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5060117769","display_name":"Bin Duan","orcid":"https://orcid.org/0000-0003-3302-1900"},"institutions":[{"id":"https://openalex.org/I180949307","display_name":"Illinois Institute of Technology","ror":"https://ror.org/037t3ry66","country_code":"US","type":"education","lineage":["https://openalex.org/I180949307"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Bin Duan","raw_affiliation_strings":["Illinois Institute of Technology, USA"],"affiliations":[{"raw_affiliation_string":"Illinois Institute of Technology, USA","institution_ids":["https://openalex.org/I180949307"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100392006","display_name":"Wei Wang","orcid":"https://orcid.org/0000-0002-5477-1017"},"institutions":[{"id":"https://openalex.org/I193223587","display_name":"University of Trento","ror":"https://ror.org/05trd4x28","country_code":"IT","type":"education","lineage":["https://openalex.org/I193223587"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Wei Wang","raw_affiliation_strings":["University of Trento, Italy"],"affiliations":[{"raw_affiliation_string":"University of Trento, Italy","institution_ids":["https://openalex.org/I193223587"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5050748634","display_name":"Hao Tang","orcid":"https://orcid.org/0000-0002-2077-1246"},"institutions":[{"id":"https://openalex.org/I193223587","display_name":"University of Trento","ror":"https://ror.org/05trd4x28","country_code":"IT","type":"education","lineage":["https://openalex.org/I193223587"]}],"countries":["IT"],"is_corresponding":false,"raw_author_name":"Hao Tang","raw_affiliation_strings":["University of Trento, Italy"],"affiliations":[{"raw_affiliation_string":"University of Trento, Italy","institution_ids":["https://openalex.org/I193223587"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5041766127","display_name":"Hugo Latapie","orcid":"https://orcid.org/0000-0003-2755-5930"},"institutions":[{"id":"https://openalex.org/I2801562743","display_name":"Cisco College","ror":"https://ror.org/03gc7jk79","country_code":"US","type":"education","lineage":["https://openalex.org/I2801562743"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Hugo Latapie","raw_affiliation_strings":["Cisco, USA"],"affiliations":[{"raw_affiliation_string":"Cisco, USA","institution_ids":["https://openalex.org/I2801562743"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100395059","display_name":"Yan Yan","orcid":"https://orcid.org/0000-0002-3674-7160"},"institutions":[{"id":"https://openalex.org/I180949307","display_name":"Illinois Institute of Technology","ror":"https://ror.org/037t3ry66","country_code":"US","type":"education","lineage":["https://openalex.org/I180949307"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yan Yan","raw_affiliation_strings":["Illinois Institute of Technology, USA"],"affiliations":[{"raw_affiliation_string":"Illinois Institute of Technology, USA","institution_ids":["https://openalex.org/I180949307"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5060117769"],"corresponding_institution_ids":["https://openalex.org/I180949307"],"apc_list":null,"apc_paid":null,"fwci":2.59163146,"has_fulltext":false,"cited_by_count":24,"citation_normalized_percentile":{"value":0.9085148,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":94,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"1336","last_page":"1343"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9970999956130981,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9970999956130981,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.97079998254776,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9696000218391418,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7508949041366577},{"id":"https://openalex.org/keywords/modalities","display_name":"Modalities","score":0.749106764793396},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5483331680297852},{"id":"https://openalex.org/keywords/stimulus-modality","display_name":"Stimulus modality","score":0.5378152132034302},{"id":"https://openalex.org/keywords/modal","display_name":"Modal","score":0.5267773866653442},{"id":"https://openalex.org/keywords/audio-signal","display_name":"Audio signal","score":0.4942186176776886},{"id":"https://openalex.org/keywords/multi-task-learning","display_name":"Multi-task learning","score":0.47566455602645874},{"id":"https://openalex.org/keywords/cascade","display_name":"Cascade","score":0.4497702419757843},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.4445732533931732},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.4213646352291107},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.35947418212890625},{"id":"https://openalex.org/keywords/sensory-system","display_name":"Sensory system","score":0.17220500111579895},{"id":"https://openalex.org/keywords/speech-coding","display_name":"Speech coding","score":0.090829998254776}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7508949041366577},{"id":"https://openalex.org/C2779903281","wikidata":"https://www.wikidata.org/wiki/Q6888026","display_name":"Modalities","level":2,"score":0.749106764793396},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5483331680297852},{"id":"https://openalex.org/C26486553","wikidata":"https://www.wikidata.org/wiki/Q371870","display_name":"Stimulus modality","level":3,"score":0.5378152132034302},{"id":"https://openalex.org/C71139939","wikidata":"https://www.wikidata.org/wiki/Q910194","display_name":"Modal","level":2,"score":0.5267773866653442},{"id":"https://openalex.org/C64922751","wikidata":"https://www.wikidata.org/wiki/Q4650799","display_name":"Audio signal","level":3,"score":0.4942186176776886},{"id":"https://openalex.org/C28006648","wikidata":"https://www.wikidata.org/wiki/Q6934509","display_name":"Multi-task learning","level":3,"score":0.47566455602645874},{"id":"https://openalex.org/C34146451","wikidata":"https://www.wikidata.org/wiki/Q5048094","display_name":"Cascade","level":2,"score":0.4497702419757843},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.4445732533931732},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4213646352291107},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.35947418212890625},{"id":"https://openalex.org/C94487597","wikidata":"https://www.wikidata.org/wiki/Q11101","display_name":"Sensory system","level":2,"score":0.17220500111579895},{"id":"https://openalex.org/C13895895","wikidata":"https://www.wikidata.org/wiki/Q3270773","display_name":"Speech coding","level":2,"score":0.090829998254776},{"id":"https://openalex.org/C180747234","wikidata":"https://www.wikidata.org/wiki/Q23373","display_name":"Cognitive psychology","level":1,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C43617362","wikidata":"https://www.wikidata.org/wiki/Q170050","display_name":"Chromatography","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.0},{"id":"https://openalex.org/C36289849","wikidata":"https://www.wikidata.org/wiki/Q34749","display_name":"Social science","level":1,"score":0.0},{"id":"https://openalex.org/C188027245","wikidata":"https://www.wikidata.org/wiki/Q750446","display_name":"Polymer chemistry","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icpr48806.2021.9412890","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icpr48806.2021.9412890","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2020 25th International Conference on Pattern Recognition (ICPR)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","score":0.8399999737739563,"id":"https://metadata.un.org/sdg/4"}],"awards":[{"id":"https://openalex.org/G3781308588","display_name":null,"funder_award_id":"NeTS-1909185","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":75,"referenced_works":["https://openalex.org/W1522301498","https://openalex.org/W1901129140","https://openalex.org/W1975141679","https://openalex.org/W1975647954","https://openalex.org/W1978491093","https://openalex.org/W2044313232","https://openalex.org/W2044626341","https://openalex.org/W2047668141","https://openalex.org/W2099471712","https://openalex.org/W2106277773","https://openalex.org/W2106714211","https://openalex.org/W2108598243","https://openalex.org/W2125389028","https://openalex.org/W2138118304","https://openalex.org/W2184188583","https://openalex.org/W2194775991","https://openalex.org/W2579578355","https://openalex.org/W2603777577","https://openalex.org/W2626792426","https://openalex.org/W2738004338","https://openalex.org/W2759171953","https://openalex.org/W2785563047","https://openalex.org/W2798312861","https://openalex.org/W2799062425","https://openalex.org/W2804078698","https://openalex.org/W2883127586","https://openalex.org/W2890581551","https://openalex.org/W2922538097","https://openalex.org/W2951611190","https://openalex.org/W2953096069","https://openalex.org/W2962699416","https://openalex.org/W2962720665","https://openalex.org/W2962793481","https://openalex.org/W2962838408","https://openalex.org/W2962845008","https://openalex.org/W2963047368","https://openalex.org/W2963066677","https://openalex.org/W2963073614","https://openalex.org/W2963207848","https://openalex.org/W2963373786","https://openalex.org/W2963470893","https://openalex.org/W2963663420","https://openalex.org/W2963767194","https://openalex.org/W2963800363","https://openalex.org/W2963841322","https://openalex.org/W2963981733","https://openalex.org/W2964121744","https://openalex.org/W2964185501","https://openalex.org/W2979157532","https://openalex.org/W2996609772","https://openalex.org/W3034954643","https://openalex.org/W3085151711","https://openalex.org/W3093407252","https://openalex.org/W3101943858","https://openalex.org/W3105838287","https://openalex.org/W3106811106","https://openalex.org/W3201409833","https://openalex.org/W4247442285","https://openalex.org/W4320013936","https://openalex.org/W4396952261","https://openalex.org/W6631190155","https://openalex.org/W6639824700","https://openalex.org/W6686207219","https://openalex.org/W6718379498","https://openalex.org/W6736965957","https://openalex.org/W6745983426","https://openalex.org/W6750427546","https://openalex.org/W6750774204","https://openalex.org/W6752378368","https://openalex.org/W6753532407","https://openalex.org/W6753914649","https://openalex.org/W6758616051","https://openalex.org/W6765779288","https://openalex.org/W6780698268","https://openalex.org/W6948100164"],"related_works":["https://openalex.org/W2153719181","https://openalex.org/W1971748923","https://openalex.org/W2914599329","https://openalex.org/W2158909151","https://openalex.org/W2047374411","https://openalex.org/W3185657312","https://openalex.org/W3006088529","https://openalex.org/W1528616482","https://openalex.org/W993965852","https://openalex.org/W2137074101"],"abstract_inverted_index":{"Since":[0],"we":[1,4,104,124],"were":[2],"babies,":[3],"intuitively":[5],"develop":[6],"the":[7,11,60,77,95,98,116,119,131,153,162,183,189],"ability":[8],"to":[9,62,93,129,151,170],"correlate":[10],"input":[12],"from":[13],"different":[14,35,50,134,173],"cognitive":[15],"sensors":[16],"such":[17],"as":[18],"vision,":[19],"audio,":[20],"and":[21,55,83,167],"text.":[22],"However,":[23],"in":[24,161],"machine":[25,90],"learning,":[26],"this":[27,102],"cross-modal":[28,154,185],"learning":[29,91,155],"is":[30,149,168],"a":[31,53,71,74,106,126,138,144],"nontrivial":[32],"task":[33],"because":[34],"modalities":[36,135],"have":[37,59],"no":[38],"homogeneous":[39],"properties.":[40],"Previous":[41],"works":[42],"discover":[43],"that":[44,179],"there":[45],"should":[46],"be":[47],"bridges":[48],"among":[49],"modalities.":[51,174],"From":[52],"neurology":[54],"psychology":[56],"perspective,":[57],"humans":[58],"capacity":[61],"link":[63],"one":[64],"modality":[65],"with":[66,76,143],"another":[67],"one,":[68],"e.g.,":[69],"associating":[70],"picture":[72],"of":[73,80],"bird":[75],"only":[78],"hearing":[79],"its":[81],"singing":[82],"vice":[84],"versa.":[85],"Is":[86],"it":[87],"possible":[88],"for":[89],"algorithms":[92],"recover":[94],"scene":[96],"given":[97,118],"audio":[99,121],"signal?":[100],"In":[101],"paper,":[103],"propose":[105],"novel":[107,145],"Cascade":[108],"Attention-Guided":[109],"Residue":[110],"GAN":[111],"(CAR-GAN),":[112],"aiming":[113],"at":[114],"reconstructing":[115],"scenes":[117],"corresponding":[120],"signals.":[122],"Particularly,":[123],"present":[125],"residue":[127],"module":[128],"mitigate":[130],"gap":[132],"between":[133],"progressively.":[136],"Moreover,":[137],"cascade":[139],"attention":[140],"guided":[141],"network":[142],"classification":[146],"loss":[147],"function":[148],"designed":[150],"tackle":[152],"task.":[156],"Our":[157],"model":[158,181],"keeps":[159],"consistency":[160],"high-level":[163],"semantic":[164],"label":[165],"domain":[166],"able":[169],"balance":[171],"two":[172],"The":[175],"experimental":[176],"results":[177],"demonstrate":[178],"our":[180],"achieves":[182],"state-of-the-art":[184],"audio-visual":[186],"generation":[187],"on":[188],"challenging":[190],"Sub-URMP":[191],"dataset.":[192]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":4},{"year":2023,"cited_by_count":5},{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":7},{"year":2020,"cited_by_count":4}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
