{"id":"https://openalex.org/W4312956782","doi":"https://doi.org/10.1109/icpr56361.2022.9956247","title":"CAT: Re-Conv Attention in Transformer for Visual Question Answering","display_name":"CAT: Re-Conv Attention in Transformer for Visual Question Answering","publication_year":2022,"publication_date":"2022-08-21","ids":{"openalex":"https://openalex.org/W4312956782","doi":"https://doi.org/10.1109/icpr56361.2022.9956247"},"language":"en","primary_location":{"id":"doi:10.1109/icpr56361.2022.9956247","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icpr56361.2022.9956247","pdf_url":null,"source":{"id":"https://openalex.org/S4363607731","display_name":"2022 26th International Conference on Pattern Recognition (ICPR)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 26th International Conference on Pattern Recognition (ICPR)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100392974","display_name":"Haotian Zhang","orcid":"https://orcid.org/0009-0008-0293-337X"},"institutions":[{"id":"https://openalex.org/I2722730","display_name":"Inner Mongolia University","ror":"https://ror.org/0106qb496","country_code":"CN","type":"education","lineage":["https://openalex.org/I2722730"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Haotian Zhang","raw_affiliation_strings":["Inner Mongolia University,Hohhot,China","Inner Mongolia University, Hohhot, China"],"affiliations":[{"raw_affiliation_string":"Inner Mongolia University,Hohhot,China","institution_ids":["https://openalex.org/I2722730"]},{"raw_affiliation_string":"Inner Mongolia University, Hohhot, China","institution_ids":["https://openalex.org/I2722730"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101960931","display_name":"Wei Wu","orcid":"https://orcid.org/0000-0002-2694-6086"},"institutions":[{"id":"https://openalex.org/I2722730","display_name":"Inner Mongolia University","ror":"https://ror.org/0106qb496","country_code":"CN","type":"education","lineage":["https://openalex.org/I2722730"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Wei Wu","raw_affiliation_strings":["Inner Mongolia University,Hohhot,China","Inner Mongolia University, Hohhot, China"],"affiliations":[{"raw_affiliation_string":"Inner Mongolia University,Hohhot,China","institution_ids":["https://openalex.org/I2722730"]},{"raw_affiliation_string":"Inner Mongolia University, Hohhot, China","institution_ids":["https://openalex.org/I2722730"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5100392974"],"corresponding_institution_ids":["https://openalex.org/I2722730"],"apc_list":null,"apc_paid":null,"fwci":0.5418,"has_fulltext":false,"cited_by_count":9,"citation_normalized_percentile":{"value":0.7565949,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":97,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"1471","last_page":"1477"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9975000023841858,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6938490867614746},{"id":"https://openalex.org/keywords/question-answering","display_name":"Question answering","score":0.675865888595581},{"id":"https://openalex.org/keywords/transformer","display_name":"Transformer","score":0.6408799290657043},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5793302059173584},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.4510754942893982},{"id":"https://openalex.org/keywords/attention-network","display_name":"Attention network","score":0.45058223605155945},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.35985565185546875},{"id":"https://openalex.org/keywords/voltage","display_name":"Voltage","score":0.06747367978096008}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6938490867614746},{"id":"https://openalex.org/C44291984","wikidata":"https://www.wikidata.org/wiki/Q1074173","display_name":"Question answering","level":2,"score":0.675865888595581},{"id":"https://openalex.org/C66322947","wikidata":"https://www.wikidata.org/wiki/Q11658","display_name":"Transformer","level":3,"score":0.6408799290657043},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5793302059173584},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.4510754942893982},{"id":"https://openalex.org/C2993807640","wikidata":"https://www.wikidata.org/wiki/Q103709453","display_name":"Attention network","level":2,"score":0.45058223605155945},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.35985565185546875},{"id":"https://openalex.org/C165801399","wikidata":"https://www.wikidata.org/wiki/Q25428","display_name":"Voltage","level":2,"score":0.06747367978096008},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icpr56361.2022.9956247","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icpr56361.2022.9956247","pdf_url":null,"source":{"id":"https://openalex.org/S4363607731","display_name":"2022 26th International Conference on Pattern Recognition (ICPR)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 26th International Conference on Pattern Recognition (ICPR)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Partnerships for the goals","score":0.49000000953674316,"id":"https://metadata.un.org/sdg/17"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":45,"referenced_works":["https://openalex.org/W639708223","https://openalex.org/W1861492603","https://openalex.org/W2194775991","https://openalex.org/W2250539671","https://openalex.org/W2277195237","https://openalex.org/W2463565445","https://openalex.org/W2508429489","https://openalex.org/W2531409750","https://openalex.org/W2546696630","https://openalex.org/W2558535589","https://openalex.org/W2560730294","https://openalex.org/W2745461083","https://openalex.org/W2765440071","https://openalex.org/W2774267535","https://openalex.org/W2883311563","https://openalex.org/W2962764817","https://openalex.org/W2962964995","https://openalex.org/W2963150697","https://openalex.org/W2963191264","https://openalex.org/W2963521239","https://openalex.org/W2964022527","https://openalex.org/W2964120214","https://openalex.org/W2964345792","https://openalex.org/W2966683369","https://openalex.org/W2966715458","https://openalex.org/W2968124245","https://openalex.org/W2994818707","https://openalex.org/W3006683367","https://openalex.org/W3092767330","https://openalex.org/W3160157873","https://openalex.org/W3186187670","https://openalex.org/W4385245566","https://openalex.org/W6620707391","https://openalex.org/W6639102338","https://openalex.org/W6694395031","https://openalex.org/W6719057275","https://openalex.org/W6739901393","https://openalex.org/W6747225742","https://openalex.org/W6749537441","https://openalex.org/W6752083267","https://openalex.org/W6766904570","https://openalex.org/W6767279747","https://openalex.org/W6774054309","https://openalex.org/W6780226713","https://openalex.org/W6785393113"],"related_works":["https://openalex.org/W2384605597","https://openalex.org/W2387743295","https://openalex.org/W2115758952","https://openalex.org/W3082787378","https://openalex.org/W2136007095","https://openalex.org/W4381058564","https://openalex.org/W3003945460","https://openalex.org/W2964413124","https://openalex.org/W4288267738","https://openalex.org/W4200094402"],"abstract_inverted_index":{"Visual":[0],"Question":[1],"Answering":[2],"(VQA)":[3],"is":[4,83,99],"a":[5,18,67,187],"challenging":[6],"task":[7],"that":[8,134],"obtains":[9],"the":[10,22,45,50,59,72,78,88,107,111,117,124,135,139,147,183],"correct":[11],"answer":[12],"based":[13],"on":[14,116,163],"an":[15],"image":[16,55],"and":[17,29,141,151,172,177,186],"question":[19],"related":[20],"to":[21,48,76,86,101,153],"picture.":[23],"Images":[24],"often":[25],"contain":[26],"more":[27],"information":[28,114],"local":[30,62,104,112,142],"spatial":[31],"relationships":[32,52,143],"than":[33,182],"text":[34],"in":[35,71],"this":[36],"task.":[37],"However,":[38],"many":[39],"current":[40],"VQA":[41],"models":[42],"only":[43],"utilize":[44],"original":[46],"Transformer":[47,73,185],"capture":[49,138],"global":[51,93,118,126,140],"when":[53],"performing":[54],"processing":[56],"while":[57],"ignoring":[58],"equally":[60],"important":[61],"relationships.":[63],"This":[64],"paper":[65],"proposes":[66],"novel":[68],"Re-Conv":[69],"Attention":[70],"module":[74,136],"(CAT)":[75],"solve":[77],"above":[79],"problem.":[80],"Specifically,":[81],"self-attention":[82],"first":[84],"used":[85],"extract":[87,102],"correlation":[89],"between":[90],"features":[91],"(the":[92],"relationship).":[94],"Then,":[95],"depthwise":[96],"separable":[97],"convolution":[98],"utilized":[100],"exciting":[103],"information.":[105],"Finally,":[106],"weight":[108],"generated":[109],"by":[110,121],"essential":[113],"works":[115],"relationship":[119],"extracted":[120],"self-attention,":[122],"developing":[123],"local-guided":[125],"feature,":[127],"which":[128],"constitutes":[129],"our":[130],"re-attention":[131,148],"mechanism,":[132,149],"so":[133],"can":[137],"simultaneously.":[144],"We":[145],"combine":[146],"FFN,":[150],"Layer-norm":[152],"form":[154],"CAT.":[155],"To":[156],"validate":[157],"CAT,":[158],"we":[159],"conduct":[160],"extensive":[161],"experiments":[162],"six":[164],"benchmark":[165],"datasets":[166],"of":[167,189],"VQA,":[168],"Image-Text":[169],"Matching":[170],"(ITM),":[171],"Referring":[173],"Expression":[174],"Comprehension":[175],"(REC)":[176],"achieve":[178],"superior":[179],"performance":[180],"gains":[181],"standard":[184],"bunch":[188],"stats-of-the-art":[190],"methods.":[191]},"counts_by_year":[{"year":2025,"cited_by_count":5},{"year":2024,"cited_by_count":4}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
