{"id":"https://openalex.org/W4313163028","doi":"https://doi.org/10.1109/tcsvt.2022.3212463","title":"ERM: Energy-Based Refined-Attention Mechanism for Video Question Answering","display_name":"ERM: Energy-Based Refined-Attention Mechanism for Video Question Answering","publication_year":2022,"publication_date":"2022-10-05","ids":{"openalex":"https://openalex.org/W4313163028","doi":"https://doi.org/10.1109/tcsvt.2022.3212463"},"language":"en","primary_location":{"id":"doi:10.1109/tcsvt.2022.3212463","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcsvt.2022.3212463","pdf_url":null,"source":{"id":"https://openalex.org/S115173108","display_name":"IEEE Transactions on Circuits and Systems for Video Technology","issn_l":"1051-8215","issn":["1051-8215","1558-2205"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Circuits and Systems for Video Technology","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5045437445","display_name":"Fuwei Zhang","orcid":"https://orcid.org/0000-0003-0179-9988"},"institutions":[{"id":"https://openalex.org/I157773358","display_name":"Sun Yat-sen University","ror":"https://ror.org/0064kty71","country_code":"CN","type":"education","lineage":["https://openalex.org/I157773358"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Fuwei Zhang","raw_affiliation_strings":["School of Computer Science and Engineering and the National Engineering Research Center of Digital Life, Sun Yat-sen University, Guangzhou, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Engineering and the National Engineering Research Center of Digital Life, Sun Yat-sen University, Guangzhou, China","institution_ids":["https://openalex.org/I157773358"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5000079751","display_name":"Ruomei Wang","orcid":"https://orcid.org/0000-0002-2712-4412"},"institutions":[{"id":"https://openalex.org/I157773358","display_name":"Sun Yat-sen University","ror":"https://ror.org/0064kty71","country_code":"CN","type":"education","lineage":["https://openalex.org/I157773358"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Ruomei Wang","raw_affiliation_strings":["School of Computer Science and Engineering and the National Engineering Research Center of Digital Life, Sun Yat-sen University, Guangzhou, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Engineering and the National Engineering Research Center of Digital Life, Sun Yat-sen University, Guangzhou, China","institution_ids":["https://openalex.org/I157773358"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5077610457","display_name":"Fan Zhou","orcid":"https://orcid.org/0000-0003-1736-2641"},"institutions":[{"id":"https://openalex.org/I157773358","display_name":"Sun Yat-sen University","ror":"https://ror.org/0064kty71","country_code":"CN","type":"education","lineage":["https://openalex.org/I157773358"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Fan Zhou","raw_affiliation_strings":["School of Computer Science and Engineering and the National Engineering Research Center of Digital Life, Sun Yat-sen University, Guangzhou, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Engineering and the National Engineering Research Center of Digital Life, Sun Yat-sen University, Guangzhou, China","institution_ids":["https://openalex.org/I157773358"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5045358430","display_name":"Yuanmao Luo","orcid":"https://orcid.org/0009-0006-8035-6737"},"institutions":[{"id":"https://openalex.org/I157773358","display_name":"Sun Yat-sen University","ror":"https://ror.org/0064kty71","country_code":"CN","type":"education","lineage":["https://openalex.org/I157773358"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuanmao Luo","raw_affiliation_strings":["School of Computer Science and Engineering and the National Engineering Research Center of Digital Life, Sun Yat-sen University, Guangzhou, China"],"affiliations":[{"raw_affiliation_string":"School of Computer Science and Engineering and the National Engineering Research Center of Digital Life, Sun Yat-sen University, Guangzhou, China","institution_ids":["https://openalex.org/I157773358"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5045437445"],"corresponding_institution_ids":["https://openalex.org/I157773358"],"apc_list":null,"apc_paid":null,"fwci":2.0395,"has_fulltext":false,"cited_by_count":21,"citation_normalized_percentile":{"value":0.88383051,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":96,"max":99},"biblio":{"volume":"33","issue":"3","first_page":"1454","last_page":"1467"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10627","display_name":"Advanced Image and Video Retrieval Techniques","score":0.9976999759674072,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11307","display_name":"Domain Adaptation and Few-Shot Learning","score":0.9970999956130981,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7919857501983643},{"id":"https://openalex.org/keywords/modal","display_name":"Modal","score":0.7335039973258972},{"id":"https://openalex.org/keywords/discriminative-model","display_name":"Discriminative model","score":0.7057713270187378},{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.6135576963424683},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5584843158721924},{"id":"https://openalex.org/keywords/construct","display_name":"Construct (python library)","score":0.498366117477417},{"id":"https://openalex.org/keywords/modular-design","display_name":"Modular design","score":0.4839799404144287},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.46992170810699463},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.4247647523880005},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.4169751703739166},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.32393667101860046}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7919857501983643},{"id":"https://openalex.org/C71139939","wikidata":"https://www.wikidata.org/wiki/Q910194","display_name":"Modal","level":2,"score":0.7335039973258972},{"id":"https://openalex.org/C97931131","wikidata":"https://www.wikidata.org/wiki/Q5282087","display_name":"Discriminative model","level":2,"score":0.7057713270187378},{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.6135576963424683},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5584843158721924},{"id":"https://openalex.org/C2780801425","wikidata":"https://www.wikidata.org/wiki/Q5164392","display_name":"Construct (python library)","level":2,"score":0.498366117477417},{"id":"https://openalex.org/C101468663","wikidata":"https://www.wikidata.org/wiki/Q1620158","display_name":"Modular design","level":2,"score":0.4839799404144287},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.46992170810699463},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.4247647523880005},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.4169751703739166},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.32393667101860046},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C188027245","wikidata":"https://www.wikidata.org/wiki/Q750446","display_name":"Polymer chemistry","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tcsvt.2022.3212463","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tcsvt.2022.3212463","pdf_url":null,"source":{"id":"https://openalex.org/S115173108","display_name":"IEEE Transactions on Circuits and Systems for Video Technology","issn_l":"1051-8215","issn":["1051-8215","1558-2205"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Circuits and Systems for Video Technology","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.6100000143051147,"display_name":"Reduced inequalities","id":"https://metadata.un.org/sdg/10"}],"awards":[{"id":"https://openalex.org/G8809513915","display_name":null,"funder_award_id":"2021YFC2009400","funder_id":"https://openalex.org/F4320335777","funder_display_name":"National Key Research and Development Program of China"}],"funders":[{"id":"https://openalex.org/F4320335777","display_name":"National Key Research and Development Program of China","ror":null}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":64,"referenced_works":["https://openalex.org/W2079636521","https://openalex.org/W2097117768","https://openalex.org/W2194775991","https://openalex.org/W2250539671","https://openalex.org/W2425121537","https://openalex.org/W2606982687","https://openalex.org/W2765716052","https://openalex.org/W2904452845","https://openalex.org/W2914134937","https://openalex.org/W2954199749","https://openalex.org/W2962934715","https://openalex.org/W2962949233","https://openalex.org/W2963541336","https://openalex.org/W2964057271","https://openalex.org/W2964156315","https://openalex.org/W2965527197","https://openalex.org/W2974161034","https://openalex.org/W2984737709","https://openalex.org/W2997344006","https://openalex.org/W2997805943","https://openalex.org/W2998166190","https://openalex.org/W3010593057","https://openalex.org/W3034425145","https://openalex.org/W3034730770","https://openalex.org/W3035419698","https://openalex.org/W3105232955","https://openalex.org/W3119243803","https://openalex.org/W3122622502","https://openalex.org/W3126595748","https://openalex.org/W3127165192","https://openalex.org/W3160451259","https://openalex.org/W3160817565","https://openalex.org/W3166608351","https://openalex.org/W3167092180","https://openalex.org/W3168640669","https://openalex.org/W3173367591","https://openalex.org/W3181758331","https://openalex.org/W3189890868","https://openalex.org/W3194203264","https://openalex.org/W3197240429","https://openalex.org/W3197364345","https://openalex.org/W3197457832","https://openalex.org/W3203995540","https://openalex.org/W3207409917","https://openalex.org/W3207847779","https://openalex.org/W3217059257","https://openalex.org/W3217304284","https://openalex.org/W4205492448","https://openalex.org/W4226060302","https://openalex.org/W4320167334","https://openalex.org/W4386076661","https://openalex.org/W6631190155","https://openalex.org/W6640212811","https://openalex.org/W6739901393","https://openalex.org/W6786140819","https://openalex.org/W6790679657","https://openalex.org/W6796538260","https://openalex.org/W6797109355","https://openalex.org/W6800388583","https://openalex.org/W6803023527","https://openalex.org/W6804277322","https://openalex.org/W6805349323","https://openalex.org/W6810618142","https://openalex.org/W6848935878"],"related_works":["https://openalex.org/W2729514902","https://openalex.org/W2024160000","https://openalex.org/W2773500201","https://openalex.org/W2061273563","https://openalex.org/W2285052147","https://openalex.org/W4287995534","https://openalex.org/W2743258233","https://openalex.org/W2998168123","https://openalex.org/W1972656095","https://openalex.org/W2970216048"],"abstract_inverted_index":{"Spatiotemporal":[0],"attention":[1,27],"learning":[2],"remains":[3],"a":[4,14,64,77,183,230],"challenging":[5],"video":[6,34,135],"question":[7],"answering":[8],"(VideoQA)":[9],"task":[10],"as":[11,76],"it":[12,166],"requires":[13],"sufficient":[15],"understanding":[16],"of":[17,118,160,178,202],"cross-modal":[18,26,55,83,91,191],"spatiotemporal":[19,46],"information.":[20,93],"Existing":[21],"methods":[22,39,247],"usually":[23],"leverage":[24],"different":[25,132,149,245],"mechanisms":[28],"to":[29,86,98,114,122,235,248],"reveal":[30],"potential":[31],"associations":[32],"between":[33,103,131,148],"and":[35,89,107,170,215],"question.":[36],"While":[37],"these":[38],"effectively":[40],"remove":[41],"irrelevant":[42],"information":[43,52,85],"from":[44,81],"the":[45,50,54,72,100,104,111,116,124,129,137,161,176,179,190,200],"attention,":[47],"they":[48],"ignore":[49],"pseudo-related":[51],"within":[53],"interaction":[56,84,92],"attention.":[57],"To":[58,198],"address":[59],"this":[60,141],"problem,":[61],"we":[62,181,205],"proposed":[63,139,162],"novel":[65],"energy-based":[66,196],"refined-attention":[67],"mechanism":[68],"(ERM).":[69],"ERM":[70,138,163,239],"leverages":[71],"significant":[73],"difference":[74],"distribution":[75],"discriminative":[78],"criterion":[79],"derived":[80],"question-guided":[82],"determine":[87],"question-related":[88],"question-irrelated":[90],"The":[94,146,158,222],"specific":[95],"method":[96,228],"is":[97,167],"measure":[99],"linear":[101],"separability":[102],"target":[105],"neuron":[106],"other":[108],"neurons":[109],"in":[110,134,140,194],"neural":[112],"network":[113],"confirm":[115],"importance":[117],"neurons.":[119],"In":[120],"addition,":[121],"solve":[123],"statistical":[125],"bias":[126],"caused":[127],"by":[128],"differences":[130],"modes":[133,150],"tasks,":[136],"paper":[142],"has":[143],"learnable":[144,156],"parameters.":[145,157],"correlation":[147],"can":[151,240],"be":[152,241],"learned":[153],"adaptively":[154],"through":[155],"advantages":[159],"are":[164],"that":[165,187,226],"more":[168],"flexible":[169],"modular":[171],"while":[172],"remaining":[173],"lightweight.":[174],"With":[175],"help":[177],"ERM,":[180],"construct":[182],"lightweight":[184],"VideoQA":[185,220,237,246],"model":[186],"efficiently":[188],"integrates":[189],"feature":[192],"representations":[193],"an":[195],"manner.":[197],"evaluate":[199],"effectiveness":[201],"our":[203,227],"method,":[204],"carried":[206],"out":[207],"extensive":[208],"experiments":[209],"on":[210],"five":[211],"publicly":[212],"available":[213],"datasets":[214],"compared":[216,234],"them":[217],"with":[218],"state-of-the-art":[219,236],"methods.":[221,238],"experiment":[223],"results":[224],"demonstrate":[225],"brings":[229],"noticeable":[231],"performance":[232],"improvement":[233],"flexibly":[242],"integrated":[243],"into":[244],"improve":[249],"their":[250],"Q&A":[251],"performance.":[252]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":8},{"year":2024,"cited_by_count":9},{"year":2023,"cited_by_count":3}],"updated_date":"2026-03-24T08:02:53.985720","created_date":"2025-10-10T00:00:00"}
