{"id":"https://openalex.org/W7164849829","doi":"https://doi.org/10.1145/3805622.3810673","title":"Query-Guided Conflict Inference and Incongruity-Aware Alignment for Implicit Hate Speech Detection in Videos","display_name":"Query-Guided Conflict Inference and Incongruity-Aware Alignment for Implicit Hate Speech Detection in Videos","publication_year":2026,"publication_date":"2026-06-15","ids":{"openalex":"https://openalex.org/W7164849829","doi":"https://doi.org/10.1145/3805622.3810673"},"language":null,"primary_location":{"id":"doi:10.1145/3805622.3810673","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3805622.3810673","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2026 International Conference on Multimedia Retrieval","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1145/3805622.3810673","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5138671441","display_name":"Shuo Liu","orcid":"https://orcid.org/0009-0007-6550-8840"},"institutions":[{"id":"https://openalex.org/I31590910","display_name":"Jianghan University","ror":"https://ror.org/041c9x778","country_code":"CN","type":"education","lineage":["https://openalex.org/I31590910"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Shuo Liu","raw_affiliation_strings":["Jianghan University, Wuhan, China"],"raw_orcid":"https://orcid.org/0009-0007-6550-8840","affiliations":[{"raw_affiliation_string":"Jianghan University, Wuhan, China","institution_ids":["https://openalex.org/I31590910"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5123392491","display_name":"Jiakang Yu","orcid":null},"institutions":[{"id":"https://openalex.org/I31590910","display_name":"Jianghan University","ror":"https://ror.org/041c9x778","country_code":"CN","type":"education","lineage":["https://openalex.org/I31590910"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jiakang Yu","raw_affiliation_strings":["Jianghan University, Wuhan, China"],"raw_orcid":"https://orcid.org/0009-0005-3183-2576","affiliations":[{"raw_affiliation_string":"Jianghan University, Wuhan, China","institution_ids":["https://openalex.org/I31590910"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5016914537","display_name":"Xun Zhu","orcid":"https://orcid.org/0000-0002-5143-6774"},"institutions":[{"id":"https://openalex.org/I31590910","display_name":"Jianghan University","ror":"https://ror.org/041c9x778","country_code":"CN","type":"education","lineage":["https://openalex.org/I31590910"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xun Zhu","raw_affiliation_strings":["Jianghan University, Wuhan, China"],"raw_orcid":"https://orcid.org/0000-0002-5143-6774","affiliations":[{"raw_affiliation_string":"Jianghan University, Wuhan, China","institution_ids":["https://openalex.org/I31590910"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5138674737","display_name":"Hongtao Deng","orcid":"https://orcid.org/0000-0001-6910-499X"},"institutions":[{"id":"https://openalex.org/I31590910","display_name":"Jianghan University","ror":"https://ror.org/041c9x778","country_code":"CN","type":"education","lineage":["https://openalex.org/I31590910"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Hongtao Deng","raw_affiliation_strings":["Jianghan University, Wuhan, China"],"raw_orcid":"https://orcid.org/0000-0001-6910-499X","affiliations":[{"raw_affiliation_string":"Jianghan University, Wuhan, China","institution_ids":["https://openalex.org/I31590910"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5063227535","display_name":"Yinxia Lou","orcid":"https://orcid.org/0000-0001-5500-5982"},"institutions":[{"id":"https://openalex.org/I31590910","display_name":"Jianghan University","ror":"https://ror.org/041c9x778","country_code":"CN","type":"education","lineage":["https://openalex.org/I31590910"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yinxia Lou","raw_affiliation_strings":["Jianghan University, Wuhan, China"],"raw_orcid":"https://orcid.org/0000-0001-5500-5982","affiliations":[{"raw_affiliation_string":"Jianghan University, Wuhan, China","institution_ids":["https://openalex.org/I31590910"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.95001973,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1495","last_page":"1503"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12262","display_name":"Hate Speech and Cyberbullying Detection","score":0.8166999816894531,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12262","display_name":"Hate Speech and Cyberbullying Detection","score":0.8166999816894531,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10667","display_name":"Emotion and Mood Recognition","score":0.05730000138282776,"subfield":{"id":"https://openalex.org/subfields/3205","display_name":"Experimental and Cognitive Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10775","display_name":"Generative Adversarial Networks and Image Synthesis","score":0.022199999541044235,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.6858000159263611},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.6262999773025513},{"id":"https://openalex.org/keywords/noise","display_name":"Noise (video)","score":0.5198000073432922},{"id":"https://openalex.org/keywords/object-detection","display_name":"Object detection","score":0.3869999945163727},{"id":"https://openalex.org/keywords/semantics","display_name":"Semantics (computer science)","score":0.3700999915599823},{"id":"https://openalex.org/keywords/closed-captioning","display_name":"Closed captioning","score":0.3400999903678894},{"id":"https://openalex.org/keywords/exploit","display_name":"Exploit","score":0.33160001039505005},{"id":"https://openalex.org/keywords/signal","display_name":"SIGNAL (programming language)","score":0.32109999656677246}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8052999973297119},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.6858000159263611},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.6262999773025513},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6225000023841858},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.5198000073432922},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.412200003862381},{"id":"https://openalex.org/C2776151529","wikidata":"https://www.wikidata.org/wiki/Q3045304","display_name":"Object detection","level":3,"score":0.3869999945163727},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.3700999915599823},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3531999886035919},{"id":"https://openalex.org/C157657479","wikidata":"https://www.wikidata.org/wiki/Q2367247","display_name":"Closed captioning","level":3,"score":0.3400999903678894},{"id":"https://openalex.org/C165696696","wikidata":"https://www.wikidata.org/wiki/Q11287","display_name":"Exploit","level":2,"score":0.33160001039505005},{"id":"https://openalex.org/C2779843651","wikidata":"https://www.wikidata.org/wiki/Q7390335","display_name":"SIGNAL (programming language)","level":2,"score":0.32109999656677246},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.32019999623298645},{"id":"https://openalex.org/C137270730","wikidata":"https://www.wikidata.org/wiki/Q120811","display_name":"Detection theory","level":3,"score":0.31360000371932983},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.3043000102043152},{"id":"https://openalex.org/C202615002","wikidata":"https://www.wikidata.org/wiki/Q783507","display_name":"Differentiable function","level":2,"score":0.2969000041484833},{"id":"https://openalex.org/C100675267","wikidata":"https://www.wikidata.org/wiki/Q1371624","display_name":"Background noise","level":2,"score":0.29429998993873596},{"id":"https://openalex.org/C63479239","wikidata":"https://www.wikidata.org/wiki/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.2799000144004822},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.27079999446868896},{"id":"https://openalex.org/C108583219","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep learning","level":2,"score":0.26570001244544983},{"id":"https://openalex.org/C2984842247","wikidata":"https://www.wikidata.org/wiki/Q197536","display_name":"Deep neural networks","level":3,"score":0.26330000162124634},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.257999986410141},{"id":"https://openalex.org/C204201278","wikidata":"https://www.wikidata.org/wiki/Q1332614","display_name":"Voice activity detection","level":3,"score":0.25609999895095825},{"id":"https://openalex.org/C61797465","wikidata":"https://www.wikidata.org/wiki/Q1188986","display_name":"Term (time)","level":2,"score":0.2556999921798706},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.2549000084400177}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3805622.3810673","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3805622.3810673","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2026 International Conference on Multimedia Retrieval","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3805622.3810673","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3805622.3810673","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2026 International Conference on Multimedia Retrieval","raw_type":"proceedings-article"},"sustainable_development_goals":[{"score":0.6519676446914673,"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":21,"referenced_works":["https://openalex.org/W2040467972","https://openalex.org/W2099813784","https://openalex.org/W2964051877","https://openalex.org/W3102848065","https://openalex.org/W3152792492","https://openalex.org/W3201622928","https://openalex.org/W4214612132","https://openalex.org/W4285210452","https://openalex.org/W4380302040","https://openalex.org/W4386071707","https://openalex.org/W4386076128","https://openalex.org/W4388666562","https://openalex.org/W4391302483","https://openalex.org/W4391344842","https://openalex.org/W4392050216","https://openalex.org/W4393153999","https://openalex.org/W4403622998","https://openalex.org/W4412945090","https://openalex.org/W4412945283","https://openalex.org/W4415707676","https://openalex.org/W7130991205"],"related_works":[],"abstract_inverted_index":{"Implicit":[0],"hate":[1],"speech":[2],"detection":[3],"in":[4,48,184],"videos":[5],"is":[6],"a":[7,60,65,100,110,122,153],"complex":[8],"multimodal":[9],"task":[10],"that":[11,106,142,175],"aims":[12],"to":[13,42,70,79,113,127],"uncover":[14],"malicious":[15,66],"intent":[16],"masked":[17],"by":[18,36,152],"coded":[19],"language,":[20],"sarcasm,":[21],"and":[22,116,162,171],"visual":[23,62],"metaphors.":[24],"However,":[25],"existing":[26,182],"state-of-the-art":[27,178],"approaches":[28],"predominantly":[29],"rely":[30],"on":[31,168],"symmetric":[32,96],"fusion":[33],"paradigms":[34],"driven":[35],"consistency-seeking":[37],"objectives,":[38],"which":[39],"fundamentally":[40],"fail":[41],"capture":[43,128],"the":[44,54,89,169],"structural":[45],"conflict":[46,124],"inherent":[47],"implicit":[49,186],"hate;":[50],"they":[51],"inadvertently":[52],"treat":[53],"defining":[55],"cross-modal":[56],"incongruities,":[57],"such":[58],"as":[59,76,109],"cheerful":[61],"scene":[63],"contradicting":[64],"caption\u2014as":[67],"alignment":[68],"noise":[69],"be":[71,80],"smoothed":[72],"out,":[73],"rather":[74],"than":[75],"critical":[77],"signals":[78,131],"amplified.":[81],"To":[82],"address":[83],"this":[84],"\u201cAlignment":[85],"Trap,\u201d":[86],"we":[87,135],"propose":[88],"Temporal-Incongruity":[90],"Hate":[91],"Detection":[92],"(TIHD)":[93],"framework.":[94],"Unlike":[95],"approaches,":[97],"TIHD":[98,157,176],"introduces":[99],"Query-Guided":[101],"Conflict":[102],"Inference":[103],"Network":[104],"(QGC-Net)":[105],"leverages":[107],"text":[108],"semantic":[111],"anchor":[112],"explicitly":[114],"retrieve":[115],"amplify":[117],"contradictory":[118],"audio-visual":[119],"features":[120],"via":[121],"learned":[123],"gate.":[125],"Furthermore,":[126],"transient":[129],"hateful":[130],"without":[132],"frame-level":[133],"supervision,":[134],"devise":[136],"an":[137],"Incongruity-Aware":[138],"Alignment":[139],"(IAA)":[140],"module":[141],"performs":[143],"differentiable":[144],"soft-alignment":[145],"scanning":[146],"with":[147],"adaptive":[148],"temporal":[149],"dynamics.":[150],"Complemented":[151],"two-stage":[154],"learning":[155],"strategy,":[156],"effectively":[158],"learns":[159],"robust":[160],"representations":[161],"precise":[163],"decision":[164],"boundaries.":[165],"Extensive":[166],"experiments":[167],"ImpliHateVid":[170],"HateMM":[172],"benchmarks":[173],"demonstrate":[174],"achieves":[177],"performance,":[179],"significantly":[180],"outperforming":[181],"baselines":[183],"unearthing":[185],"hate.":[187]},"counts_by_year":[],"updated_date":"2026-06-16T07:37:23.134862","created_date":"2026-06-16T00:00:00"}
