{"id":"https://openalex.org/W4304083155","doi":"https://doi.org/10.1145/3503161.3548309","title":"DHHN: Dual Hierarchical Hybrid Network for Weakly-Supervised Audio-Visual Video Parsing","display_name":"DHHN: Dual Hierarchical Hybrid Network for Weakly-Supervised Audio-Visual Video Parsing","publication_year":2022,"publication_date":"2022-10-10","ids":{"openalex":"https://openalex.org/W4304083155","doi":"https://doi.org/10.1145/3503161.3548309"},"language":"en","primary_location":{"id":"doi:10.1145/3503161.3548309","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3503161.3548309","pdf_url":null,"source":{"id":"https://openalex.org/S4363608757","display_name":"Proceedings of the 30th ACM International Conference on Multimedia","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 30th ACM International Conference on Multimedia","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101786662","display_name":"Xun Jiang","orcid":"https://orcid.org/0000-0003-2209-651X"},"institutions":[{"id":"https://openalex.org/I150229711","display_name":"University of Electronic Science and Technology of China","ror":"https://ror.org/04qr3zq92","country_code":"CN","type":"education","lineage":["https://openalex.org/I150229711"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Xun Jiang","raw_affiliation_strings":["University of Electronic Science and Technology of China, Chengdu, China"],"affiliations":[{"raw_affiliation_string":"University of Electronic Science and Technology of China, Chengdu, China","institution_ids":["https://openalex.org/I150229711"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5009074046","display_name":"Xing Xu","orcid":"https://orcid.org/0000-0001-5685-3123"},"institutions":[{"id":"https://openalex.org/I150229711","display_name":"University of Electronic Science and Technology of China","ror":"https://ror.org/04qr3zq92","country_code":"CN","type":"education","lineage":["https://openalex.org/I150229711"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xing Xu","raw_affiliation_strings":["University of Electronic Science and Technology of China, Chengdu, China"],"affiliations":[{"raw_affiliation_string":"University of Electronic Science and Technology of China, Chengdu, China","institution_ids":["https://openalex.org/I150229711"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100742805","display_name":"Zhiguo Chen","orcid":"https://orcid.org/0000-0002-9541-1894"},"institutions":[{"id":"https://openalex.org/I150229711","display_name":"University of Electronic Science and Technology of China","ror":"https://ror.org/04qr3zq92","country_code":"CN","type":"education","lineage":["https://openalex.org/I150229711"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhiguo Chen","raw_affiliation_strings":["University of Electronic Science and Technology of China, Chengdu, China"],"affiliations":[{"raw_affiliation_string":"University of Electronic Science and Technology of China, Chengdu, China","institution_ids":["https://openalex.org/I150229711"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5059349096","display_name":"Jingran Zhang","orcid":null},"institutions":[{"id":"https://openalex.org/I150229711","display_name":"University of Electronic Science and Technology of China","ror":"https://ror.org/04qr3zq92","country_code":"CN","type":"education","lineage":["https://openalex.org/I150229711"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jingran Zhang","raw_affiliation_strings":["University of Electronic Science and Technology of China, Chengdu, China"],"affiliations":[{"raw_affiliation_string":"University of Electronic Science and Technology of China, Chengdu, China","institution_ids":["https://openalex.org/I150229711"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5036987388","display_name":"Jingkuan Song","orcid":"https://orcid.org/0000-0002-2549-8322"},"institutions":[{"id":"https://openalex.org/I150229711","display_name":"University of Electronic Science and Technology of China","ror":"https://ror.org/04qr3zq92","country_code":"CN","type":"education","lineage":["https://openalex.org/I150229711"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jingkuan Song","raw_affiliation_strings":["University of Electronic Science and Technology of China, Chengdu, China"],"affiliations":[{"raw_affiliation_string":"University of Electronic Science and Technology of China, Chengdu, China","institution_ids":["https://openalex.org/I150229711"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5074492050","display_name":"Fumin Shen","orcid":"https://orcid.org/0000-0001-7303-3231"},"institutions":[{"id":"https://openalex.org/I150229711","display_name":"University of Electronic Science and Technology of China","ror":"https://ror.org/04qr3zq92","country_code":"CN","type":"education","lineage":["https://openalex.org/I150229711"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Fumin Shen","raw_affiliation_strings":["University of Electronic Science and Technology of China, Chengdu, China"],"affiliations":[{"raw_affiliation_string":"University of Electronic Science and Technology of China, Chengdu, China","institution_ids":["https://openalex.org/I150229711"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5019035804","display_name":"Huimin Lu","orcid":"https://orcid.org/0000-0001-9794-3221"},"institutions":[{"id":"https://openalex.org/I207014233","display_name":"Kyushu Institute of Technology","ror":"https://ror.org/02278tr80","country_code":"JP","type":"education","lineage":["https://openalex.org/I207014233"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"Huimin Lu","raw_affiliation_strings":["Kyushu Institute of Technology, Kitakyushu, China"],"affiliations":[{"raw_affiliation_string":"Kyushu Institute of Technology, Kitakyushu, China","institution_ids":["https://openalex.org/I207014233"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5052993469","display_name":"Heng Tao Shen","orcid":"https://orcid.org/0000-0002-2999-2088"},"institutions":[{"id":"https://openalex.org/I4210136793","display_name":"Peng Cheng Laboratory","ror":"https://ror.org/03qdqbt06","country_code":"CN","type":"facility","lineage":["https://openalex.org/I4210136793"]},{"id":"https://openalex.org/I150229711","display_name":"University of Electronic Science and Technology of China","ror":"https://ror.org/04qr3zq92","country_code":"CN","type":"education","lineage":["https://openalex.org/I150229711"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Heng Tao Shen","raw_affiliation_strings":["University of Electronic Science and Technology of China and Peng Cheng Laboratory, Chengdu, China"],"affiliations":[{"raw_affiliation_string":"University of Electronic Science and Technology of China and Peng Cheng Laboratory, Chengdu, China","institution_ids":["https://openalex.org/I4210136793","https://openalex.org/I150229711"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5101786662"],"corresponding_institution_ids":["https://openalex.org/I150229711"],"apc_list":null,"apc_paid":null,"fwci":4.0502,"has_fulltext":false,"cited_by_count":33,"citation_normalized_percentile":{"value":0.95703002,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":97,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"719","last_page":"727"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9980000257492065,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10515","display_name":"Cancer-related molecular mechanisms research","score":0.9911999702453613,"subfield":{"id":"https://openalex.org/subfields/1306","display_name":"Cancer Research"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8498615622520447},{"id":"https://openalex.org/keywords/parsing","display_name":"Parsing","score":0.7727583050727844},{"id":"https://openalex.org/keywords/modalities","display_name":"Modalities","score":0.6467147469520569},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6242957711219788},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.5597542524337769},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5112946033477783},{"id":"https://openalex.org/keywords/semantics","display_name":"Semantics (computer science)","score":0.44881054759025574},{"id":"https://openalex.org/keywords/event","display_name":"Event (particle physics)","score":0.4451161324977875},{"id":"https://openalex.org/keywords/context-model","display_name":"Context model","score":0.43189069628715515},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.39344877004623413},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.3683020770549774},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3516525626182556},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.11659464240074158}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8498615622520447},{"id":"https://openalex.org/C186644900","wikidata":"https://www.wikidata.org/wiki/Q194152","display_name":"Parsing","level":2,"score":0.7727583050727844},{"id":"https://openalex.org/C2779903281","wikidata":"https://www.wikidata.org/wiki/Q6888026","display_name":"Modalities","level":2,"score":0.6467147469520569},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6242957711219788},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.5597542524337769},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5112946033477783},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.44881054759025574},{"id":"https://openalex.org/C2779662365","wikidata":"https://www.wikidata.org/wiki/Q5416694","display_name":"Event (particle physics)","level":2,"score":0.4451161324977875},{"id":"https://openalex.org/C183322885","wikidata":"https://www.wikidata.org/wiki/Q17007702","display_name":"Context model","level":3,"score":0.43189069628715515},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.39344877004623413},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.3683020770549774},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3516525626182556},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.11659464240074158},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C36289849","wikidata":"https://www.wikidata.org/wiki/Q34749","display_name":"Social science","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3503161.3548309","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3503161.3548309","pdf_url":null,"source":{"id":"https://openalex.org/S4363608757","display_name":"Proceedings of the 30th ACM International Conference on Multimedia","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 30th ACM International Conference on Multimedia","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.5699999928474426,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[{"id":"https://openalex.org/G5133417063","display_name":null,"funder_award_id":"2018AAA0102200","funder_id":"https://openalex.org/F4320335777","funder_display_name":"National Key Research and Development Program of China"}],"funders":[{"id":"https://openalex.org/F4320335777","display_name":"National Key Research and Development Program of China","ror":null}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":36,"referenced_works":["https://openalex.org/W2092392156","https://openalex.org/W2108598243","https://openalex.org/W2186222003","https://openalex.org/W2194775991","https://openalex.org/W2593116425","https://openalex.org/W2619697695","https://openalex.org/W2931433835","https://openalex.org/W2963155035","https://openalex.org/W2971680695","https://openalex.org/W2981851635","https://openalex.org/W2990113535","https://openalex.org/W3009086519","https://openalex.org/W3015925607","https://openalex.org/W3019023591","https://openalex.org/W3034556939","https://openalex.org/W3034658206","https://openalex.org/W3048939150","https://openalex.org/W3049148844","https://openalex.org/W3093287838","https://openalex.org/W3099638501","https://openalex.org/W3111984153","https://openalex.org/W3116298410","https://openalex.org/W3118120400","https://openalex.org/W3169318522","https://openalex.org/W3173388686","https://openalex.org/W3175335326","https://openalex.org/W3175514052","https://openalex.org/W3182657421","https://openalex.org/W3187885408","https://openalex.org/W3196863408","https://openalex.org/W3197819226","https://openalex.org/W3217578129","https://openalex.org/W4205605843","https://openalex.org/W4214497471","https://openalex.org/W4214759869","https://openalex.org/W4225620527"],"related_works":["https://openalex.org/W2185469136","https://openalex.org/W2011264131","https://openalex.org/W4306353150","https://openalex.org/W2026860389","https://openalex.org/W8219677","https://openalex.org/W3216879894","https://openalex.org/W2890132085","https://openalex.org/W2168054807","https://openalex.org/W2058990474","https://openalex.org/W2941608206"],"abstract_inverted_index":{"The":[0,91],"Weakly-Supervised":[1],"Audio-Visual":[2],"Video":[3],"Parsing":[4],"(AVVP)":[5],"task":[6,49],"aims":[7],"to":[8,61,106,112,141],"parse":[9],"a":[10,85,132],"video":[11,55,70,201],"into":[12],"temporal":[13,34,97,166],"segments":[14],"and":[15,36,69,72,186,206],"predict":[16],"their":[17],"event":[18,44],"categories":[19],"in":[20,96,164],"terms":[21],"of":[22,120,151],"modalities,":[23,197],"labeling":[24],"them":[25],"as":[26],"either":[27],"audible,":[28],"visible,":[29],"or":[30],"both.":[31],"Since":[32],"the":[33,67,77,104,113,117,143,192,200,215,220],"boundaries":[35],"modalities":[37,122],"annotations":[38],"are":[39,46,123],"not":[40,124],"provided,":[41],"only":[42],"video-level":[43],"labels":[45],"available,":[47],"this":[48,128],"is":[50,99],"more":[51],"challenging":[52],"than":[53],"conventional":[54],"understanding":[56],"tasks.Most":[57],"previous":[58],"works":[59],"attempt":[60],"analyze":[62],"videos":[63],"by":[64],"jointly":[65],"modeling":[66,158],"audio":[68,185],"data":[71],"then":[73],"learning":[74,174],"information":[75,94,176],"from":[76,177],"segment-level":[78],"features":[79,119],"with":[80],"fixed":[81],"lengths.":[82],"However,":[83],"such":[84],"design":[86],"exist":[87],"two":[88,145],"defects:":[89],"1)":[90,154],"various":[92],"semantic":[93],"hidden":[95],"lengths":[98],"neglected,":[100],"which":[101],"may":[102],"lead":[103],"models":[105],"learn":[107],"incorrect":[108],"information;":[109],"2)":[110,168],"Due":[111],"joint":[114],"context":[115,157],"modeling,":[116],"unique":[118,175],"different":[121,162,178,196],"fully":[125],"explored.":[126],"In":[127],"paper,":[129],"we":[130],"propose":[131],"novel":[133],"AVVP":[134,221],"framework":[135,183],"termedDual":[136],"Hierarchical":[137],"Hybrid":[138],"Network":[139],"(DHHN)":[140],"tackle":[142],"above":[144],"problems.":[146],"Our":[147],"DHHN":[148],"method":[149,213],"consists":[150],"three":[152],"components:":[153],"A":[155,169,181],"hierarchical":[156],"network":[159,172],"for":[160,173],"extracting":[161],"semantics":[163],"multiple":[165],"lengths;":[167],"modality-wise":[170],"guiding":[171],"modalities;":[179],"3)":[180],"dual-stream":[182],"generating":[184],"visual":[187],"predictions":[188],"separately.":[189],"It":[190],"maintains":[191],"best":[193],"adaptions":[194],"on":[195,219],"further":[198],"boosting":[199],"parsing":[202],"performance.":[203],"Extensive":[204],"quantitative":[205],"qualitative":[207],"experiments":[208],"demonstrate":[209],"that":[210],"our":[211],"proposed":[212],"establishes":[214],"new":[216],"state-of-the-art":[217],"performance":[218],"task.":[222]},"counts_by_year":[{"year":2025,"cited_by_count":7},{"year":2024,"cited_by_count":21},{"year":2023,"cited_by_count":5}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
