{"id":"https://openalex.org/W4387968342","doi":"https://doi.org/10.1145/3581783.3612506","title":"Learning Event-Specific Localization Preferences for Audio-Visual Event Localization","display_name":"Learning Event-Specific Localization Preferences for Audio-Visual Event Localization","publication_year":2023,"publication_date":"2023-10-26","ids":{"openalex":"https://openalex.org/W4387968342","doi":"https://doi.org/10.1145/3581783.3612506"},"language":"en","primary_location":{"id":"doi:10.1145/3581783.3612506","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3581783.3612506","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 31st ACM International Conference on Multimedia","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5044227484","display_name":"Shiping Ge","orcid":"https://orcid.org/0000-0001-9198-5324"},"institutions":[{"id":"https://openalex.org/I881766915","display_name":"Nanjing University","ror":"https://ror.org/01rxvg760","country_code":"CN","type":"education","lineage":["https://openalex.org/I881766915"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Shiping Ge","raw_affiliation_strings":["Nanjing University, Nanjing, China"],"raw_orcid":"https://orcid.org/0000-0001-9198-5324","affiliations":[{"raw_affiliation_string":"Nanjing University, Nanjing, China","institution_ids":["https://openalex.org/I881766915"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5082553860","display_name":"Zhiwei Jiang","orcid":"https://orcid.org/0000-0001-5243-4992"},"institutions":[{"id":"https://openalex.org/I881766915","display_name":"Nanjing University","ror":"https://ror.org/01rxvg760","country_code":"CN","type":"education","lineage":["https://openalex.org/I881766915"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhiwei Jiang","raw_affiliation_strings":["Nanjing University, Nanjing, China"],"raw_orcid":"https://orcid.org/0000-0001-5243-4992","affiliations":[{"raw_affiliation_string":"Nanjing University, Nanjing, China","institution_ids":["https://openalex.org/I881766915"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5087888694","display_name":"Yafeng Yin","orcid":"https://orcid.org/0000-0002-9497-6244"},"institutions":[{"id":"https://openalex.org/I881766915","display_name":"Nanjing University","ror":"https://ror.org/01rxvg760","country_code":"CN","type":"education","lineage":["https://openalex.org/I881766915"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yafeng Yin","raw_affiliation_strings":["Nanjing University, Nanjing, China"],"raw_orcid":"https://orcid.org/0000-0002-9497-6244","affiliations":[{"raw_affiliation_string":"Nanjing University, Nanjing, China","institution_ids":["https://openalex.org/I881766915"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5015574942","display_name":"Cong Wang","orcid":"https://orcid.org/0000-0003-0916-7803"},"institutions":[{"id":"https://openalex.org/I881766915","display_name":"Nanjing University","ror":"https://ror.org/01rxvg760","country_code":"CN","type":"education","lineage":["https://openalex.org/I881766915"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Cong Wang","raw_affiliation_strings":["Nanjing University, Nanjing, China"],"raw_orcid":"https://orcid.org/0000-0003-0916-7803","affiliations":[{"raw_affiliation_string":"Nanjing University, Nanjing, China","institution_ids":["https://openalex.org/I881766915"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5033620369","display_name":"Zifeng Cheng","orcid":"https://orcid.org/0000-0002-8486-2614"},"institutions":[{"id":"https://openalex.org/I881766915","display_name":"Nanjing University","ror":"https://ror.org/01rxvg760","country_code":"CN","type":"education","lineage":["https://openalex.org/I881766915"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zifeng Cheng","raw_affiliation_strings":["Nanjing University, Nanjing, China"],"raw_orcid":"https://orcid.org/0000-0002-8486-2614","affiliations":[{"raw_affiliation_string":"Nanjing University, Nanjing, China","institution_ids":["https://openalex.org/I881766915"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5061025205","display_name":"Qing Gu","orcid":"https://orcid.org/0000-0002-1112-790X"},"institutions":[{"id":"https://openalex.org/I881766915","display_name":"Nanjing University","ror":"https://ror.org/01rxvg760","country_code":"CN","type":"education","lineage":["https://openalex.org/I881766915"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Qing Gu","raw_affiliation_strings":["Nanjing University, Nanjing, China"],"raw_orcid":"https://orcid.org/0000-0002-1112-790X","affiliations":[{"raw_affiliation_string":"Nanjing University, Nanjing, China","institution_ids":["https://openalex.org/I881766915"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5044227484"],"corresponding_institution_ids":["https://openalex.org/I881766915"],"apc_list":null,"apc_paid":null,"fwci":1.727,"has_fulltext":false,"cited_by_count":9,"citation_normalized_percentile":{"value":0.85793872,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"3446","last_page":"3454"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11665","display_name":"Animal Vocal Communication and Behavior","score":0.9699000120162964,"subfield":{"id":"https://openalex.org/subfields/1309","display_name":"Developmental Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/event","display_name":"Event (particle physics)","score":0.8059107065200806},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7618476152420044},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.6483380794525146},{"id":"https://openalex.org/keywords/metric","display_name":"Metric (unit)","score":0.6345021724700928},{"id":"https://openalex.org/keywords/modality","display_name":"Modality (human\u2013computer interaction)","score":0.6197066307067871},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6146765947341919},{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.5614174604415894},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.5281262993812561},{"id":"https://openalex.org/keywords/audio-visual","display_name":"Audio visual","score":0.5263584852218628},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.5136334300041199},{"id":"https://openalex.org/keywords/visualization","display_name":"Visualization","score":0.41193097829818726},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.3578742742538452},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3445425033569336},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.09943276643753052},{"id":"https://openalex.org/keywords/multimedia","display_name":"Multimedia","score":0.07576954364776611}],"concepts":[{"id":"https://openalex.org/C2779662365","wikidata":"https://www.wikidata.org/wiki/Q5416694","display_name":"Event (particle physics)","level":2,"score":0.8059107065200806},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7618476152420044},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.6483380794525146},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.6345021724700928},{"id":"https://openalex.org/C2780226545","wikidata":"https://www.wikidata.org/wiki/Q6888030","display_name":"Modality (human\u2013computer interaction)","level":2,"score":0.6197066307067871},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6146765947341919},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.5614174604415894},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.5281262993812561},{"id":"https://openalex.org/C3017588708","wikidata":"https://www.wikidata.org/wiki/Q758901","display_name":"Audio visual","level":2,"score":0.5263584852218628},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.5136334300041199},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.41193097829818726},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.3578742742538452},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3445425033569336},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.09943276643753052},{"id":"https://openalex.org/C49774154","wikidata":"https://www.wikidata.org/wiki/Q131765","display_name":"Multimedia","level":1,"score":0.07576954364776611},{"id":"https://openalex.org/C120665830","wikidata":"https://www.wikidata.org/wiki/Q14620","display_name":"Optics","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3581783.3612506","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3581783.3612506","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 31st ACM International Conference on Multimedia","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G3757583711","display_name":null,"funder_award_id":"62172208","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G5612877841","display_name":null,"funder_award_id":"61972192, 62172208, 61906085, 41972111","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G6427539792","display_name":null,"funder_award_id":"61972192","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G6652830868","display_name":null,"funder_award_id":"61906085","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"},{"id":"https://openalex.org/G7830107406","display_name":null,"funder_award_id":"41972111","funder_id":"https://openalex.org/F4320321001","funder_display_name":"National Natural Science Foundation of China"}],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":23,"referenced_works":["https://openalex.org/W2108598243","https://openalex.org/W2187089797","https://openalex.org/W2593116425","https://openalex.org/W2964109005","https://openalex.org/W2990113535","https://openalex.org/W2997909293","https://openalex.org/W3015925607","https://openalex.org/W3035524453","https://openalex.org/W3093287838","https://openalex.org/W3094550259","https://openalex.org/W3170717896","https://openalex.org/W3175514052","https://openalex.org/W3176445421","https://openalex.org/W4221154629","https://openalex.org/W4226206782","https://openalex.org/W4226271114","https://openalex.org/W4304080636","https://openalex.org/W4312280925","https://openalex.org/W4312415534","https://openalex.org/W4312657985","https://openalex.org/W4312700302","https://openalex.org/W4313117614","https://openalex.org/W4319300466"],"related_works":["https://openalex.org/W2271369634","https://openalex.org/W3147472394","https://openalex.org/W2047100085","https://openalex.org/W2350550760","https://openalex.org/W578794879","https://openalex.org/W2625296515","https://openalex.org/W2385859805","https://openalex.org/W3137890128","https://openalex.org/W1984634519","https://openalex.org/W4245955731"],"abstract_inverted_index":{"Audio-Visual":[0],"Event":[1],"Localization":[2],"(AVEL)":[3],"aims":[4],"to":[5,30,48,89,112,140],"locate":[6],"events":[7,34],"that":[8,27,90],"are":[9,28],"both":[10,119,160],"visible":[11],"and":[12,83,122,144,149,162],"audible":[13],"in":[14,61,159],"a":[15,72,101,135],"video.":[16],"Existing":[17],"AVEL":[18],"methods":[19],"primarily":[20],"focus":[21],"on":[22,154],"learning":[23,104],"generic":[24],"localization":[25,50,63,75,86],"patterns":[26],"applicable":[29],"all":[31],"events.":[32],"However,":[33],"often":[35],"exhibit":[36],"modality":[37],"biases,":[38],"such":[39],"as":[40,110],"visual-dominated,":[41],"audio-dominated,":[42],"or":[43],"modality-balanced,":[44],"which":[45,77,106],"can":[46],"lead":[47],"different":[49],"preferences.":[51],"These":[52],"preferences":[53,87],"may":[54],"be":[55,173],"overlooked":[56],"by":[57],"existing":[58],"methods,":[59],"resulting":[60],"unsatisfactory":[62],"performance.":[64],"To":[65,96,125],"address":[66],"this":[67,69,130],"issue,":[68],"paper":[70],"proposes":[71],"novel":[73],"event-aware":[74],"paradigm,":[76],"first":[78],"identifies":[79],"the":[80,114,142,155,166],"event":[81,91,94,120,123],"category":[82],"then":[84],"leverages":[85],"specific":[88],"for":[92,118,129],"improved":[93],"localization.":[95,124],"achieve":[97],"this,":[98],"we":[99,133],"introduce":[100],"memory-assisted":[102],"metric":[103,131],"framework,":[105],"utilizes":[107],"historic":[108],"segments":[109],"anchors":[111],"adjust":[113],"unified":[115],"representation":[116],"space":[117],"classification":[121],"provide":[126],"sufficient":[127],"information":[128],"learning,":[132],"design":[134],"spatial-temporal":[136],"audio-visual":[137],"fusion":[138],"encoder":[139],"capture":[141],"spatial":[143],"temporal":[145],"interaction":[146],"between":[147],"audio":[148],"visual":[150],"modalities.":[151],"Extensive":[152],"experiments":[153],"public":[156],"AVE":[157],"dataset":[158],"fully-supervised":[161],"weakly-supervised":[163],"settings":[164],"demonstrate":[165],"effectiveness":[167],"of":[168],"our":[169],"approach.":[170],"Code":[171],"will":[172],"released":[174],"at":[175],"https://github.com/ShipingGe/AVEL.":[176]},"counts_by_year":[{"year":2026,"cited_by_count":1},{"year":2025,"cited_by_count":7},{"year":2024,"cited_by_count":1}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
