{"id":"https://openalex.org/W2002044752","doi":"https://doi.org/10.1145/1823746.1823748","title":"Audio-visual atoms for generic video concept classification","display_name":"Audio-visual atoms for generic video concept classification","publication_year":2010,"publication_date":"2010-08-01","ids":{"openalex":"https://openalex.org/W2002044752","doi":"https://doi.org/10.1145/1823746.1823748","mag":"2002044752"},"language":"en","primary_location":{"id":"doi:10.1145/1823746.1823748","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1823746.1823748","pdf_url":null,"source":{"id":"https://openalex.org/S19610489","display_name":"ACM Transactions on Multimedia Computing Communications and Applications","issn_l":"1551-6857","issn":["1551-6857","1551-6865"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Multimedia Computing, Communications, and Applications","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5002942261","display_name":"Wei Jiang","orcid":"https://orcid.org/0000-0002-9240-5851"},"institutions":[{"id":"https://openalex.org/I78577930","display_name":"Columbia University","ror":"https://ror.org/00hj8s172","country_code":"US","type":"education","lineage":["https://openalex.org/I78577930"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Wei Jiang","raw_affiliation_strings":["Columbia University, New York, NY","Columbia University, New York, NY;"],"affiliations":[{"raw_affiliation_string":"Columbia University, New York, NY","institution_ids":["https://openalex.org/I78577930"]},{"raw_affiliation_string":"Columbia University, New York, NY;","institution_ids":["https://openalex.org/I78577930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5076254512","display_name":"Courtenay V. Cotton","orcid":null},"institutions":[{"id":"https://openalex.org/I78577930","display_name":"Columbia University","ror":"https://ror.org/00hj8s172","country_code":"US","type":"education","lineage":["https://openalex.org/I78577930"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Courtenay Cotton","raw_affiliation_strings":["Columbia University, New York, NY","Columbia University, New York, NY;"],"affiliations":[{"raw_affiliation_string":"Columbia University, New York, NY","institution_ids":["https://openalex.org/I78577930"]},{"raw_affiliation_string":"Columbia University, New York, NY;","institution_ids":["https://openalex.org/I78577930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5037340457","display_name":"Shih\u2010Fu Chang","orcid":"https://orcid.org/0000-0003-1444-1205"},"institutions":[{"id":"https://openalex.org/I78577930","display_name":"Columbia University","ror":"https://ror.org/00hj8s172","country_code":"US","type":"education","lineage":["https://openalex.org/I78577930"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Shih-Fu Chang","raw_affiliation_strings":["Columbia University, New York, NY","Columbia University, New York, NY;"],"affiliations":[{"raw_affiliation_string":"Columbia University, New York, NY","institution_ids":["https://openalex.org/I78577930"]},{"raw_affiliation_string":"Columbia University, New York, NY;","institution_ids":["https://openalex.org/I78577930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5031069544","display_name":"Dan Ellis","orcid":null},"institutions":[{"id":"https://openalex.org/I78577930","display_name":"Columbia University","ror":"https://ror.org/00hj8s172","country_code":"US","type":"education","lineage":["https://openalex.org/I78577930"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Dan Ellis","raw_affiliation_strings":["Columbia University, New York, NY","Columbia University, New York, NY;"],"affiliations":[{"raw_affiliation_string":"Columbia University, New York, NY","institution_ids":["https://openalex.org/I78577930"]},{"raw_affiliation_string":"Columbia University, New York, NY;","institution_ids":["https://openalex.org/I78577930"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5051767562","display_name":"Alexander C. Loui","orcid":"https://orcid.org/0000-0002-7427-1503"},"institutions":[{"id":"https://openalex.org/I175669267","display_name":"Carestream (United States)","ror":"https://ror.org/048m16q57","country_code":"US","type":"company","lineage":["https://openalex.org/I175669267"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Alexander C. Loui","raw_affiliation_strings":["Eastman Kodak Company, Rochester, NY"],"affiliations":[{"raw_affiliation_string":"Eastman Kodak Company, Rochester, NY","institution_ids":["https://openalex.org/I175669267"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5002942261"],"corresponding_institution_ids":["https://openalex.org/I78577930"],"apc_list":null,"apc_paid":null,"fwci":2.6971,"has_fulltext":false,"cited_by_count":9,"citation_normalized_percentile":{"value":0.90111529,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":98},"biblio":{"volume":"6","issue":"3","first_page":"1","last_page":"19"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9987999796867371,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9915000200271606,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/discriminative-model","display_name":"Discriminative model","score":0.7984774112701416},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7434812784194946},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.7138827443122864},{"id":"https://openalex.org/keywords/audio-visual","display_name":"Audio visual","score":0.6840802431106567},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5653613209724426},{"id":"https://openalex.org/keywords/energy","display_name":"Energy (signal processing)","score":0.47511395812034607},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.4603644609451294},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.41371169686317444},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.40132802724838257},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.38310202956199646},{"id":"https://openalex.org/keywords/multimedia","display_name":"Multimedia","score":0.17928317189216614},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.08800011873245239}],"concepts":[{"id":"https://openalex.org/C97931131","wikidata":"https://www.wikidata.org/wiki/Q5282087","display_name":"Discriminative model","level":2,"score":0.7984774112701416},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7434812784194946},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.7138827443122864},{"id":"https://openalex.org/C3017588708","wikidata":"https://www.wikidata.org/wiki/Q758901","display_name":"Audio visual","level":2,"score":0.6840802431106567},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5653613209724426},{"id":"https://openalex.org/C186370098","wikidata":"https://www.wikidata.org/wiki/Q442787","display_name":"Energy (signal processing)","level":2,"score":0.47511395812034607},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.4603644609451294},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.41371169686317444},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.40132802724838257},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.38310202956199646},{"id":"https://openalex.org/C49774154","wikidata":"https://www.wikidata.org/wiki/Q131765","display_name":"Multimedia","level":1,"score":0.17928317189216614},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.08800011873245239},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C13280743","wikidata":"https://www.wikidata.org/wiki/Q131089","display_name":"Geodesy","level":1,"score":0.0},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.0},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/1823746.1823748","is_oa":false,"landing_page_url":"https://doi.org/10.1145/1823746.1823748","pdf_url":null,"source":{"id":"https://openalex.org/S19610489","display_name":"ACM Transactions on Multimedia Computing Communications and Applications","issn_l":"1551-6857","issn":["1551-6857","1551-6865"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319798","host_organization_name":"Association for Computing Machinery","host_organization_lineage":["https://openalex.org/P4310319798"],"host_organization_lineage_names":["Association for Computing Machinery"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ACM Transactions on Multimedia Computing, Communications, and Applications","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/10","score":0.6700000166893005,"display_name":"Reduced inequalities"}],"awards":[{"id":"https://openalex.org/G7031844454","display_name":null,"funder_award_id":"CNS-07-16293CNS-07-51078","funder_id":"https://openalex.org/F4320337388","funder_display_name":"Division of Computer and Network Systems"}],"funders":[{"id":"https://openalex.org/F4320337388","display_name":"Division of Computer and Network Systems","ror":"https://ror.org/02rdzmk74"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":34,"referenced_works":["https://openalex.org/W152287906","https://openalex.org/W1510601258","https://openalex.org/W1523506985","https://openalex.org/W1538021842","https://openalex.org/W1563256181","https://openalex.org/W1581253957","https://openalex.org/W1999075139","https://openalex.org/W2016067832","https://openalex.org/W2022692239","https://openalex.org/W2024046085","https://openalex.org/W2036931824","https://openalex.org/W2038735287","https://openalex.org/W2042707163","https://openalex.org/W2062903088","https://openalex.org/W2073295754","https://openalex.org/W2077072595","https://openalex.org/W2100032013","https://openalex.org/W2103861099","https://openalex.org/W2104019579","https://openalex.org/W2104978738","https://openalex.org/W2109124605","https://openalex.org/W2111918405","https://openalex.org/W2115517344","https://openalex.org/W2118877769","https://openalex.org/W2130103520","https://openalex.org/W2136595724","https://openalex.org/W2140647972","https://openalex.org/W2151103935","https://openalex.org/W2151693816","https://openalex.org/W2154318594","https://openalex.org/W2158169396","https://openalex.org/W2171819471","https://openalex.org/W2613779721","https://openalex.org/W4236965008"],"related_works":["https://openalex.org/W2073139667","https://openalex.org/W1652783584","https://openalex.org/W2729514902","https://openalex.org/W2024160000","https://openalex.org/W1990254706","https://openalex.org/W2773500201","https://openalex.org/W2061273563","https://openalex.org/W2285052147","https://openalex.org/W4287995534","https://openalex.org/W2743258233"],"abstract_inverted_index":{"We":[0,16,41],"investigate":[1],"the":[2,58,95],"challenging":[3],"issue":[4],"of":[5,9,97],"joint":[6],"audio-visual":[7,81],"analysis":[8],"generic":[10,51],"videos":[11,93],"targeting":[12],"at":[13],"concept":[14,86],"detection.":[15,87],"extract":[17,47],"a":[18,29,43],"novel":[19],"local":[20],"representation,":[21],"Audio-Visual":[22],"Atom":[23],"(AVA),":[24],"which":[25,79],"is":[26],"defined":[27],"as":[28],"region":[30],"track":[31],"associated":[32],"with":[33],"regional":[34],"visual":[35,48],"features":[36],"and":[37,53,72],"audio":[38,73],"onset":[39],"features.":[40],"develop":[42],"hierarchical":[44],"algorithm":[45],"to":[46],"atoms":[49,65,74],"from":[50,57],"videos,":[52],"locate":[54],"energy":[55,69],"onsets":[56],"corresponding":[59],"soundtrack":[60],"by":[61],"time-frequency":[62],"analysis.":[63],"Audio":[64],"are":[66,83],"extracted":[67],"around":[68],"onsets.":[70],"Visual":[71],"form":[75],"AVAs,":[76],"based":[77],"on":[78],"discriminative":[80],"codebooks":[82],"constructed":[84],"for":[85],"Experiments":[88],"over":[89],"Kodak's":[90],"consumer":[91],"benchmark":[92],"confirm":[94],"effectiveness":[96],"our":[98],"approach.":[99]},"counts_by_year":[{"year":2014,"cited_by_count":1},{"year":2013,"cited_by_count":2},{"year":2012,"cited_by_count":4}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
