{"id":"https://openalex.org/W2043194666","doi":"https://doi.org/10.1109/icassp.2014.6853819","title":"Exploring audio semantic concepts for event-based video retrieval","display_name":"Exploring audio semantic concepts for event-based video retrieval","publication_year":2014,"publication_date":"2014-05-01","ids":{"openalex":"https://openalex.org/W2043194666","doi":"https://doi.org/10.1109/icassp.2014.6853819","mag":"2043194666"},"language":"en","primary_location":{"id":"doi:10.1109/icassp.2014.6853819","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp.2014.6853819","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2014 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://figshare.com/articles/journal_contribution/Exploring_audio_semantic_concepts_for_event-based_video_retrieval/6473366","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101401275","display_name":"Yipei Wang","orcid":"https://orcid.org/0000-0002-9589-7177"},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Yipei Wang","raw_affiliation_strings":["Language Technologies Institute, Carnegie Mellon University, Pittsburgh, PA, U.S.A","Language Technology Institute, Carnegie Mellon University, Pittsburgh, PA, USA"],"affiliations":[{"raw_affiliation_string":"Language Technologies Institute, Carnegie Mellon University, Pittsburgh, PA, U.S.A","institution_ids":["https://openalex.org/I74973139"]},{"raw_affiliation_string":"Language Technology Institute, Carnegie Mellon University, Pittsburgh, PA, USA","institution_ids":["https://openalex.org/I74973139"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5086192353","display_name":"Shourabh Rawat","orcid":"https://orcid.org/0009-0000-0160-3741"},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Shourabh Rawat","raw_affiliation_strings":["Language Technologies Institute, Carnegie Mellon University, Pittsburgh, PA, U.S.A","Language Technology Institute, Carnegie Mellon University, Pittsburgh, PA, USA"],"affiliations":[{"raw_affiliation_string":"Language Technologies Institute, Carnegie Mellon University, Pittsburgh, PA, U.S.A","institution_ids":["https://openalex.org/I74973139"]},{"raw_affiliation_string":"Language Technology Institute, Carnegie Mellon University, Pittsburgh, PA, USA","institution_ids":["https://openalex.org/I74973139"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5085262529","display_name":"Florian Metze","orcid":"https://orcid.org/0000-0002-6663-8600"},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Florian Metze","raw_affiliation_strings":["Language Technologies Institute, Carnegie Mellon University, Pittsburgh, PA, U.S.A","Language Technology Institute, Carnegie Mellon University, Pittsburgh, PA, USA"],"affiliations":[{"raw_affiliation_string":"Language Technologies Institute, Carnegie Mellon University, Pittsburgh, PA, U.S.A","institution_ids":["https://openalex.org/I74973139"]},{"raw_affiliation_string":"Language Technology Institute, Carnegie Mellon University, Pittsburgh, PA, USA","institution_ids":["https://openalex.org/I74973139"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5101401275"],"corresponding_institution_ids":["https://openalex.org/I74973139"],"apc_list":null,"apc_paid":null,"fwci":2.0596,"has_fulltext":false,"cited_by_count":23,"citation_normalized_percentile":{"value":0.87302583,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":98},"biblio":{"volume":"3229","issue":null,"first_page":"1360","last_page":"1364"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9929999709129333,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9919999837875366,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8706878423690796},{"id":"https://openalex.org/keywords/event","display_name":"Event (particle physics)","score":0.6142568588256836},{"id":"https://openalex.org/keywords/feature-extraction","display_name":"Feature extraction","score":0.5290483832359314},{"id":"https://openalex.org/keywords/hidden-markov-model","display_name":"Hidden Markov model","score":0.5014867782592773},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.48765504360198975},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4644223749637604},{"id":"https://openalex.org/keywords/nist","display_name":"NIST","score":0.45524945855140686},{"id":"https://openalex.org/keywords/semantic-computing","display_name":"Semantic computing","score":0.453279584646225},{"id":"https://openalex.org/keywords/audio-analyzer","display_name":"Audio analyzer","score":0.4519875943660736},{"id":"https://openalex.org/keywords/semantics","display_name":"Semantics (computer science)","score":0.44821515679359436},{"id":"https://openalex.org/keywords/semantic-similarity","display_name":"Semantic similarity","score":0.4472804069519043},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.43846723437309265},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.4127744734287262},{"id":"https://openalex.org/keywords/audio-signal-processing","display_name":"Audio signal processing","score":0.4102741777896881},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3746577501296997},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.354658305644989},{"id":"https://openalex.org/keywords/audio-signal","display_name":"Audio signal","score":0.3095000386238098},{"id":"https://openalex.org/keywords/semantic-web","display_name":"Semantic Web","score":0.15740695595741272}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8706878423690796},{"id":"https://openalex.org/C2779662365","wikidata":"https://www.wikidata.org/wiki/Q5416694","display_name":"Event (particle physics)","level":2,"score":0.6142568588256836},{"id":"https://openalex.org/C52622490","wikidata":"https://www.wikidata.org/wiki/Q1026626","display_name":"Feature extraction","level":2,"score":0.5290483832359314},{"id":"https://openalex.org/C23224414","wikidata":"https://www.wikidata.org/wiki/Q176769","display_name":"Hidden Markov model","level":2,"score":0.5014867782592773},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.48765504360198975},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4644223749637604},{"id":"https://openalex.org/C111219384","wikidata":"https://www.wikidata.org/wiki/Q6954384","display_name":"NIST","level":2,"score":0.45524945855140686},{"id":"https://openalex.org/C511149849","wikidata":"https://www.wikidata.org/wiki/Q7449051","display_name":"Semantic computing","level":3,"score":0.453279584646225},{"id":"https://openalex.org/C160372630","wikidata":"https://www.wikidata.org/wiki/Q4819855","display_name":"Audio analyzer","level":5,"score":0.4519875943660736},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.44821515679359436},{"id":"https://openalex.org/C130318100","wikidata":"https://www.wikidata.org/wiki/Q2268914","display_name":"Semantic similarity","level":2,"score":0.4472804069519043},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.43846723437309265},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.4127744734287262},{"id":"https://openalex.org/C127220857","wikidata":"https://www.wikidata.org/wiki/Q2719318","display_name":"Audio signal processing","level":4,"score":0.4102741777896881},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3746577501296997},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.354658305644989},{"id":"https://openalex.org/C64922751","wikidata":"https://www.wikidata.org/wiki/Q4650799","display_name":"Audio signal","level":3,"score":0.3095000386238098},{"id":"https://openalex.org/C2129575","wikidata":"https://www.wikidata.org/wiki/Q54837","display_name":"Semantic Web","level":2,"score":0.15740695595741272},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C62520636","wikidata":"https://www.wikidata.org/wiki/Q944","display_name":"Quantum mechanics","level":1,"score":0.0},{"id":"https://openalex.org/C13895895","wikidata":"https://www.wikidata.org/wiki/Q3270773","display_name":"Speech coding","level":2,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":5,"locations":[{"id":"doi:10.1109/icassp.2014.6853819","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp.2014.6853819","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2014 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},{"id":"pmh:oai:repository.cmu.edu:lti-1076","is_oa":false,"landing_page_url":"http://repository.cmu.edu/cgi/viewcontent.cgi?article=1076&context=lti","pdf_url":null,"source":{"id":"https://openalex.org/S4306400668","display_name":"Research Showcase @ Carnegie Mellon University (Carnegie Mellon University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I74973139","host_organization_name":"Carnegie Mellon University","host_organization_lineage":["https://openalex.org/I74973139"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Language Technologies Institute","raw_type":"text"},{"id":"pmh:oai:CiteSeerX.psu:10.1.1.947.1753","is_oa":false,"landing_page_url":"http://citeseerx.ist.psu.edu/viewdoc/summary?doi=10.1.1.947.1753","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"http://repository.cmu.edu/cgi/viewcontent.cgi?article%3D1076%26context%3Dlti","raw_type":"text"},{"id":"pmh:oai:figshare.com:article/6473366","is_oa":true,"landing_page_url":"https://figshare.com/articles/journal_contribution/Exploring_audio_semantic_concepts_for_event-based_video_retrieval/6473366","pdf_url":null,"source":{"id":"https://openalex.org/S4377196282","display_name":"Figshare","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4210132348","host_organization_name":"Figshare (United Kingdom)","host_organization_lineage":["https://openalex.org/I4210132348"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Text"},{"id":"doi:10.1184/r1/6473366.v1","is_oa":true,"landing_page_url":"https://doi.org/10.1184/r1/6473366.v1","pdf_url":null,"source":{"id":"https://openalex.org/S7407050927","display_name":"KiltHub Repository","issn_l":null,"issn":[],"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article-journal"}],"best_oa_location":{"id":"pmh:oai:figshare.com:article/6473366","is_oa":true,"landing_page_url":"https://figshare.com/articles/journal_contribution/Exploring_audio_semantic_concepts_for_event-based_video_retrieval/6473366","pdf_url":null,"source":{"id":"https://openalex.org/S4377196282","display_name":"Figshare","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4210132348","host_organization_name":"Figshare (United Kingdom)","host_organization_lineage":["https://openalex.org/I4210132348"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Text"},"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https://openalex.org/F4320306116","display_name":"U.S. Department of the Interior","ror":"https://ror.org/03v0pmy70"},{"id":"https://openalex.org/F4320333452","display_name":"Interior Business Center","ror":null}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":14,"referenced_works":["https://openalex.org/W1535031652","https://openalex.org/W1545046063","https://openalex.org/W1600745603","https://openalex.org/W1956559956","https://openalex.org/W1979246310","https://openalex.org/W2074188409","https://openalex.org/W2083837083","https://openalex.org/W2101234009","https://openalex.org/W2111331420","https://openalex.org/W2119821739","https://openalex.org/W2134756243","https://openalex.org/W2144037622","https://openalex.org/W4239510810","https://openalex.org/W6679740967"],"related_works":["https://openalex.org/W2098934641","https://openalex.org/W1975359510","https://openalex.org/W2494533082","https://openalex.org/W4214771044","https://openalex.org/W4387698063","https://openalex.org/W4382560817","https://openalex.org/W3004352674","https://openalex.org/W3110605476","https://openalex.org/W1803351015","https://openalex.org/W2363106653"],"abstract_inverted_index":{"The":[0,93,100],"audio":[1,114],"semantic":[2,17,48,80,115],"concepts":[3,116],"(sound":[4],"events)":[5],"play":[6],"important":[7],"roles":[8],"in":[9,28,51,120],"audio-based":[10],"content":[11],"analysis.":[12],"How":[13],"to":[14,42,77],"capture":[15,117],"the":[16,21,44,57,66,83,87,105,121],"information":[18,49,119],"effectively":[19],"from":[20,82],"complex":[22,45],"occurrence":[23,67],"pattern":[24],"of":[25,70],"sound":[26,71],"events":[27,72],"YouTube":[29],"quality":[30],"videos":[31,53],"is":[32],"a":[33,39],"challenging":[34],"problem.":[35],"This":[36],"paper":[37],"presents":[38],"novel":[40],"framework":[41],"handle":[43],"situation":[46],"for":[47],"extraction":[50],"real-world":[52],"and":[54,73,108],"evaluate":[55,86],"through":[56],"NIST":[58],"multimedia":[59],"event":[60],"detection":[61],"task":[62],"(MED).":[63],"We":[64,85],"calculate":[65],"confidence":[68],"matrix":[69],"explore":[74],"multiple":[75],"strategies":[76],"generate":[78],"clip-level":[79],"features":[81,107],"matrix.":[84],"performance":[88],"using":[89],"TRECVID2011":[90],"MED":[91],"dataset.":[92],"proposed":[94],"method":[95],"outperforms":[96],"previous":[97],"HMM-based":[98],"system.":[99],"late":[101],"fusion":[102],"experiment":[103],"with":[104],"low-level":[106],"text":[109],"feature":[110],"(ASR)":[111],"shows":[112],"that":[113],"complementary":[118],"soundtrack.":[122]},"counts_by_year":[{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":3},{"year":2020,"cited_by_count":5},{"year":2019,"cited_by_count":4},{"year":2018,"cited_by_count":2},{"year":2017,"cited_by_count":5},{"year":2016,"cited_by_count":2}],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2025-10-10T00:00:00"}
