{"id":"https://openalex.org/W2014474048","doi":"https://doi.org/10.1109/msp.2006.1621450","title":"Automatic multimedia indexing: combining audio, speech, and visual information to index broadcast news","display_name":"Automatic multimedia indexing: combining audio, speech, and visual information to index broadcast news","publication_year":2006,"publication_date":"2006-03-01","ids":{"openalex":"https://openalex.org/W2014474048","doi":"https://doi.org/10.1109/msp.2006.1621450","mag":"2014474048"},"language":"en","primary_location":{"id":"doi:10.1109/msp.2006.1621450","is_oa":false,"landing_page_url":"https://doi.org/10.1109/msp.2006.1621450","pdf_url":null,"source":{"id":"https://openalex.org/S120977877","display_name":"IEEE Signal Processing Magazine","issn_l":"1053-5888","issn":["1053-5888","1558-0792"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Signal Processing Magazine","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5008591735","display_name":"Katsutoshi Ohtsuki","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"K. Ohtsuki","raw_affiliation_strings":["Institute of Electronics, Information and Communication Engineers (IEICE), ASJ","[Institute of Electronics, Information and Communication Engineers (IEICE), ASJ]"],"affiliations":[{"raw_affiliation_string":"Institute of Electronics, Information and Communication Engineers (IEICE), ASJ","institution_ids":[]},{"raw_affiliation_string":"[Institute of Electronics, Information and Communication Engineers (IEICE), ASJ]","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5033292421","display_name":"Katsuji Bessho","orcid":null},"institutions":[{"id":"https://openalex.org/I4387930200","display_name":"Information Processing Society of Japan","ror":"https://ror.org/02v970d04","country_code":null,"type":"nonprofit","lineage":["https://openalex.org/I4387930200"]},{"id":"https://openalex.org/I4210092329","display_name":"Institute of Electronics, Information and Communication Engineers","ror":"https://ror.org/00e1wd906","country_code":"JP","type":"nonprofit","lineage":["https://openalex.org/I4210092329"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"K. Bessho","raw_affiliation_strings":["IEICE and the Information Processing Society of Japan (IPSJ)","[IEICE and the Information Processing Society of Japan (IPSJ)]"],"affiliations":[{"raw_affiliation_string":"IEICE and the Information Processing Society of Japan (IPSJ)","institution_ids":["https://openalex.org/I4210092329","https://openalex.org/I4387930200"]},{"raw_affiliation_string":"[IEICE and the Information Processing Society of Japan (IPSJ)]","institution_ids":["https://openalex.org/I4210092329","https://openalex.org/I4387930200"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5026779886","display_name":"Yasumitsu Matsuo","orcid":"https://orcid.org/0000-0003-4971-9326"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Y. Matsuo","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5108717707","display_name":"S. Matsunaga","orcid":"https://orcid.org/0009-0004-6462-9518"},"institutions":[{"id":"https://openalex.org/I3130035786","display_name":"University Ucinf","ror":"https://ror.org/02cs9pv18","country_code":"CL","type":"education","lineage":["https://openalex.org/I3130035786"]}],"countries":["CL"],"is_corresponding":false,"raw_author_name":"S. Matsunaga","raw_affiliation_strings":["Department of Computer and Information Sciences","[Department of Computer and Information Sciences]"],"affiliations":[{"raw_affiliation_string":"Department of Computer and Information Sciences","institution_ids":[]},{"raw_affiliation_string":"[Department of Computer and Information Sciences]","institution_ids":["https://openalex.org/I3130035786"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5060812959","display_name":"Yoshikatsu Hayashi","orcid":"https://orcid.org/0000-0002-9207-6322"},"institutions":[{"id":"https://openalex.org/I1344076864","display_name":"Center for Applied Linguistics","ror":"https://ror.org/020pekv35","country_code":"US","type":"nonprofit","lineage":["https://openalex.org/I1344076864"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Y. Hayashi","raw_affiliation_strings":["ACM Special Interest Group on Information Retrieval (SIGIR), Association for Computational Linguistics (ACL)","[ACM Special Interest Group on Information Retrieval (SIGIR), Association for Computational Linguistics (ACL)]"],"affiliations":[{"raw_affiliation_string":"ACM Special Interest Group on Information Retrieval (SIGIR), Association for Computational Linguistics (ACL)","institution_ids":["https://openalex.org/I1344076864"]},{"raw_affiliation_string":"[ACM Special Interest Group on Information Retrieval (SIGIR), Association for Computational Linguistics (ACL)]","institution_ids":["https://openalex.org/I1344076864"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5008591735"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.5901,"has_fulltext":false,"cited_by_count":15,"citation_normalized_percentile":{"value":0.81998805,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":98},"biblio":{"volume":"23","issue":"2","first_page":"69","last_page":"78"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9980000257492065,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9962000250816345,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8894613981246948},{"id":"https://openalex.org/keywords/search-engine-indexing","display_name":"Search engine indexing","score":0.8288688659667969},{"id":"https://openalex.org/keywords/audio-mining","display_name":"Audio mining","score":0.7841246128082275},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6179056167602539},{"id":"https://openalex.org/keywords/metadata","display_name":"Metadata","score":0.5710482001304626},{"id":"https://openalex.org/keywords/acoustic-model","display_name":"Acoustic model","score":0.5567142367362976},{"id":"https://openalex.org/keywords/smoothing","display_name":"Smoothing","score":0.4968922436237335},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.44056209921836853},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.4212993085384369},{"id":"https://openalex.org/keywords/speech-processing","display_name":"Speech processing","score":0.4004736542701721},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.22188982367515564}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8894613981246948},{"id":"https://openalex.org/C75165309","wikidata":"https://www.wikidata.org/wiki/Q2258979","display_name":"Search engine indexing","level":2,"score":0.8288688659667969},{"id":"https://openalex.org/C157968479","wikidata":"https://www.wikidata.org/wiki/Q3079876","display_name":"Audio mining","level":4,"score":0.7841246128082275},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6179056167602539},{"id":"https://openalex.org/C93518851","wikidata":"https://www.wikidata.org/wiki/Q180160","display_name":"Metadata","level":2,"score":0.5710482001304626},{"id":"https://openalex.org/C155635449","wikidata":"https://www.wikidata.org/wiki/Q4674699","display_name":"Acoustic model","level":3,"score":0.5567142367362976},{"id":"https://openalex.org/C3770464","wikidata":"https://www.wikidata.org/wiki/Q775963","display_name":"Smoothing","level":2,"score":0.4968922436237335},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.44056209921836853},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.4212993085384369},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.4004736542701721},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.22188982367515564},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/msp.2006.1621450","is_oa":false,"landing_page_url":"https://doi.org/10.1109/msp.2006.1621450","pdf_url":null,"source":{"id":"https://openalex.org/S120977877","display_name":"IEEE Signal Processing Magazine","issn_l":"1053-5888","issn":["1053-5888","1558-0792"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Signal Processing Magazine","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","score":0.46000000834465027,"id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":21,"referenced_works":["https://openalex.org/W77193200","https://openalex.org/W164803793","https://openalex.org/W1531793743","https://openalex.org/W1587067473","https://openalex.org/W1828401780","https://openalex.org/W1987532638","https://openalex.org/W2100873065","https://openalex.org/W2115440794","https://openalex.org/W2124988752","https://openalex.org/W2127914268","https://openalex.org/W2131156500","https://openalex.org/W2144345993","https://openalex.org/W2160376883","https://openalex.org/W2162862842","https://openalex.org/W2166655755","https://openalex.org/W2405717073","https://openalex.org/W6606891414","https://openalex.org/W6631842171","https://openalex.org/W6638575021","https://openalex.org/W6684584064","https://openalex.org/W6713617950"],"related_works":["https://openalex.org/W2157598242","https://openalex.org/W2121486117","https://openalex.org/W2619911963","https://openalex.org/W2754746744","https://openalex.org/W2122924390","https://openalex.org/W2137058881","https://openalex.org/W22517275","https://openalex.org/W2030635983","https://openalex.org/W213567481","https://openalex.org/W815866509"],"abstract_inverted_index":{"This":[0],"paper":[1],"describes":[2],"an":[3],"indexing":[4,26,41],"system":[5,27],"that":[6],"automatically":[7],"creates":[8],"metadata":[9],"for":[10],"multimedia":[11,24],"broadcast":[12],"news":[13,66],"content":[14,25],"by":[15,146],"integrating":[16,128],"audio,":[17],"speech,":[18],"and":[19,39,47,91,136,153],"visual":[20],"information.":[21],"The":[22,43,100,120,139],"automatic":[23,32,73],"includes":[28],"acoustic":[29,76,89,98],"segmentation":[30,37],"(AS),":[31],"speech":[33,56,70],"recognition":[34,71,86],"(ASR),":[35],"topic":[36],"(TS),":[38],"video":[40],"features.":[42],"new":[44],"spectral-based":[45],"features":[46],"smoothing":[48],"method":[49,102,114],"in":[50],"the":[51,55,60,64,69,96,112,125,131,150,154,159],"AS":[52,132,151],"module":[53,123],"improved":[54,145],"detection":[57,142],"performance":[58],"from":[59,130],"audio":[61],"stream":[62],"of":[63,75,127],"input":[65],"content.":[67],"In":[68],"module,":[72,133,135],"selection":[74],"models":[77],"achieved":[78,107],"both":[79],"a":[80],"low":[81],"WER,":[82],"as":[83,94],"with":[84,95,149],"parallel":[85],"using":[87,103,115],"multiple":[88],"models,":[90],"fast":[92],"recognition,":[93],"single":[97],"model.":[99],"TS":[101,134,161],"word":[104,117],"concept":[105],"vectors":[106],"more":[108],"accurate":[109],"results":[110,129,152,156,162],"than":[111],"conventional":[113],"local":[116],"frequency":[118],"vectors.":[119],"information":[121],"integration":[122],"provides":[124],"functionality":[126],"SC":[137,155],"module.":[138],"story":[140],"boundary":[141],"accuracy":[143],"was":[144],"combining":[147],"it":[148],"compared":[157],"to":[158],"sole":[160]},"counts_by_year":[{"year":2016,"cited_by_count":1},{"year":2014,"cited_by_count":1},{"year":2013,"cited_by_count":4},{"year":2012,"cited_by_count":3}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
