{"id":"https://openalex.org/W1996262499","doi":"https://doi.org/10.1109/icassp.2002.5745047","title":"Identification of speakers in movie dialogs using audiovisual cues","display_name":"Identification of speakers in movie dialogs using audiovisual cues","publication_year":2002,"publication_date":"2002-05-01","ids":{"openalex":"https://openalex.org/W1996262499","doi":"https://doi.org/10.1109/icassp.2002.5745047","mag":"1996262499"},"language":"en","primary_location":{"id":"doi:10.1109/icassp.2002.5745047","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp.2002.5745047","pdf_url":null,"source":{"id":"https://openalex.org/S4363607879","display_name":"IEEE International Conference on Acoustics Speech and Signal Processing","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE International Conference on Acoustics Speech and Signal Processing","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100414362","display_name":"Ying Li","orcid":"https://orcid.org/0000-0002-9604-2664"},"institutions":[{"id":"https://openalex.org/I1174212","display_name":"University of Southern California","ror":"https://ror.org/03taz7m60","country_code":"US","type":"education","lineage":["https://openalex.org/I1174212"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Ying Li","raw_affiliation_strings":["Integrated Media Systems Center and Department of Electrical Engineering, University of Southern California, Los Angeles, CA, USA","Integrated Media Systems Center and Department of Electrical Engineering, University of Southern California, Los Angeles, 90089-2564, USA"],"affiliations":[{"raw_affiliation_string":"Integrated Media Systems Center and Department of Electrical Engineering, University of Southern California, Los Angeles, CA, USA","institution_ids":["https://openalex.org/I1174212"]},{"raw_affiliation_string":"Integrated Media Systems Center and Department of Electrical Engineering, University of Southern California, Los Angeles, 90089-2564, USA","institution_ids":["https://openalex.org/I1174212"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5010028928","display_name":"Shrikanth Narayanan","orcid":"https://orcid.org/0000-0002-1052-6204"},"institutions":[{"id":"https://openalex.org/I1174212","display_name":"University of Southern California","ror":"https://ror.org/03taz7m60","country_code":"US","type":"education","lineage":["https://openalex.org/I1174212"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Shrikanth Narayanan","raw_affiliation_strings":["Integrated Media Systems Center and Department of Electrical Engineering, University of Southern California, Los Angeles, CA, USA","Integrated Media Systems Center and Department of Electrical Engineering, University of Southern California, Los Angeles, 90089-2564, USA"],"affiliations":[{"raw_affiliation_string":"Integrated Media Systems Center and Department of Electrical Engineering, University of Southern California, Los Angeles, CA, USA","institution_ids":["https://openalex.org/I1174212"]},{"raw_affiliation_string":"Integrated Media Systems Center and Department of Electrical Engineering, University of Southern California, Los Angeles, 90089-2564, USA","institution_ids":["https://openalex.org/I1174212"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5001082656","display_name":"C.\u2010C. Jay Kuo","orcid":"https://orcid.org/0000-0001-9474-5035"},"institutions":[{"id":"https://openalex.org/I1174212","display_name":"University of Southern California","ror":"https://ror.org/03taz7m60","country_code":"US","type":"education","lineage":["https://openalex.org/I1174212"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"C.-C. Jay Kuo","raw_affiliation_strings":["Integrated Media Systems Center and Department of Electrical Engineering, University of Southern California, Los Angeles, CA, USA","Integrated Media Systems Center and Department of Electrical Engineering, University of Southern California, Los Angeles, 90089-2564, USA"],"affiliations":[{"raw_affiliation_string":"Integrated Media Systems Center and Department of Electrical Engineering, University of Southern California, Los Angeles, CA, USA","institution_ids":["https://openalex.org/I1174212"]},{"raw_affiliation_string":"Integrated Media Systems Center and Department of Electrical Engineering, University of Southern California, Los Angeles, 90089-2564, USA","institution_ids":["https://openalex.org/I1174212"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5100414362"],"corresponding_institution_ids":["https://openalex.org/I1174212"],"apc_list":null,"apc_paid":null,"fwci":1.2966,"has_fulltext":false,"cited_by_count":5,"citation_normalized_percentile":{"value":0.74311927,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"4519","issue":null,"first_page":"II","last_page":"2093"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11439","display_name":"Video Analysis and Summarization","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8237122297286987},{"id":"https://openalex.org/keywords/silence","display_name":"Silence","score":0.6841673851013184},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.6643500328063965},{"id":"https://openalex.org/keywords/identification","display_name":"Identification (biology)","score":0.6172440648078918},{"id":"https://openalex.org/keywords/dialog-box","display_name":"Dialog box","score":0.5991017818450928},{"id":"https://openalex.org/keywords/voice-activity-detection","display_name":"Voice activity detection","score":0.510812520980835},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.45257940888404846},{"id":"https://openalex.org/keywords/detector","display_name":"Detector","score":0.4459885060787201},{"id":"https://openalex.org/keywords/shot","display_name":"Shot (pellet)","score":0.42601025104522705},{"id":"https://openalex.org/keywords/speech-processing","display_name":"Speech processing","score":0.30174529552459717}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8237122297286987},{"id":"https://openalex.org/C2781115785","wikidata":"https://www.wikidata.org/wiki/Q502261","display_name":"Silence","level":2,"score":0.6841673851013184},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6643500328063965},{"id":"https://openalex.org/C116834253","wikidata":"https://www.wikidata.org/wiki/Q2039217","display_name":"Identification (biology)","level":2,"score":0.6172440648078918},{"id":"https://openalex.org/C173853756","wikidata":"https://www.wikidata.org/wiki/Q86915","display_name":"Dialog box","level":2,"score":0.5991017818450928},{"id":"https://openalex.org/C204201278","wikidata":"https://www.wikidata.org/wiki/Q1332614","display_name":"Voice activity detection","level":3,"score":0.510812520980835},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.45257940888404846},{"id":"https://openalex.org/C94915269","wikidata":"https://www.wikidata.org/wiki/Q1834857","display_name":"Detector","level":2,"score":0.4459885060787201},{"id":"https://openalex.org/C2778344882","wikidata":"https://www.wikidata.org/wiki/Q278938","display_name":"Shot (pellet)","level":2,"score":0.42601025104522705},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.30174529552459717},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.0},{"id":"https://openalex.org/C178790620","wikidata":"https://www.wikidata.org/wiki/Q11351","display_name":"Organic chemistry","level":1,"score":0.0},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0},{"id":"https://openalex.org/C59822182","wikidata":"https://www.wikidata.org/wiki/Q441","display_name":"Botany","level":1,"score":0.0},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C107038049","wikidata":"https://www.wikidata.org/wiki/Q35986","display_name":"Aesthetics","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp.2002.5745047","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp.2002.5745047","pdf_url":null,"source":{"id":"https://openalex.org/S4363607879","display_name":"IEEE International Conference on Acoustics Speech and Signal Processing","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE International Conference on Acoustics Speech and Signal Processing","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.75,"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":7,"referenced_works":["https://openalex.org/W1983011477","https://openalex.org/W2069883713","https://openalex.org/W2098559774","https://openalex.org/W2120212892","https://openalex.org/W2156648741","https://openalex.org/W2169516439","https://openalex.org/W6674694071"],"related_works":["https://openalex.org/W642007152","https://openalex.org/W2401827384","https://openalex.org/W2355290951","https://openalex.org/W2052688117","https://openalex.org/W2552102772","https://openalex.org/W2103239478","https://openalex.org/W4294771049","https://openalex.org/W1523214805","https://openalex.org/W2168417340","https://openalex.org/W4229451372"],"abstract_inverted_index":{"The":[0],"problem":[1],"of":[2,102],"identifying":[3],"speakers":[4],"from":[5,40,64],"a":[6,79,112],"movie":[7],"dialog":[8],"scene":[9],"is":[10,81],"addressed":[11],"in":[12],"this":[13,55],"paper.":[14],"While":[15],"most":[16],"previous":[17],"work":[18],"on":[19,84],"speaker":[20],"identification":[21,128],"has":[22],"been":[23],"carried":[24],"out":[25],"using":[26],"pure":[27],"audio":[28,48,71],"data,":[29],"more":[30],"robust":[31],"results":[32,118],"could":[33],"be":[34],"obtained":[35],"by":[36,66,130],"integrating":[37,131],"the":[38,85,89,100,103,121],"knowledge":[39],"multiple":[41,132],"media":[42,133],"sources":[43],"such":[44],"as":[45],"visual":[46],"and":[47,60,73,93],"information":[49],"when":[50],"they":[51],"are":[52],"available.":[53],"In":[54],"work,":[56],"we":[57,107],"first":[58],"identify":[59],"isolate":[61],"speech":[62,91],"segments":[63],"background":[65],"applying":[67],"video":[68],"shot":[69],"detection,":[70],"classification":[72],"adaptive":[74,104],"silence":[75,105,115],"detection":[76],"techniques,":[77],"then":[78],"decision":[80],"made":[82],"based":[83],"calculated":[86],"likelihood":[87],"between":[88],"incoming":[90],"data":[92],"pre-trained":[94],"speaker/background":[95],"models.":[96],"Moreover,":[97],"to":[98],"verify":[99],"effectiveness":[101],"detector,":[106],"have":[108],"compared":[109],"it":[110],"with":[111],"statistically":[113],"trained":[114],"model.":[116],"Experimental":[117],"show":[119],"that":[120],"proposed":[122],"algorithm":[123],"can":[124],"achieve":[125],"approximately":[126],"84%":[127],"accuracy":[129],"cues.":[134]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
