{"id":"https://openalex.org/W2169237872","doi":"https://doi.org/10.1109/mmsp.2004.1436416","title":"A design of audio-visual talker tracking system based on csp analysis and frame difference in real noisy environments","display_name":"A design of audio-visual talker tracking system based on csp analysis and frame difference in real noisy environments","publication_year":2005,"publication_date":"2005-06-07","ids":{"openalex":"https://openalex.org/W2169237872","doi":"https://doi.org/10.1109/mmsp.2004.1436416","mag":"2169237872"},"language":"en","primary_location":{"id":"doi:10.1109/mmsp.2004.1436416","is_oa":false,"landing_page_url":"https://doi.org/10.1109/mmsp.2004.1436416","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE 6th Workshop on Multimedia Signal Processing, 2004.","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5068006232","display_name":"Yuki Denda","orcid":null},"institutions":[{"id":"https://openalex.org/I75198481","display_name":"Wakayama University","ror":"https://ror.org/05wr49d48","country_code":"JP","type":"education","lineage":["https://openalex.org/I75198481"]}],"countries":["JP"],"is_corresponding":true,"raw_author_name":"Y. Denda","raw_affiliation_strings":["Faculty of Systems Engineering, Wakayama University, Wakayama, Japan"],"affiliations":[{"raw_affiliation_string":"Faculty of Systems Engineering, Wakayama University, Wakayama, Japan","institution_ids":["https://openalex.org/I75198481"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5009257831","display_name":"Takanobu Nishiura","orcid":"https://orcid.org/0000-0002-1706-4743"},"institutions":[{"id":"https://openalex.org/I135768898","display_name":"Ritsumeikan University","ror":"https://ror.org/0197nmd03","country_code":"JP","type":"education","lineage":["https://openalex.org/I135768898","https://openalex.org/I4390039241"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"T. Nishiura","raw_affiliation_strings":["College of Information Science and Engineering, Ritsumeikan University, Kusatsu, Japan"],"affiliations":[{"raw_affiliation_string":"College of Information Science and Engineering, Ritsumeikan University, Kusatsu, Japan","institution_ids":["https://openalex.org/I135768898"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5072749858","display_name":"Hideki Kawahara","orcid":"https://orcid.org/0000-0001-9360-5700"},"institutions":[{"id":"https://openalex.org/I75198481","display_name":"Wakayama University","ror":"https://ror.org/05wr49d48","country_code":"JP","type":"education","lineage":["https://openalex.org/I75198481"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"H. Kawahara","raw_affiliation_strings":["Faculty of Systems Engineering, Wakayama University, Wakayama, Japan"],"affiliations":[{"raw_affiliation_string":"Faculty of Systems Engineering, Wakayama University, Wakayama, Japan","institution_ids":["https://openalex.org/I75198481"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5056119492","display_name":"Timoharu IRINO","orcid":null},"institutions":[{"id":"https://openalex.org/I75198481","display_name":"Wakayama University","ror":"https://ror.org/05wr49d48","country_code":"JP","type":"education","lineage":["https://openalex.org/I75198481"]}],"countries":["JP"],"is_corresponding":false,"raw_author_name":"T. Irino","raw_affiliation_strings":["Faculty of Systems Engineering, Wakayama University, Wakayama, Japan"],"affiliations":[{"raw_affiliation_string":"Faculty of Systems Engineering, Wakayama University, Wakayama, Japan","institution_ids":["https://openalex.org/I75198481"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5068006232"],"corresponding_institution_ids":["https://openalex.org/I75198481"],"apc_list":null,"apc_paid":null,"fwci":0.6359,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.71740139,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"63","last_page":"66"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11233","display_name":"Advanced Adaptive Filtering Techniques","score":0.9975000023841858,"subfield":{"id":"https://openalex.org/subfields/2206","display_name":"Computational Mechanics"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9836999773979187,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7618473768234253},{"id":"https://openalex.org/keywords/microphone","display_name":"Microphone","score":0.6799018383026123},{"id":"https://openalex.org/keywords/signal","display_name":"SIGNAL (programming language)","score":0.623600959777832},{"id":"https://openalex.org/keywords/frame","display_name":"Frame (networking)","score":0.6056740283966064},{"id":"https://openalex.org/keywords/microphone-array","display_name":"Microphone array","score":0.5689091682434082},{"id":"https://openalex.org/keywords/teleconference","display_name":"Teleconference","score":0.5329034924507141},{"id":"https://openalex.org/keywords/tracking","display_name":"Tracking (education)","score":0.5210677981376648},{"id":"https://openalex.org/keywords/audio-signal","display_name":"Audio signal","score":0.5205774903297424},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.511905312538147},{"id":"https://openalex.org/keywords/computer-vision","display_name":"Computer vision","score":0.5066021680831909},{"id":"https://openalex.org/keywords/speech-enhancement","display_name":"Speech enhancement","score":0.49486321210861206},{"id":"https://openalex.org/keywords/audio-signal-processing","display_name":"Audio signal processing","score":0.41053885221481323},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4090903401374817},{"id":"https://openalex.org/keywords/speech-coding","display_name":"Speech coding","score":0.20961028337478638},{"id":"https://openalex.org/keywords/noise-reduction","display_name":"Noise reduction","score":0.1806771457195282}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7618473768234253},{"id":"https://openalex.org/C2778263558","wikidata":"https://www.wikidata.org/wiki/Q46384","display_name":"Microphone","level":3,"score":0.6799018383026123},{"id":"https://openalex.org/C2779843651","wikidata":"https://www.wikidata.org/wiki/Q7390335","display_name":"SIGNAL (programming language)","level":2,"score":0.623600959777832},{"id":"https://openalex.org/C126042441","wikidata":"https://www.wikidata.org/wiki/Q1324888","display_name":"Frame (networking)","level":2,"score":0.6056740283966064},{"id":"https://openalex.org/C2778806681","wikidata":"https://www.wikidata.org/wiki/Q907293","display_name":"Microphone array","level":4,"score":0.5689091682434082},{"id":"https://openalex.org/C126669455","wikidata":"https://www.wikidata.org/wiki/Q1630941","display_name":"Teleconference","level":2,"score":0.5329034924507141},{"id":"https://openalex.org/C2775936607","wikidata":"https://www.wikidata.org/wiki/Q466845","display_name":"Tracking (education)","level":2,"score":0.5210677981376648},{"id":"https://openalex.org/C64922751","wikidata":"https://www.wikidata.org/wiki/Q4650799","display_name":"Audio signal","level":3,"score":0.5205774903297424},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.511905312538147},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.5066021680831909},{"id":"https://openalex.org/C2776182073","wikidata":"https://www.wikidata.org/wiki/Q7575395","display_name":"Speech enhancement","level":3,"score":0.49486321210861206},{"id":"https://openalex.org/C127220857","wikidata":"https://www.wikidata.org/wiki/Q2719318","display_name":"Audio signal processing","level":4,"score":0.41053885221481323},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4090903401374817},{"id":"https://openalex.org/C13895895","wikidata":"https://www.wikidata.org/wiki/Q3270773","display_name":"Speech coding","level":2,"score":0.20961028337478638},{"id":"https://openalex.org/C163294075","wikidata":"https://www.wikidata.org/wiki/Q581861","display_name":"Noise reduction","level":2,"score":0.1806771457195282},{"id":"https://openalex.org/C68115822","wikidata":"https://www.wikidata.org/wiki/Q1068172","display_name":"Sound pressure","level":2,"score":0.0},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.0},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0},{"id":"https://openalex.org/C19417346","wikidata":"https://www.wikidata.org/wiki/Q7922","display_name":"Pedagogy","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/mmsp.2004.1436416","is_oa":false,"landing_page_url":"https://doi.org/10.1109/mmsp.2004.1436416","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE 6th Workshop on Multimedia Signal Processing, 2004.","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.5099999904632568,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":9,"referenced_works":["https://openalex.org/W2020933619","https://openalex.org/W2046317813","https://openalex.org/W2081415402","https://openalex.org/W2101609516","https://openalex.org/W2113416157","https://openalex.org/W2130432819","https://openalex.org/W2319794630","https://openalex.org/W4213181783","https://openalex.org/W4247773827"],"related_works":["https://openalex.org/W2380771148","https://openalex.org/W2348931051","https://openalex.org/W2132495960","https://openalex.org/W2538903081","https://openalex.org/W2389562147","https://openalex.org/W3008816511","https://openalex.org/W4386746628","https://openalex.org/W2109356272","https://openalex.org/W1525049495","https://openalex.org/W4286233499"],"abstract_inverted_index":{"It":[0],"is":[1,21,35,97],"very":[2],"important":[3],"to":[4,37],"capture":[5],"the":[6,30,39,60,85,91,126,131,136],"distant-talking":[7],"speech":[8],"with":[9,46,104,110],"high-quality":[10],"for":[11,25,29],"voice-controlled":[12],"systems":[13],"or":[14],"teleconferencing":[15],"systems.":[16],"A":[17],"microphone":[18,31],"array":[19,32],"steering":[20],"an":[22],"ideal":[23],"candidate":[24],"this":[26,71],"purpose.":[27],"However,":[28],"steering,":[33],"it":[34],"necessary":[36],"track":[38,130],"target":[40,61,77,132],"talker.":[41],"Conventional":[42],"talker":[43,62,78,133],"tracking":[44,79],"algorithms":[45],"audio":[47,86,105],"signal":[48,106],"only":[49,83],"(ex.":[50],"CSP":[51,102],"(cross-power":[52],"spectrum":[53],"phase)":[54],"analysis)":[55],"have":[56],"a":[57,75,114,120],"difficulty":[58],"estimating":[59],"direction":[63],"accurately":[64,134],"in":[65,119],"higher":[66],"noisy":[67],"environments.":[68],"To":[69],"overcome":[70],"problem,":[72],"we":[73,123],"propose":[74],"new":[76],"algorithm":[80,96,128],"that":[81,125],"not":[82],"utilize":[84,90],"signal,":[87],"but":[88],"also":[89],"visual":[92,111],"signal.":[93,112],"The":[94],"proposed":[95,127],"based":[98],"on":[99],"integration":[100],"of":[101,116],"analysis":[103],"and":[107],"frame":[108],"difference":[109],"As":[113],"result":[115],"evaluation":[117],"experiments":[118],"real":[121],"room,":[122],"confirmed":[124],"could":[129],"than":[135],"conventional":[137],"algorithm.":[138]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
