{"id":"https://openalex.org/W1501076695","doi":"https://doi.org/10.1109/isspa.2005.1580195","title":"Comparing audio and visual information for speech processing","display_name":"Comparing audio and visual information for speech processing","publication_year":2006,"publication_date":"2006-10-04","ids":{"openalex":"https://openalex.org/W1501076695","doi":"https://doi.org/10.1109/isspa.2005.1580195","mag":"1501076695"},"language":"en","primary_location":{"id":"doi:10.1109/isspa.2005.1580195","is_oa":false,"landing_page_url":"https://doi.org/10.1109/isspa.2005.1580195","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Eighth International Symposium on Signal Processing and Its Applications, 2005.","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://eprints.qut.edu.au/5342/1/4693.pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5000014182","display_name":"David Dean","orcid":"https://orcid.org/0000-0003-1274-9775"},"institutions":[{"id":"https://openalex.org/I160993911","display_name":"Queensland University of Technology","ror":"https://ror.org/03pnv4752","country_code":"AU","type":"education","lineage":["https://openalex.org/I160993911"]}],"countries":["AU"],"is_corresponding":true,"raw_author_name":"D. Dean","raw_affiliation_strings":["Speech, Audio, Image and Video Research Laboratory, Queensland University of Technology, Brisbane, Australia"],"affiliations":[{"raw_affiliation_string":"Speech, Audio, Image and Video Research Laboratory, Queensland University of Technology, Brisbane, Australia","institution_ids":["https://openalex.org/I160993911"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5032604032","display_name":"Patrick Lucey","orcid":null},"institutions":[{"id":"https://openalex.org/I160993911","display_name":"Queensland University of Technology","ror":"https://ror.org/03pnv4752","country_code":"AU","type":"education","lineage":["https://openalex.org/I160993911"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"P. Lucey","raw_affiliation_strings":["Speech, Audio, Image and Video Research Laboratory, Queensland University of Technology, Brisbane, Australia"],"affiliations":[{"raw_affiliation_string":"Speech, Audio, Image and Video Research Laboratory, Queensland University of Technology, Brisbane, Australia","institution_ids":["https://openalex.org/I160993911"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5055128383","display_name":"Sridha Sridharan","orcid":"https://orcid.org/0000-0003-4316-9001"},"institutions":[{"id":"https://openalex.org/I160993911","display_name":"Queensland University of Technology","ror":"https://ror.org/03pnv4752","country_code":"AU","type":"education","lineage":["https://openalex.org/I160993911"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"S. Sridharan","raw_affiliation_strings":["Speech, Audio, Image and Video Research Laboratory, Queensland University of Technology, Brisbane, Australia"],"affiliations":[{"raw_affiliation_string":"Speech, Audio, Image and Video Research Laboratory, Queensland University of Technology, Brisbane, Australia","institution_ids":["https://openalex.org/I160993911"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5111428430","display_name":"Timothy J. Wark","orcid":null},"institutions":[{"id":"https://openalex.org/I4210141844","display_name":"Australian e-Health Research Centre","ror":"https://ror.org/04ywhbc61","country_code":"AU","type":"facility","lineage":["https://openalex.org/I1292875679","https://openalex.org/I2801244131","https://openalex.org/I2801453606","https://openalex.org/I4210141844","https://openalex.org/I4387156119"]},{"id":"https://openalex.org/I1292875679","display_name":"Commonwealth Scientific and Industrial Research Organisation","ror":"https://ror.org/03qn8fb07","country_code":"AU","type":"funder","lineage":["https://openalex.org/I1292875679","https://openalex.org/I2801453606","https://openalex.org/I4387156119"]},{"id":"https://openalex.org/I160993911","display_name":"Queensland University of Technology","ror":"https://ror.org/03pnv4752","country_code":"AU","type":"education","lineage":["https://openalex.org/I160993911"]}],"countries":["AU"],"is_corresponding":false,"raw_author_name":"T. Wark","raw_affiliation_strings":["E-Health Research Centre/CSIRO ICT Centre, Queensland University of Technology, Brisbane, Australia"],"affiliations":[{"raw_affiliation_string":"E-Health Research Centre/CSIRO ICT Centre, Queensland University of Technology, Brisbane, Australia","institution_ids":["https://openalex.org/I4210141844","https://openalex.org/I1292875679","https://openalex.org/I160993911"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5000014182"],"corresponding_institution_ids":["https://openalex.org/I160993911"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.04619446,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"1","issue":null,"first_page":"58","last_page":"61"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9937000274658203,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9937000274658203,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.825742244720459},{"id":"https://openalex.org/keywords/speaker-recognition","display_name":"Speaker recognition","score":0.7151975035667419},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7079318761825562},{"id":"https://openalex.org/keywords/speech-analytics","display_name":"Speech analytics","score":0.6683361530303955},{"id":"https://openalex.org/keywords/audio-mining","display_name":"Audio mining","score":0.6495653390884399},{"id":"https://openalex.org/keywords/speech-processing","display_name":"Speech processing","score":0.5317441821098328},{"id":"https://openalex.org/keywords/voice-activity-detection","display_name":"Voice activity detection","score":0.45987433195114136},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.4515761435031891},{"id":"https://openalex.org/keywords/speaker-diarisation","display_name":"Speaker diarisation","score":0.4496590793132782},{"id":"https://openalex.org/keywords/confusion","display_name":"Confusion","score":0.42979931831359863},{"id":"https://openalex.org/keywords/acoustic-model","display_name":"Acoustic model","score":0.4137098491191864},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3544765114784241},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.135443776845932}],"concepts":[{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.825742244720459},{"id":"https://openalex.org/C133892786","wikidata":"https://www.wikidata.org/wiki/Q1145189","display_name":"Speaker recognition","level":2,"score":0.7151975035667419},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7079318761825562},{"id":"https://openalex.org/C54953205","wikidata":"https://www.wikidata.org/wiki/Q4142201","display_name":"Speech analytics","level":4,"score":0.6683361530303955},{"id":"https://openalex.org/C157968479","wikidata":"https://www.wikidata.org/wiki/Q3079876","display_name":"Audio mining","level":4,"score":0.6495653390884399},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.5317441821098328},{"id":"https://openalex.org/C204201278","wikidata":"https://www.wikidata.org/wiki/Q1332614","display_name":"Voice activity detection","level":3,"score":0.45987433195114136},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.4515761435031891},{"id":"https://openalex.org/C149838564","wikidata":"https://www.wikidata.org/wiki/Q7574248","display_name":"Speaker diarisation","level":3,"score":0.4496590793132782},{"id":"https://openalex.org/C2781140086","wikidata":"https://www.wikidata.org/wiki/Q557945","display_name":"Confusion","level":2,"score":0.42979931831359863},{"id":"https://openalex.org/C155635449","wikidata":"https://www.wikidata.org/wiki/Q4674699","display_name":"Acoustic model","level":3,"score":0.4137098491191864},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3544765114784241},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.135443776845932},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C11171543","wikidata":"https://www.wikidata.org/wiki/Q41630","display_name":"Psychoanalysis","level":1,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1109/isspa.2005.1580195","is_oa":false,"landing_page_url":"https://doi.org/10.1109/isspa.2005.1580195","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the Eighth International Symposium on Signal Processing and Its Applications, 2005.","raw_type":"proceedings-article"},{"id":"pmh:oai:eprints.qut.edu.au:5342","is_oa":true,"landing_page_url":null,"pdf_url":"https://eprints.qut.edu.au/5342/1/4693.pdf","source":{"id":"https://openalex.org/S4306402607","display_name":"QUT ePrints (Queensland University of Technology)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I160993911","host_organization_name":"Queensland University of Technology","host_organization_lineage":["https://openalex.org/I160993911"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Proceedings of the Eighth International Symposium on Signal Processing and Its Applications","raw_type":"Chapter in Book, Report or Conference volume"},{"id":"mag:1501076695","is_oa":false,"landing_page_url":"http://eprints.qut.edu.au/archive/00005342/","pdf_url":null,"source":{"id":"https://openalex.org/S4306510302","display_name":"Faculty of Built Environment and Engineering","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":"Faculty of Built Environment and Engineering","raw_type":null}],"best_oa_location":{"id":"pmh:oai:eprints.qut.edu.au:5342","is_oa":true,"landing_page_url":null,"pdf_url":"https://eprints.qut.edu.au/5342/1/4693.pdf","source":{"id":"https://openalex.org/S4306402607","display_name":"QUT ePrints (Queensland University of Technology)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I160993911","host_organization_name":"Queensland University of Technology","host_organization_lineage":["https://openalex.org/I160993911"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Proceedings of the Eighth International Symposium on Signal Processing and Its Applications","raw_type":"Chapter in Book, Report or Conference volume"},"sustainable_development_goals":[{"score":0.7699999809265137,"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320309106","display_name":"Clemson University","ror":"https://ror.org/037s24f05"}],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https://content.openalex.org/works/W1501076695.pdf"},"referenced_works_count":4,"referenced_works":["https://openalex.org/W2075306129","https://openalex.org/W2132999255","https://openalex.org/W3143803406","https://openalex.org/W6678156414"],"related_works":["https://openalex.org/W2328213259","https://openalex.org/W2545162966","https://openalex.org/W170174517","https://openalex.org/W1669314606","https://openalex.org/W2061992966","https://openalex.org/W2129996067","https://openalex.org/W2014408663","https://openalex.org/W146217425","https://openalex.org/W2100586179","https://openalex.org/W1940231550","https://openalex.org/W2132877153","https://openalex.org/W2035938076","https://openalex.org/W3145350685","https://openalex.org/W1901264808","https://openalex.org/W1982122133","https://openalex.org/W1652729136","https://openalex.org/W1968980933","https://openalex.org/W1967572563","https://openalex.org/W109664220","https://openalex.org/W1940002922"],"abstract_inverted_index":{"This":[0],"paper":[1],"examines":[2],"the":[3,9,21,46,73,90],"utility":[4],"of":[5,13,20,48,67,92],"audio-visual":[6],"speech":[7,14,28,41,56,99],"for":[8,45,52,77,85,97],"two":[10],"related":[11],"tasks":[12],"and":[15,27,57,100],"speaker":[16,26,49,58,78,101],"recognition.":[17,102],"A":[18],"study":[19],"confusion":[22],"that":[23,34,72,84],"exists":[24],"between":[25],"elements":[29],"was":[30,80],"performed":[31],"to":[32,70],"show":[33],"principal":[35],"component":[36],"analysis":[37],"(PCA)":[38],"based":[39],"visual":[40,95],"is":[42],"considerably":[43],"better":[44],"task":[47],"recognition":[50,59,79],"than":[51,83],"speech.":[53,86],"Decision":[54],"fusion":[55,75],"engines":[60],"were":[61],"also":[62],"tested":[63],"under":[64],"various":[65],"levels":[66],"acoustic":[68],"degradation":[69],"find":[71],"optimal":[74],"configuration":[76],"substantially":[81],"different":[82],"These":[87],"results":[88],"highlight":[89],"problem":[91],"employing":[93],"similar":[94],"features":[96],"both":[98]},"counts_by_year":[],"updated_date":"2026-03-10T16:38:18.471706","created_date":"2025-10-10T00:00:00"}
