{"id":"https://openalex.org/W2150387026","doi":"https://doi.org/10.1109/ism.2007.4412354","title":"Multi-stream Asynchrony Modeling for Audio-Visual Speech Recognition","display_name":"Multi-stream Asynchrony Modeling for Audio-Visual Speech Recognition","publication_year":2007,"publication_date":"2007-12-01","ids":{"openalex":"https://openalex.org/W2150387026","doi":"https://doi.org/10.1109/ism.2007.4412354","mag":"2150387026"},"language":"en","primary_location":{"id":"doi:10.1109/ism.2007.4412354","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ism.2007.4412354","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Ninth IEEE International Symposium on Multimedia (ISM 2007)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5072820861","display_name":"Guoyun Lv","orcid":"https://orcid.org/0000-0003-2262-3236"},"institutions":[{"id":"https://openalex.org/I17145004","display_name":"Northwestern Polytechnical University","ror":"https://ror.org/01y0j0j86","country_code":"CN","type":"education","lineage":["https://openalex.org/I17145004"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Guoyun Lv","raw_affiliation_strings":["School of Computer Science, Northwestern Polytechnical University, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Computer Science, Northwestern Polytechnical University, China","institution_ids":["https://openalex.org/I17145004"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102021514","display_name":"Dongmei Jiang","orcid":"https://orcid.org/0000-0002-6238-8499"},"institutions":[{"id":"https://openalex.org/I17145004","display_name":"Northwestern Polytechnical University","ror":"https://ror.org/01y0j0j86","country_code":"CN","type":"education","lineage":["https://openalex.org/I17145004"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Dongmei Jiang","raw_affiliation_strings":["School of Computer Science, Northwestern Polytechnical University, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Computer Science, Northwestern Polytechnical University, China","institution_ids":["https://openalex.org/I17145004"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5114015531","display_name":"Rongchun Zhao","orcid":"https://orcid.org/0009-0009-8961-1839"},"institutions":[{"id":"https://openalex.org/I17145004","display_name":"Northwestern Polytechnical University","ror":"https://ror.org/01y0j0j86","country_code":"CN","type":"education","lineage":["https://openalex.org/I17145004"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Rongchun Zhao","raw_affiliation_strings":["School of Computer Science, Northwestern Polytechnical University, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Computer Science, Northwestern Polytechnical University, China","institution_ids":["https://openalex.org/I17145004"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100518779","display_name":"Yunshu Hou","orcid":null},"institutions":[{"id":"https://openalex.org/I17145004","display_name":"Northwestern Polytechnical University","ror":"https://ror.org/01y0j0j86","country_code":"CN","type":"education","lineage":["https://openalex.org/I17145004"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yunshu Hou","raw_affiliation_strings":["School of Computer Science, Northwestern Polytechnical University, China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"School of Computer Science, Northwestern Polytechnical University, China","institution_ids":["https://openalex.org/I17145004"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.9765,"has_fulltext":false,"cited_by_count":11,"citation_normalized_percentile":{"value":0.77769572,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"37","last_page":"44"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9994000196456909,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9979000091552734,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7853979468345642},{"id":"https://openalex.org/keywords/asynchrony","display_name":"Asynchrony (computer programming)","score":0.7700749635696411},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.7362976670265198},{"id":"https://openalex.org/keywords/phone","display_name":"Phone","score":0.5993172526359558},{"id":"https://openalex.org/keywords/hidden-markov-model","display_name":"Hidden Markov model","score":0.5058786273002625},{"id":"https://openalex.org/keywords/audio-mining","display_name":"Audio mining","score":0.45144104957580566},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.4466620683670044},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.41357314586639404},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.4121952950954437},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.37777647376060486},{"id":"https://openalex.org/keywords/speech-processing","display_name":"Speech processing","score":0.323428750038147},{"id":"https://openalex.org/keywords/acoustic-model","display_name":"Acoustic model","score":0.30190426111221313},{"id":"https://openalex.org/keywords/asynchronous-communication","display_name":"Asynchronous communication","score":0.20262446999549866},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.0666218101978302}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7853979468345642},{"id":"https://openalex.org/C2779019669","wikidata":"https://www.wikidata.org/wiki/Q25203946","display_name":"Asynchrony (computer programming)","level":3,"score":0.7700749635696411},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.7362976670265198},{"id":"https://openalex.org/C2778707766","wikidata":"https://www.wikidata.org/wiki/Q202064","display_name":"Phone","level":2,"score":0.5993172526359558},{"id":"https://openalex.org/C23224414","wikidata":"https://www.wikidata.org/wiki/Q176769","display_name":"Hidden Markov model","level":2,"score":0.5058786273002625},{"id":"https://openalex.org/C157968479","wikidata":"https://www.wikidata.org/wiki/Q3079876","display_name":"Audio mining","level":4,"score":0.45144104957580566},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.4466620683670044},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.41357314586639404},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.4121952950954437},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.37777647376060486},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.323428750038147},{"id":"https://openalex.org/C155635449","wikidata":"https://www.wikidata.org/wiki/Q4674699","display_name":"Acoustic model","level":3,"score":0.30190426111221313},{"id":"https://openalex.org/C151319957","wikidata":"https://www.wikidata.org/wiki/Q752739","display_name":"Asynchronous communication","level":2,"score":0.20262446999549866},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.0666218101978302},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/ism.2007.4412354","is_oa":false,"landing_page_url":"https://doi.org/10.1109/ism.2007.4412354","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Ninth IEEE International Symposium on Multimedia (ISM 2007)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.44999998807907104,"id":"https://metadata.un.org/sdg/16","display_name":"Peace, Justice and strong institutions"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":25,"referenced_works":["https://openalex.org/W88081813","https://openalex.org/W135040384","https://openalex.org/W175821084","https://openalex.org/W199836251","https://openalex.org/W1960653181","https://openalex.org/W1978380426","https://openalex.org/W2090861223","https://openalex.org/W2096391593","https://openalex.org/W2106904816","https://openalex.org/W2110575115","https://openalex.org/W2117172387","https://openalex.org/W2121486117","https://openalex.org/W2122678358","https://openalex.org/W2137075158","https://openalex.org/W2148491550","https://openalex.org/W2151484683","https://openalex.org/W2157827878","https://openalex.org/W2163680580","https://openalex.org/W2384542382","https://openalex.org/W2793908218","https://openalex.org/W6607219637","https://openalex.org/W6641135991","https://openalex.org/W6644697353","https://openalex.org/W6676404962","https://openalex.org/W6682011138"],"related_works":["https://openalex.org/W2157598242","https://openalex.org/W2121652828","https://openalex.org/W2092572762","https://openalex.org/W3159882232","https://openalex.org/W4241650944","https://openalex.org/W3033124456","https://openalex.org/W2964829415","https://openalex.org/W2131711534","https://openalex.org/W642007152","https://openalex.org/W2341426843"],"abstract_inverted_index":{"In":[0],"this":[1],"paper,":[2],"two":[3],"multi-stream":[4],"asynchrony":[5,31,138],"Dynamic":[6],"Bayesian":[7],"Network":[8],"models":[9],"(MS-ADBN":[10],"model":[11,69,102,109,152],"and":[12,34,47,59,88,142,150],"MM-ADBN":[13,108,151],"model)":[14],"are":[15,117],"proposed":[16,23],"for":[17,147,157],"audio-visual":[18,122],"speech":[19,163],"recognition":[20,115],"(AVSR).":[21],"The":[22,133],"models,":[24],"with":[25,64],"different":[26],"topology":[27],"structures,":[28],"loose":[29],"the":[30,85,89,95,137,154,158],"of":[32,55,73,78,98,160],"audio":[33,45,141],"visual":[35,49,143],"streams":[36],"to":[37,93],"word":[38,52,105],"level.":[39],"For":[40],"MS-ADBN":[41,74,101],"model,":[42,75,106],"both":[43],"in":[44,48],"stream":[46,144],"stream,":[50],"each":[51,60],"is":[53,62,70,82,103,110,145],"composed":[54],"its":[56],"corresponding":[57],"phones,":[58],"phone":[61,86,112],"associated":[63],"observation":[65,90],"vector.":[66],"MM-":[67],"ADBN":[68],"an":[71],"augmentation":[72],"a":[76,104,111,120,129],"level":[77,87],"hidden":[79],"nodes--state":[80],"level,":[81,92],"added":[83],"between":[84,140],"node":[91],"describe":[94],"dynamic":[96],"process":[97],"phones.":[99],"Essentially,":[100],"while":[107],"model.":[113],"Speech":[114],"experiments":[116],"done":[118],"on":[119,128],"digit":[121],"(A-V)":[123],"database,":[124],"as":[125,127],"well":[126],"continuous":[130,161],"A-V":[131,162],"database.":[132],"results":[134],"demonstrate":[135],"that":[136],"description":[139],"important":[146],"AVSR":[148],"system,":[149],"has":[153],"best":[155],"performance":[156],"task":[159],"recognition.":[164]},"counts_by_year":[{"year":2018,"cited_by_count":1},{"year":2017,"cited_by_count":1},{"year":2015,"cited_by_count":1},{"year":2013,"cited_by_count":1},{"year":2012,"cited_by_count":2}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
