{"id":"https://openalex.org/W2136813782","doi":"https://doi.org/10.1109/tasl.2008.2006728","title":"Automatic Detection of Disfluency Boundaries in Spontaneous Speech of Children Using Audio&amp;#x2013;Visual Information","display_name":"Automatic Detection of Disfluency Boundaries in Spontaneous Speech of Children Using Audio&amp;#x2013;Visual Information","publication_year":2009,"publication_date":"2009-01-01","ids":{"openalex":"https://openalex.org/W2136813782","doi":"https://doi.org/10.1109/tasl.2008.2006728","mag":"2136813782"},"language":"en","primary_location":{"id":"doi:10.1109/tasl.2008.2006728","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tasl.2008.2006728","pdf_url":null,"source":{"id":"https://openalex.org/S199497470","display_name":"IEEE Transactions on Audio Speech and Language Processing","issn_l":"1558-7916","issn":["1558-7916","1558-7924"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5084041766","display_name":"Serdar Y\u0131ld\u0131r\u0131m","orcid":"https://orcid.org/0000-0003-3151-9916"},"institutions":[{"id":"https://openalex.org/I1174212","display_name":"University of Southern California","ror":"https://ror.org/03taz7m60","country_code":"US","type":"education","lineage":["https://openalex.org/I1174212"]},{"id":"https://openalex.org/I46000314","display_name":"Mustafa Kemal University","ror":"https://ror.org/056hcgc41","country_code":"TR","type":"education","lineage":["https://openalex.org/I46000314"]}],"countries":["TR","US"],"is_corresponding":true,"raw_author_name":"Serdar Yildirim","raw_affiliation_strings":["Department of Computer Engineering, Mustafa Kemal University, Antakya, Hatay, Turkey","Department of Electrical Engineering and IMSC, University of Southern California, Los Angeles, CA, USA","Dept. of Electr. Eng. & IMSC, Univ. of Southern California, Los Angeles, CA#TAB#"],"affiliations":[{"raw_affiliation_string":"Department of Computer Engineering, Mustafa Kemal University, Antakya, Hatay, Turkey","institution_ids":["https://openalex.org/I46000314"]},{"raw_affiliation_string":"Department of Electrical Engineering and IMSC, University of Southern California, Los Angeles, CA, USA","institution_ids":["https://openalex.org/I1174212"]},{"raw_affiliation_string":"Dept. of Electr. Eng. & IMSC, Univ. of Southern California, Los Angeles, CA#TAB#","institution_ids":["https://openalex.org/I1174212"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5010028928","display_name":"Shrikanth Narayanan","orcid":"https://orcid.org/0000-0002-1052-6204"},"institutions":[{"id":"https://openalex.org/I1174212","display_name":"University of Southern California","ror":"https://ror.org/03taz7m60","country_code":"US","type":"education","lineage":["https://openalex.org/I1174212"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Shrikanth Narayanan","raw_affiliation_strings":["Department of Electrical Engineering and IMSC, University of Southern California, Los Angeles, CA, USA","Dept. of Electr. Eng. & IMSC, Univ. of Southern California, Los Angeles, CA#TAB#"],"affiliations":[{"raw_affiliation_string":"Department of Electrical Engineering and IMSC, University of Southern California, Los Angeles, CA, USA","institution_ids":["https://openalex.org/I1174212"]},{"raw_affiliation_string":"Dept. of Electr. Eng. & IMSC, Univ. of Southern California, Los Angeles, CA#TAB#","institution_ids":["https://openalex.org/I1174212"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5084041766"],"corresponding_institution_ids":["https://openalex.org/I1174212","https://openalex.org/I46000314"],"apc_list":null,"apc_paid":null,"fwci":2.1148,"has_fulltext":false,"cited_by_count":23,"citation_normalized_percentile":{"value":0.87410364,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":"17","issue":"1","first_page":"2","last_page":"12"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.998199999332428,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.9962999820709229,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9937999844551086,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7616664171218872},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5565568208694458},{"id":"https://openalex.org/keywords/dialog-box","display_name":"Dialog box","score":0.5199726819992065},{"id":"https://openalex.org/keywords/prosody","display_name":"Prosody","score":0.5119467973709106},{"id":"https://openalex.org/keywords/spoken-language","display_name":"Spoken language","score":0.4855072498321533},{"id":"https://openalex.org/keywords/speech-corpus","display_name":"Speech corpus","score":0.4397052824497223},{"id":"https://openalex.org/keywords/cued-speech","display_name":"Cued speech","score":0.43703919649124146},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.4285229742527008},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4017714858055115},{"id":"https://openalex.org/keywords/speech-synthesis","display_name":"Speech synthesis","score":0.2974829077720642},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.16720935702323914},{"id":"https://openalex.org/keywords/cognitive-psychology","display_name":"Cognitive psychology","score":0.09043243527412415}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7616664171218872},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5565568208694458},{"id":"https://openalex.org/C173853756","wikidata":"https://www.wikidata.org/wiki/Q86915","display_name":"Dialog box","level":2,"score":0.5199726819992065},{"id":"https://openalex.org/C542774811","wikidata":"https://www.wikidata.org/wiki/Q10880526","display_name":"Prosody","level":2,"score":0.5119467973709106},{"id":"https://openalex.org/C2776230583","wikidata":"https://www.wikidata.org/wiki/Q1322198","display_name":"Spoken language","level":2,"score":0.4855072498321533},{"id":"https://openalex.org/C91863865","wikidata":"https://www.wikidata.org/wiki/Q4349497","display_name":"Speech corpus","level":3,"score":0.4397052824497223},{"id":"https://openalex.org/C83195618","wikidata":"https://www.wikidata.org/wiki/Q590951","display_name":"Cued speech","level":2,"score":0.43703919649124146},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4285229742527008},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4017714858055115},{"id":"https://openalex.org/C14999030","wikidata":"https://www.wikidata.org/wiki/Q16346","display_name":"Speech synthesis","level":2,"score":0.2974829077720642},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.16720935702323914},{"id":"https://openalex.org/C180747234","wikidata":"https://www.wikidata.org/wiki/Q23373","display_name":"Cognitive psychology","level":1,"score":0.09043243527412415},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/tasl.2008.2006728","is_oa":false,"landing_page_url":"https://doi.org/10.1109/tasl.2008.2006728","pdf_url":null,"source":{"id":"https://openalex.org/S199497470","display_name":"IEEE Transactions on Audio Speech and Language Processing","issn_l":"1558-7916","issn":["1558-7916","1558-7924"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.550000011920929,"display_name":"Peace, Justice and strong institutions","id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":43,"referenced_works":["https://openalex.org/W61566120","https://openalex.org/W104463061","https://openalex.org/W196503049","https://openalex.org/W340894044","https://openalex.org/W343483379","https://openalex.org/W1506281249","https://openalex.org/W1631260214","https://openalex.org/W1776994947","https://openalex.org/W1846608861","https://openalex.org/W1981039744","https://openalex.org/W1984272147","https://openalex.org/W1989386832","https://openalex.org/W1994888226","https://openalex.org/W2068700163","https://openalex.org/W2077694950","https://openalex.org/W2088676098","https://openalex.org/W2115002762","https://openalex.org/W2115629999","https://openalex.org/W2117752179","https://openalex.org/W2124246394","https://openalex.org/W2124399597","https://openalex.org/W2124812654","https://openalex.org/W2127305722","https://openalex.org/W2139143116","https://openalex.org/W2140243734","https://openalex.org/W2145510725","https://openalex.org/W2150919661","https://openalex.org/W2151093612","https://openalex.org/W2155941368","https://openalex.org/W2157236054","https://openalex.org/W2161299247","https://openalex.org/W2163172327","https://openalex.org/W2166128510","https://openalex.org/W2799061466","https://openalex.org/W3003662786","https://openalex.org/W3216401400","https://openalex.org/W4205687621","https://openalex.org/W4285719527","https://openalex.org/W6602520203","https://openalex.org/W6608046865","https://openalex.org/W6611620177","https://openalex.org/W6678727552","https://openalex.org/W6684467545"],"related_works":["https://openalex.org/W1009706782","https://openalex.org/W2060642356","https://openalex.org/W2903955902","https://openalex.org/W1661933970","https://openalex.org/W2161314515","https://openalex.org/W2403645569","https://openalex.org/W4366957107","https://openalex.org/W2250825451","https://openalex.org/W2132474591","https://openalex.org/W2059064971"],"abstract_inverted_index":{"The":[0,131],"presence":[1,105],"of":[2,41,53,83,93,106,145,161,177],"disfluencies":[3,33,64,107],"in":[4,34,38,65,108,172],"spontaneous":[5,36],"speech,":[6,37,66],"while":[7],"poses":[8],"a":[9,24,109],"challenge":[10],"for":[11,18,128,182],"robust":[12],"automatic":[13,51],"recognition,":[14],"also":[15],"offers":[16],"means":[17],"gaining":[19],"additional":[20],"insights":[21],"into":[22],"understanding":[23],"speaker's":[25],"communicative":[26],"and":[27,48,81,99,113,166,179,189],"cognitive":[28],"state.":[29],"This":[30,88],"paper":[31,89],"analyzes":[32],"children's":[35,136],"the":[39,50,79,84,91,104,124,159,186],"context":[40],"spoken":[42],"dialog":[43,138],"based":[44],"computer":[45],"game":[46],"play,":[47],"addresses":[49],"detection":[52,86,143,174],"disfluency":[54,85,129,142,173],"boundaries.":[55],"Although":[56],"several":[57],"approaches":[58],"have":[59],"been":[60,71],"proposed":[61],"to":[62,73,77,102,122,164],"detect":[63,103],"relatively":[67],"little":[68],"work":[69],"has":[70],"done":[72],"utilize":[74],"visual":[75,94,162],"information":[76,95,101,117,126,163,183],"improve":[78],"performance":[80],"robustness":[82],"system.":[87],"describes":[90],"use":[92],"along":[96],"with":[97],"prosodic":[98],"language":[100,167],"child's":[110],"computer-directed":[111],"speech":[112],"shows":[114],"how":[115],"these":[116],"sources":[118],"can":[119,148],"be":[120,149],"integrated":[121],"increase":[123],"overall":[125],"available":[127],"detection.":[130],"experimental":[132],"results":[133,156],"on":[134],"our":[135],"multimodal":[137],"corpus":[139],"indicate":[140],"that":[141,158],"accuracy":[144],"over":[146],"80%":[147],"obtained":[150],"by":[151],"utilizing":[152],"audio-visual":[153],"information.":[154],"Specifically,":[155],"showed":[157],"addition":[160],"prosody":[165],"features":[168],"yield":[169],"relative":[170],"improvements":[171],"error":[175],"rates":[176],"3.6%":[178],"6.3%,":[180],"respectively,":[181],"fusion":[184],"at":[185],"feature":[187],"level":[188],"decision":[190],"level.":[191]},"counts_by_year":[{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":1},{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":4},{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":1},{"year":2017,"cited_by_count":1},{"year":2014,"cited_by_count":1},{"year":2013,"cited_by_count":3}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
