{"id":"https://openalex.org/W260797086","doi":"https://doi.org/10.21437/interspeech.2004-573","title":"Using machine learning to cope with imbalanced classes in natural speech: evidence from sentence boundary and disfluency detection","display_name":"Using machine learning to cope with imbalanced classes in natural speech: evidence from sentence boundary and disfluency detection","publication_year":2004,"publication_date":"2004-10-04","ids":{"openalex":"https://openalex.org/W260797086","doi":"https://doi.org/10.21437/interspeech.2004-573","mag":"260797086"},"language":"en","primary_location":{"id":"doi:10.21437/interspeech.2004-573","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2004-573","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2004","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5062666378","display_name":"Yang Liu","orcid":"https://orcid.org/0000-0002-2814-8969"},"institutions":[{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]},{"id":"https://openalex.org/I219193219","display_name":"Purdue University West Lafayette","ror":"https://ror.org/02dqehb95","country_code":"US","type":"education","lineage":["https://openalex.org/I219193219"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Yang Liu","raw_affiliation_strings":["Purdue University","University of California at Berkeley"],"affiliations":[{"raw_affiliation_string":"Purdue University","institution_ids":["https://openalex.org/I219193219"]},{"raw_affiliation_string":"University of California at Berkeley","institution_ids":["https://openalex.org/I95457486"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5107174154","display_name":"Elizabeth Shriberg","orcid":"https://orcid.org/0009-0004-3779-4956"},"institutions":[{"id":"https://openalex.org/I1298353152","display_name":"SRI International","ror":"https://ror.org/05s570m15","country_code":"US","type":"nonprofit","lineage":["https://openalex.org/I1298353152"]},{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Elizabeth Shriberg","raw_affiliation_strings":["SRI International","University of California at Berkeley"],"affiliations":[{"raw_affiliation_string":"SRI International","institution_ids":["https://openalex.org/I1298353152"]},{"raw_affiliation_string":"University of California at Berkeley","institution_ids":["https://openalex.org/I95457486"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5060979948","display_name":"Andreas Stolcke","orcid":"https://orcid.org/0000-0002-9925-905X"},"institutions":[{"id":"https://openalex.org/I95457486","display_name":"University of California, Berkeley","ror":"https://ror.org/01an7q238","country_code":"US","type":"education","lineage":["https://openalex.org/I95457486"]},{"id":"https://openalex.org/I1298353152","display_name":"SRI International","ror":"https://ror.org/05s570m15","country_code":"US","type":"nonprofit","lineage":["https://openalex.org/I1298353152"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Andreas Stolcke","raw_affiliation_strings":["University of California at Berkeley","SRI International"],"affiliations":[{"raw_affiliation_string":"University of California at Berkeley","institution_ids":["https://openalex.org/I95457486"]},{"raw_affiliation_string":"SRI International","institution_ids":["https://openalex.org/I1298353152"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5113657967","display_name":"Mary P. Harper","orcid":null},"institutions":[{"id":"https://openalex.org/I219193219","display_name":"Purdue University West Lafayette","ror":"https://ror.org/02dqehb95","country_code":"US","type":"education","lineage":["https://openalex.org/I219193219"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Mary Harper","raw_affiliation_strings":["Purdue University"],"affiliations":[{"raw_affiliation_string":"Purdue University","institution_ids":["https://openalex.org/I219193219"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5062666378"],"corresponding_institution_ids":["https://openalex.org/I219193219","https://openalex.org/I95457486"],"apc_list":null,"apc_paid":null,"fwci":4.0473,"has_fulltext":false,"cited_by_count":20,"citation_normalized_percentile":{"value":0.93474861,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"1525","last_page":"1528"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9977999925613403,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9977999925613403,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9972000122070312,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9965000152587891,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/boosting","display_name":"Boosting (machine learning)","score":0.7362356185913086},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7305577993392944},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.676609456539154},{"id":"https://openalex.org/keywords/upsampling","display_name":"Upsampling","score":0.6299682855606079},{"id":"https://openalex.org/keywords/oversampling","display_name":"Oversampling","score":0.594347357749939},{"id":"https://openalex.org/keywords/ensemble-learning","display_name":"Ensemble learning","score":0.5652422904968262},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.5492269992828369},{"id":"https://openalex.org/keywords/sentence","display_name":"Sentence","score":0.5388215780258179},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.49648934602737427},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.47458788752555847},{"id":"https://openalex.org/keywords/coreference","display_name":"Coreference","score":0.42679259181022644},{"id":"https://openalex.org/keywords/decision-tree","display_name":"Decision tree","score":0.4256255328655243},{"id":"https://openalex.org/keywords/prosody","display_name":"Prosody","score":0.41631758213043213},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.32995739579200745}],"concepts":[{"id":"https://openalex.org/C46686674","wikidata":"https://www.wikidata.org/wiki/Q466303","display_name":"Boosting (machine learning)","level":2,"score":0.7362356185913086},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7305577993392944},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.676609456539154},{"id":"https://openalex.org/C110384440","wikidata":"https://www.wikidata.org/wiki/Q1143270","display_name":"Upsampling","level":3,"score":0.6299682855606079},{"id":"https://openalex.org/C197323446","wikidata":"https://www.wikidata.org/wiki/Q331222","display_name":"Oversampling","level":3,"score":0.594347357749939},{"id":"https://openalex.org/C45942800","wikidata":"https://www.wikidata.org/wiki/Q245652","display_name":"Ensemble learning","level":2,"score":0.5652422904968262},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5492269992828369},{"id":"https://openalex.org/C2777530160","wikidata":"https://www.wikidata.org/wiki/Q41796","display_name":"Sentence","level":2,"score":0.5388215780258179},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.49648934602737427},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.47458788752555847},{"id":"https://openalex.org/C28076734","wikidata":"https://www.wikidata.org/wiki/Q63087","display_name":"Coreference","level":3,"score":0.42679259181022644},{"id":"https://openalex.org/C84525736","wikidata":"https://www.wikidata.org/wiki/Q831366","display_name":"Decision tree","level":2,"score":0.4256255328655243},{"id":"https://openalex.org/C542774811","wikidata":"https://www.wikidata.org/wiki/Q10880526","display_name":"Prosody","level":2,"score":0.41631758213043213},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.32995739579200745},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.0},{"id":"https://openalex.org/C31258907","wikidata":"https://www.wikidata.org/wiki/Q1301371","display_name":"Computer network","level":1,"score":0.0},{"id":"https://openalex.org/C138268822","wikidata":"https://www.wikidata.org/wiki/Q1051925","display_name":"Resolution (logic)","level":2,"score":0.0},{"id":"https://openalex.org/C2776257435","wikidata":"https://www.wikidata.org/wiki/Q1576430","display_name":"Bandwidth (computing)","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.21437/interspeech.2004-573","is_oa":false,"landing_page_url":"https://doi.org/10.21437/interspeech.2004-573","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2004","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Peace, Justice and strong institutions","score":0.4000000059604645,"id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":8,"referenced_works":["https://openalex.org/W1504308419","https://openalex.org/W1511530654","https://openalex.org/W1881647329","https://openalex.org/W2070534370","https://openalex.org/W2166637769","https://openalex.org/W2426479676","https://openalex.org/W2912934387","https://openalex.org/W4285719527"],"related_works":["https://openalex.org/W2139373276","https://openalex.org/W2227889443","https://openalex.org/W2766503024","https://openalex.org/W1509033667","https://openalex.org/W4385749782","https://openalex.org/W3167631113","https://openalex.org/W2145164276","https://openalex.org/W2004630825","https://openalex.org/W2324061017","https://openalex.org/W1983404483"],"abstract_inverted_index":{"We":[0,34],"investigate":[1],"machine":[2],"learning":[3,53],"techniques":[4],"for":[5,29,60],"coping":[6],"with":[7],"highly":[8],"skewed":[9],"class":[10],"distributions":[11],"in":[12],"two":[13],"spontaneous":[14],"speech":[15],"processing":[16,32],"tasks.":[17],"Both":[18],"tasks,":[19,73],"sentence":[20],"boundary":[21],"and":[22,49,52,58,89],"disfluency":[23],"detection,":[24],"provide":[25],"important":[26],"structural":[27],"information":[28],"downstream":[30],"language":[31],"modules.":[33],"examine":[35],"the":[36,79],"effect":[37],"of":[38],"data":[39,86],"set":[40],"size,":[41],"task,":[42],"sampling":[43],"method":[44,54],"(no":[45],"sampling,":[46],"downsampling,":[47],"oversampling,":[48],"ensemble":[50,56,82],"sampling),":[51],"(bagging,":[55],"bagging,":[57],"boosting)":[59],"a":[61],"decision":[62],"tree":[63],"prosody":[64],"model.":[65],"Results":[66],"show":[67],"that":[68],"(1)":[69],"bagging":[70,83,94],"benefits":[71],"both":[72],"but":[74],"to":[75],"different":[76],"degrees,":[77],"(2)":[78],"benefit":[80],"from":[81],"decreases":[84],"as":[85],"size":[87],"increases,":[88],"(3)":[90],"boosting":[91],"can":[92],"outperform":[93],"under":[95],"certain":[96],"conditions.":[97]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2022,"cited_by_count":1},{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":1},{"year":2014,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
