{"id":"https://openalex.org/W3162138321","doi":"https://doi.org/10.1109/icassp39728.2021.9413819","title":"Ensemble Combination between Different Time Segmentations","display_name":"Ensemble Combination between Different Time Segmentations","publication_year":2021,"publication_date":"2021-05-13","ids":{"openalex":"https://openalex.org/W3162138321","doi":"https://doi.org/10.1109/icassp39728.2021.9413819","mag":"3162138321"},"language":"en","primary_location":{"id":"doi:10.1109/icassp39728.2021.9413819","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp39728.2021.9413819","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2021 - 2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5048201022","display_name":"Jeremy H. M. Wong","orcid":"https://orcid.org/0000-0003-3742-7510"},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Jeremy H. M. Wong","raw_affiliation_strings":["Microsoft Speech and Language Group"],"affiliations":[{"raw_affiliation_string":"Microsoft Speech and Language Group","institution_ids":["https://openalex.org/I1290206253"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5115044944","display_name":"Dimitrios Dimitriadis","orcid":"https://orcid.org/0000-0001-8483-0105"},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Dimitrios Dimitriadis","raw_affiliation_strings":["Microsoft Speech and Language Group"],"affiliations":[{"raw_affiliation_string":"Microsoft Speech and Language Group","institution_ids":["https://openalex.org/I1290206253"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5053915317","display_name":"Kenichi Kumatani","orcid":null},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Kenichi Kumatani","raw_affiliation_strings":["Microsoft Speech and Language Group"],"affiliations":[{"raw_affiliation_string":"Microsoft Speech and Language Group","institution_ids":["https://openalex.org/I1290206253"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5034136587","display_name":"Yashesh Gaur","orcid":null},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yashesh Gaur","raw_affiliation_strings":["Microsoft Speech and Language Group"],"affiliations":[{"raw_affiliation_string":"Microsoft Speech and Language Group","institution_ids":["https://openalex.org/I1290206253"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5006028677","display_name":"George Polovets","orcid":null},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"George Polovets","raw_affiliation_strings":["Microsoft Speech and Language Group"],"affiliations":[{"raw_affiliation_string":"Microsoft Speech and Language Group","institution_ids":["https://openalex.org/I1290206253"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5000583382","display_name":"Partha Parthasarathy","orcid":null},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Partha Parthasarathy","raw_affiliation_strings":["Microsoft Speech and Language Group"],"affiliations":[{"raw_affiliation_string":"Microsoft Speech and Language Group","institution_ids":["https://openalex.org/I1290206253"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5090360853","display_name":"Eric Sun","orcid":"https://orcid.org/0000-0001-8805-9864"},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Eric Sun","raw_affiliation_strings":["Microsoft Speech and Language Group"],"affiliations":[{"raw_affiliation_string":"Microsoft Speech and Language Group","institution_ids":["https://openalex.org/I1290206253"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100365053","display_name":"Jinyu Li","orcid":"https://orcid.org/0000-0002-1089-9748"},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Jinyu Li","raw_affiliation_strings":["Microsoft Speech and Language Group"],"affiliations":[{"raw_affiliation_string":"Microsoft Speech and Language Group","institution_ids":["https://openalex.org/I1290206253"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5101928537","display_name":"Yifan Gong","orcid":"https://orcid.org/0000-0002-3912-097X"},"institutions":[{"id":"https://openalex.org/I1290206253","display_name":"Microsoft (United States)","ror":"https://ror.org/00d0nc645","country_code":"US","type":"company","lineage":["https://openalex.org/I1290206253"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yifan Gong","raw_affiliation_strings":["Microsoft Speech and Language Group"],"affiliations":[{"raw_affiliation_string":"Microsoft Speech and Language Group","institution_ids":["https://openalex.org/I1290206253"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":9,"corresponding_author_ids":["https://openalex.org/A5048201022"],"corresponding_institution_ids":["https://openalex.org/I1290206253"],"apc_list":null,"apc_paid":null,"fwci":0.6094,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.65544287,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":95},"biblio":{"volume":null,"issue":null,"first_page":"6768","last_page":"6772"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9995999932289124,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7594845294952393},{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.6666375398635864},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.5346395373344421},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.47474581003189087},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.47112345695495605},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.4397121071815491},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.4391584098339081},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.41622138023376465},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.3762398958206177},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.12879809737205505}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7594845294952393},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.6666375398635864},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.5346395373344421},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.47474581003189087},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.47112345695495605},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.4397121071815491},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.4391584098339081},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.41622138023376465},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3762398958206177},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.12879809737205505},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp39728.2021.9413819","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp39728.2021.9413819","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2021 - 2021 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":33,"referenced_works":["https://openalex.org/W126222424","https://openalex.org/W173561343","https://openalex.org/W811578723","https://openalex.org/W1524333225","https://openalex.org/W1553004968","https://openalex.org/W1904457459","https://openalex.org/W1993721840","https://openalex.org/W2009150118","https://openalex.org/W2327501763","https://openalex.org/W2594610113","https://openalex.org/W2766219058","https://openalex.org/W2786207315","https://openalex.org/W2943162839","https://openalex.org/W2963326356","https://openalex.org/W2963540920","https://openalex.org/W2964084166","https://openalex.org/W2973122799","https://openalex.org/W3007905516","https://openalex.org/W3008283340","https://openalex.org/W3008849579","https://openalex.org/W3008898571","https://openalex.org/W3011339933","https://openalex.org/W3015194534","https://openalex.org/W3016155933","https://openalex.org/W3047056234","https://openalex.org/W3094780479","https://openalex.org/W4235542672","https://openalex.org/W4298175155","https://openalex.org/W6605210145","https://openalex.org/W6607114211","https://openalex.org/W6631362777","https://openalex.org/W6762053788","https://openalex.org/W6781993102"],"related_works":["https://openalex.org/W4379231730","https://openalex.org/W4389858081","https://openalex.org/W1569283511","https://openalex.org/W2391251536","https://openalex.org/W2501551404","https://openalex.org/W4298131179","https://openalex.org/W2113201962","https://openalex.org/W2362198218","https://openalex.org/W4385583601","https://openalex.org/W1982750869"],"abstract_inverted_index":{"Hypothesis-level":[0],"combination":[1,93,128,145],"between":[2,43,94,129,146],"multiple":[3],"models":[4,14],"can":[5,112],"often":[6],"yield":[7,113],"gains":[8,114],"in":[9,15,54],"speech":[10],"recognition.":[11,103],"However,":[12],"all":[13],"the":[16,23,36,44,50,68,109,122],"ensemble":[17],"are":[18,78],"usually":[19],"restricted":[20],"to":[21,31,63,71],"use":[22,37],"same":[24],"audio":[25,40],"segmentation":[26,41,119],"times.":[27,120],"This":[28],"paper":[29],"proposes":[30],"generalise":[32],"hypothesis-level":[33],"combination,":[34],"allowing":[35],"of":[38],"different":[39,118],"times":[42],"models,":[45],"by":[46,91],"splitting":[47,74],"and":[48,99,133],"re-joining":[49],"hypothesised":[51],"N-best":[52],"lists":[53],"time.":[55],"A":[56],"hypothesis":[57,65],"tree":[58],"method":[59],"is":[60,83],"also":[61,124],"proposed":[62,110],"distribute":[64],"posteriors":[66],"among":[67],"constituent":[69],"words,":[70],"facilitate":[72],"such":[73],"when":[75,115],"per-word":[76],"scores":[77],"not":[79],"available.":[80],"The":[81,104],"approach":[82,111],"assessed":[84],"on":[85],"a":[86,95,127,130,140,144],"Microsoft":[87],"meeting":[88],"transcription":[89],"task,":[90],"performing":[92],"streaming":[96],"first-pass":[97],"recognition":[98],"an":[100,134],"offline":[101],"second-pass":[102],"experimental":[105],"results":[106,123],"show":[107,125],"that":[108,126],"combining":[116],"over":[117],"Furthermore,":[121],"hybrid":[131,148],"model":[132,138],"end-to-end":[135],"neural":[136],"network":[137],"yields":[139],"greater":[141],"improvement":[142],"than":[143],"two":[147],"models.":[149]},"counts_by_year":[{"year":2023,"cited_by_count":1},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":2}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
