{"id":"https://openalex.org/W2951974815","doi":"https://doi.org/10.1109/taslp.2019.2922832","title":"Audio Word2vec: Sequence-to-Sequence Autoencoding for Unsupervised Learning of Audio Segmentation and Representation","display_name":"Audio Word2vec: Sequence-to-Sequence Autoencoding for Unsupervised Learning of Audio Segmentation and Representation","publication_year":2019,"publication_date":"2019-06-13","ids":{"openalex":"https://openalex.org/W2951974815","doi":"https://doi.org/10.1109/taslp.2019.2922832","mag":"2951974815"},"language":"en","primary_location":{"id":"doi:10.1109/taslp.2019.2922832","is_oa":false,"landing_page_url":"https://doi.org/10.1109/taslp.2019.2922832","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5034206959","display_name":"Yi-Chen Chen","orcid":"https://orcid.org/0000-0001-8969-9927"},"institutions":[{"id":"https://openalex.org/I16733864","display_name":"National Taiwan University","ror":"https://ror.org/05bqach95","country_code":"TW","type":"education","lineage":["https://openalex.org/I16733864"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Yi-Chen Chen","raw_affiliation_strings":["Graduate Institute of Communication Engineering, National Taiwan University, Taipei, Taiwan"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Graduate Institute of Communication Engineering, National Taiwan University, Taipei, Taiwan","institution_ids":["https://openalex.org/I16733864"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5071504898","display_name":"Sung-Feng Huang","orcid":"https://orcid.org/0000-0002-9720-811X"},"institutions":[{"id":"https://openalex.org/I16733864","display_name":"National Taiwan University","ror":"https://ror.org/05bqach95","country_code":"TW","type":"education","lineage":["https://openalex.org/I16733864"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Sung-Feng Huang","raw_affiliation_strings":["Graduate Institute of Communication Engineering, National Taiwan University, Taipei, Taiwan"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Graduate Institute of Communication Engineering, National Taiwan University, Taipei, Taiwan","institution_ids":["https://openalex.org/I16733864"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5040508737","display_name":"Hung-yi Lee","orcid":"https://orcid.org/0000-0002-9654-5747"},"institutions":[{"id":"https://openalex.org/I16733864","display_name":"National Taiwan University","ror":"https://ror.org/05bqach95","country_code":"TW","type":"education","lineage":["https://openalex.org/I16733864"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Hung-yi Lee","raw_affiliation_strings":["Department of Electrical Engineering, National Taiwan University, Taipei, Taiwan"],"raw_orcid":"https://orcid.org/0000-0002-9654-5747","affiliations":[{"raw_affiliation_string":"Department of Electrical Engineering, National Taiwan University, Taipei, Taiwan","institution_ids":["https://openalex.org/I16733864"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5080785619","display_name":"Yu\u2010Hsuan Wang","orcid":"https://orcid.org/0000-0002-6137-6840"},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yu-Hsuan Wang","raw_affiliation_strings":["Carnegie Mellon University, Language Technologies Institute, Pittsburgh, PA, USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Carnegie Mellon University, Language Technologies Institute, Pittsburgh, PA, USA","institution_ids":["https://openalex.org/I74973139"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5085585975","display_name":"Chia-Hao Shen","orcid":"https://orcid.org/0000-0001-8472-3596"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chia-Hao Shen","raw_affiliation_strings":["CompStak, New York, NY, USA"],"raw_orcid":"https://orcid.org/0000-0001-8472-3596","affiliations":[{"raw_affiliation_string":"CompStak, New York, NY, USA","institution_ids":[]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":3.9044,"has_fulltext":false,"cited_by_count":42,"citation_normalized_percentile":{"value":0.94874229,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":99},"biblio":{"volume":"27","issue":"9","first_page":"1481","last_page":"1493"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/word2vec","display_name":"Word2vec","score":0.943744957447052},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7921831607818604},{"id":"https://openalex.org/keywords/autoencoder","display_name":"Autoencoder","score":0.7286253571510315},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6205174326896667},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5818834900856018},{"id":"https://openalex.org/keywords/sequence","display_name":"Sequence (biology)","score":0.5669342875480652},{"id":"https://openalex.org/keywords/audio-mining","display_name":"Audio mining","score":0.543258011341095},{"id":"https://openalex.org/keywords/encoder","display_name":"Encoder","score":0.4995303153991699},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.49678996205329895},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.49107933044433594},{"id":"https://openalex.org/keywords/text-segmentation","display_name":"Text segmentation","score":0.4825279414653778},{"id":"https://openalex.org/keywords/unsupervised-learning","display_name":"Unsupervised learning","score":0.4322665333747864},{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.4282483458518982},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.3314582109451294},{"id":"https://openalex.org/keywords/speech-processing","display_name":"Speech processing","score":0.2997417449951172},{"id":"https://openalex.org/keywords/acoustic-model","display_name":"Acoustic model","score":0.2610042691230774},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.08841514587402344}],"concepts":[{"id":"https://openalex.org/C2776461190","wikidata":"https://www.wikidata.org/wiki/Q22673982","display_name":"Word2vec","level":3,"score":0.943744957447052},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7921831607818604},{"id":"https://openalex.org/C101738243","wikidata":"https://www.wikidata.org/wiki/Q786435","display_name":"Autoencoder","level":3,"score":0.7286253571510315},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6205174326896667},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5818834900856018},{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.5669342875480652},{"id":"https://openalex.org/C157968479","wikidata":"https://www.wikidata.org/wiki/Q3079876","display_name":"Audio mining","level":4,"score":0.543258011341095},{"id":"https://openalex.org/C118505674","wikidata":"https://www.wikidata.org/wiki/Q42586063","display_name":"Encoder","level":2,"score":0.4995303153991699},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.49678996205329895},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.49107933044433594},{"id":"https://openalex.org/C98501671","wikidata":"https://www.wikidata.org/wiki/Q1948408","display_name":"Text segmentation","level":3,"score":0.4825279414653778},{"id":"https://openalex.org/C8038995","wikidata":"https://www.wikidata.org/wiki/Q1152135","display_name":"Unsupervised learning","level":2,"score":0.4322665333747864},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.4282483458518982},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.3314582109451294},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.2997417449951172},{"id":"https://openalex.org/C155635449","wikidata":"https://www.wikidata.org/wiki/Q4674699","display_name":"Acoustic model","level":3,"score":0.2610042691230774},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.08841514587402344},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.0},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C54355233","wikidata":"https://www.wikidata.org/wiki/Q7162","display_name":"Genetics","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/taslp.2019.2922832","is_oa":false,"landing_page_url":"https://doi.org/10.1109/taslp.2019.2922832","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education","score":0.7099999785423279}],"awards":[],"funders":[{"id":"https://openalex.org/F4320322795","display_name":"Ministry of Science and Technology, Taiwan","ror":"https://ror.org/02kv4zf79"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":121,"referenced_works":["https://openalex.org/W30845872","https://openalex.org/W66627554","https://openalex.org/W111477576","https://openalex.org/W113159538","https://openalex.org/W1486649854","https://openalex.org/W1494198834","https://openalex.org/W1496120315","https://openalex.org/W1522301498","https://openalex.org/W1577418252","https://openalex.org/W1614298861","https://openalex.org/W1731081199","https://openalex.org/W2025482506","https://openalex.org/W2030422732","https://openalex.org/W2036964623","https://openalex.org/W2059652594","https://openalex.org/W2079460648","https://openalex.org/W2099415988","https://openalex.org/W2100495367","https://openalex.org/W2100768664","https://openalex.org/W2105016867","https://openalex.org/W2110589736","https://openalex.org/W2114347655","https://openalex.org/W2115613106","https://openalex.org/W2116435618","https://openalex.org/W2121997342","https://openalex.org/W2126203737","https://openalex.org/W2130942839","https://openalex.org/W2131744502","https://openalex.org/W2153519094","https://openalex.org/W2153579005","https://openalex.org/W2155027007","https://openalex.org/W2157331557","https://openalex.org/W2181347294","https://openalex.org/W2190506272","https://openalex.org/W2242818861","https://openalex.org/W2295088914","https://openalex.org/W2295297373","https://openalex.org/W2296681920","https://openalex.org/W2401464865","https://openalex.org/W2401725913","https://openalex.org/W2404952642","https://openalex.org/W2468716020","https://openalex.org/W2516890051","https://openalex.org/W2545319977","https://openalex.org/W2550241133","https://openalex.org/W2566587499","https://openalex.org/W2578392894","https://openalex.org/W2586756136","https://openalex.org/W2593011301","https://openalex.org/W2736601468","https://openalex.org/W2758697525","https://openalex.org/W2758785877","https://openalex.org/W2769025471","https://openalex.org/W2770776165","https://openalex.org/W2773389940","https://openalex.org/W2786608204","https://openalex.org/W2802557066","https://openalex.org/W2884305338","https://openalex.org/W2889313720","https://openalex.org/W2950577311","https://openalex.org/W2951216052","https://openalex.org/W2962736743","https://openalex.org/W2962799131","https://openalex.org/W2962850167","https://openalex.org/W2962879692","https://openalex.org/W2962980711","https://openalex.org/W2963206684","https://openalex.org/W2963223306","https://openalex.org/W2963364041","https://openalex.org/W2963425185","https://openalex.org/W2963443217","https://openalex.org/W2963571336","https://openalex.org/W2963618559","https://openalex.org/W2963620343","https://openalex.org/W2963720603","https://openalex.org/W2963735467","https://openalex.org/W2964096699","https://openalex.org/W2964121744","https://openalex.org/W2964169922","https://openalex.org/W3098643042","https://openalex.org/W4294170691","https://openalex.org/W4295521014","https://openalex.org/W6601311673","https://openalex.org/W6602682705","https://openalex.org/W6604441197","https://openalex.org/W6604665916","https://openalex.org/W6629028937","https://openalex.org/W6631190155","https://openalex.org/W6636510571","https://openalex.org/W6637618735","https://openalex.org/W6675022971","https://openalex.org/W6677326919","https://openalex.org/W6677803786","https://openalex.org/W6679436768","https://openalex.org/W6679775712","https://openalex.org/W6682691769","https://openalex.org/W6683204974","https://openalex.org/W6685777803","https://openalex.org/W6690026940","https://openalex.org/W6697234459","https://openalex.org/W6697293080","https://openalex.org/W6697456849","https://openalex.org/W6712960331","https://openalex.org/W6713216870","https://openalex.org/W6725939724","https://openalex.org/W6726044619","https://openalex.org/W6729855024","https://openalex.org/W6731763572","https://openalex.org/W6735305794","https://openalex.org/W6735744823","https://openalex.org/W6735913928","https://openalex.org/W6736494419","https://openalex.org/W6739641445","https://openalex.org/W6741002519","https://openalex.org/W6741347854","https://openalex.org/W6745117592","https://openalex.org/W6745895022","https://openalex.org/W6745924425","https://openalex.org/W6753329955","https://openalex.org/W6779669310","https://openalex.org/W6973666849"],"related_works":["https://openalex.org/W2980729574","https://openalex.org/W2806873178","https://openalex.org/W2965146396","https://openalex.org/W2770818364","https://openalex.org/W4404095322","https://openalex.org/W4312416532","https://openalex.org/W2908961393","https://openalex.org/W2964096699","https://openalex.org/W2951974815","https://openalex.org/W2736350121"],"abstract_inverted_index":{"In":[0,59],"text,":[1],"word2vec":[2,29,89,104,114],"transforms":[3],"each":[4],"word":[5,92,109],"into":[6],"a":[7,22,39,100,139,145],"fixed-size":[8],"vector":[9,44,83],"used":[10],"as":[11,126],"the":[12,51,56,68,81,91,95,155],"basic":[13],"component":[14],"in":[15,34,67,105,143,154],"applications":[16],"of":[17,25,55,128,138],"natural":[18],"language":[19],"processing.":[20],"Given":[21],"large":[23],"collection":[24],"unannotated":[26],"audio,":[27],"audio":[28,57,88,103,113],"can":[30],"also":[31],"be":[32],"trained":[33,148],"an":[35],"unsupervised":[36,107],"way":[37],"using":[38],"sequence-to-sequence":[40,141],"autoencoder":[41],"(SA).":[42],"These":[43],"representations":[45],"are":[46,115,123],"shown":[47],"to":[48,94],"effectively":[49],"describe":[50],"sequential":[52],"phonetic":[53,75,131],"structures":[54],"segments.":[58],"this":[60,65],"paper,":[61],"we":[62,73,86],"further":[63],"extend":[64,87],"research":[66],"following":[69],"two":[70],"directions.":[71],"First,":[72],"disentangle":[74],"information":[76,79],"and":[77,112,118,121],"speaker":[78],"from":[80,90],"SA":[82],"representations.":[84],"Second,":[85],"level":[93,97],"utterance":[96],"by":[98,136],"proposing":[99],"new":[101],"segmental":[102,140],"which":[106,144],"spoken":[108],"boundary":[110],"segmentation":[111,146],"jointly":[116],"learned":[117],"mutually":[119],"enhanced,":[120],"utterances":[122],"directly":[124],"represented":[125],"sequences":[127],"vectors":[129],"carrying":[130],"information.":[132],"This":[133],"is":[134,152],"achieved":[135],"means":[137],"autoencoder,":[142],"gate":[147],"with":[149],"reinforcement":[150],"learning":[151],"inserted":[153],"encoder.":[156]},"counts_by_year":[{"year":2025,"cited_by_count":4},{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":8},{"year":2022,"cited_by_count":11},{"year":2021,"cited_by_count":12},{"year":2020,"cited_by_count":3},{"year":2019,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
