{"id":"https://openalex.org/W4372346125","doi":"https://doi.org/10.1109/icassp49357.2023.10095363","title":"Unsupervised Word Segmentation Using Temporal Gradient Pseudo-Labels","display_name":"Unsupervised Word Segmentation Using Temporal Gradient Pseudo-Labels","publication_year":2023,"publication_date":"2023-05-05","ids":{"openalex":"https://openalex.org/W4372346125","doi":"https://doi.org/10.1109/icassp49357.2023.10095363"},"language":"en","primary_location":{"id":"doi:10.1109/icassp49357.2023.10095363","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49357.2023.10095363","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5062227175","display_name":"Tzeviya Sylvia Fuchs","orcid":"https://orcid.org/0000-0002-4666-9346"},"institutions":[{"id":"https://openalex.org/I13955877","display_name":"Bar-Ilan University","ror":"https://ror.org/03kgsv495","country_code":"IL","type":"education","lineage":["https://openalex.org/I13955877"]}],"countries":["IL"],"is_corresponding":true,"raw_author_name":"Tzeviya Sylvia Fuchs","raw_affiliation_strings":["Bar-Ilan University,Ramat-Gan,Israel","Bar-Ilan University, Ramat-Gan, Israel"],"affiliations":[{"raw_affiliation_string":"Bar-Ilan University,Ramat-Gan,Israel","institution_ids":["https://openalex.org/I13955877"]},{"raw_affiliation_string":"Bar-Ilan University, Ramat-Gan, Israel","institution_ids":["https://openalex.org/I13955877"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5047455929","display_name":"Yedid Hoshen","orcid":"https://orcid.org/0000-0002-0967-4541"},"institutions":[{"id":"https://openalex.org/I197251160","display_name":"Hebrew University of Jerusalem","ror":"https://ror.org/03qxff017","country_code":"IL","type":"education","lineage":["https://openalex.org/I197251160"]}],"countries":["IL"],"is_corresponding":false,"raw_author_name":"Yedid Hoshen","raw_affiliation_strings":["The Hebrew University of Jerusalem"],"affiliations":[{"raw_affiliation_string":"The Hebrew University of Jerusalem","institution_ids":["https://openalex.org/I197251160"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5062227175"],"corresponding_institution_ids":["https://openalex.org/I13955877"],"apc_list":null,"apc_paid":null,"fwci":0.8741,"has_fulltext":false,"cited_by_count":5,"citation_normalized_percentile":{"value":0.77893566,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10860","display_name":"Speech and Audio Processing","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11309","display_name":"Music and Audio Processing","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1711","display_name":"Signal Processing"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7744759321212769},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.7322797179222107},{"id":"https://openalex.org/keywords/thresholding","display_name":"Thresholding","score":0.7285834550857544},{"id":"https://openalex.org/keywords/segmentation","display_name":"Segmentation","score":0.7041422724723816},{"id":"https://openalex.org/keywords/classifier","display_name":"Classifier (UML)","score":0.6432454586029053},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.6011132001876831},{"id":"https://openalex.org/keywords/word","display_name":"Word (group theory)","score":0.49269965291023254},{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.4883958697319031},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.47133898735046387},{"id":"https://openalex.org/keywords/text-segmentation","display_name":"Text segmentation","score":0.4519096612930298},{"id":"https://openalex.org/keywords/image","display_name":"Image (mathematics)","score":0.18946731090545654},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.15207439661026}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7744759321212769},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.7322797179222107},{"id":"https://openalex.org/C191178318","wikidata":"https://www.wikidata.org/wiki/Q2256906","display_name":"Thresholding","level":3,"score":0.7285834550857544},{"id":"https://openalex.org/C89600930","wikidata":"https://www.wikidata.org/wiki/Q1423946","display_name":"Segmentation","level":2,"score":0.7041422724723816},{"id":"https://openalex.org/C95623464","wikidata":"https://www.wikidata.org/wiki/Q1096149","display_name":"Classifier (UML)","level":2,"score":0.6432454586029053},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.6011132001876831},{"id":"https://openalex.org/C90805587","wikidata":"https://www.wikidata.org/wiki/Q10944557","display_name":"Word (group theory)","level":2,"score":0.49269965291023254},{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.4883958697319031},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.47133898735046387},{"id":"https://openalex.org/C98501671","wikidata":"https://www.wikidata.org/wiki/Q1948408","display_name":"Text segmentation","level":3,"score":0.4519096612930298},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.18946731090545654},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.15207439661026},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp49357.2023.10095363","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49357.2023.10095363","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2023 - 2023 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.5199999809265137,"id":"https://metadata.un.org/sdg/4","display_name":"Quality Education"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":21,"referenced_works":["https://openalex.org/W130754613","https://openalex.org/W2010188467","https://openalex.org/W2126449874","https://openalex.org/W2144506857","https://openalex.org/W2145410271","https://openalex.org/W2146320039","https://openalex.org/W2468716020","https://openalex.org/W2747874407","https://openalex.org/W2964169922","https://openalex.org/W3036601975","https://openalex.org/W3096656254","https://openalex.org/W3097485645","https://openalex.org/W3098643042","https://openalex.org/W3198134274","https://openalex.org/W3198782837","https://openalex.org/W3209993061","https://openalex.org/W4256202811","https://openalex.org/W4296070447","https://openalex.org/W6780218876","https://openalex.org/W6787178341","https://openalex.org/W6803471033"],"related_works":["https://openalex.org/W2393940967","https://openalex.org/W2159591557","https://openalex.org/W2385598138","https://openalex.org/W2366925922","https://openalex.org/W2346578824","https://openalex.org/W2112534334","https://openalex.org/W2905950556","https://openalex.org/W2115592387","https://openalex.org/W120168696","https://openalex.org/W2153245103"],"abstract_inverted_index":{"Unsupervised":[0],"word":[1,34,50,136],"segmentation":[2,35],"in":[3,9],"audio":[4],"utterances":[5],"is":[6,12,80,134,153],"challenging":[7],"as,":[8],"speech,":[10],"there":[11],"typically":[13,81],"no":[14],"gap":[15],"between":[16,74],"words.":[17],"In":[18,140],"a":[19,54,94,104,110,117,132,135,138],"preliminary":[20],"experiment,":[21],"we":[22,52,124],"show":[23],"that":[24,63],"recent":[25],"deep":[26],"self-":[27],"supervised":[28],"features":[29],"are":[30],"very":[31],"effective":[32],"for":[33,39,106],"but":[36],"require":[37],"supervision":[38],"training":[40],"the":[41,61,64,69,72,75,85,90,98,114,121,126],"classification":[42],"head.":[43],"To":[44],"extend":[45],"their":[46],"effectiveness":[47],"to":[48,102,120,129,155],"unsupervised":[49],"segmentation,":[51],"propose":[53],"pseudo-labeling":[55],"strategy.":[56],"Our":[57],"approach":[58],"relies":[59],"on":[60,97,161],"observation":[62],"temporal":[65,99],"gradient":[66,100],"magnitude":[67,101],"of":[68,77,116],"embeddings":[70,76],"(i.e.":[71],"distance":[73],"subsequent":[78],"frames)":[79],"minimal":[82],"far":[83],"from":[84],"boundaries":[86],"and":[87,149],"higher":[88],"nearer":[89],"boundaries.":[91],"We":[92,108],"use":[93,125],"thresholding":[95],"function":[96],"define":[103],"psuedolabel":[105],"wordness.":[107],"train":[109],"linear":[111],"classifier,":[112],"mapping":[113],"embedding":[115],"single":[118],"frame":[119,133],"pseudo-label.":[122],"Finally,":[123],"classifier":[127],"score":[128],"predict":[130],"whether":[131],"or":[137],"boundary.":[139],"an":[141],"empirical":[142],"investigation,":[143],"our":[144],"method,":[145],"despite":[146],"its":[147],"simplicity":[148],"fast":[150],"run":[151],"time,":[152],"shown":[154],"significantly":[156],"outperform":[157],"all":[158],"previous":[159],"methods":[160],"two":[162],"datasets.":[163]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
