{"id":"https://openalex.org/W7148361948","doi":"https://doi.org/10.1109/asru65441.2025.11434655","title":"On the Difficulty of Token-Level Modeling of Dysfluency and Fluency Shaping Artifacts","display_name":"On the Difficulty of Token-Level Modeling of Dysfluency and Fluency Shaping Artifacts","publication_year":2025,"publication_date":"2025-12-06","ids":{"openalex":"https://openalex.org/W7148361948","doi":"https://doi.org/10.1109/asru65441.2025.11434655"},"language":null,"primary_location":{"id":"doi:10.1109/asru65441.2025.11434655","is_oa":false,"landing_page_url":"https://doi.org/10.1109/asru65441.2025.11434655","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5120685667","display_name":"Kashaf Gulzar","orcid":null},"institutions":[{"id":"https://openalex.org/I4210100612","display_name":"Georg Simon Ohm University of Applied Sciences Nuremberg","ror":"https://ror.org/00nggaz43","country_code":"DE","type":"education","lineage":["https://openalex.org/I4210100612"]}],"countries":["DE"],"is_corresponding":true,"raw_author_name":"Kashaf Gulzar","raw_affiliation_strings":["Technische Hochschule N&#x00FC;rnberg Georg Simon Ohm,Germany"],"affiliations":[{"raw_affiliation_string":"Technische Hochschule N&#x00FC;rnberg Georg Simon Ohm,Germany","institution_ids":["https://openalex.org/I4210100612"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5016617263","display_name":"Dominik Wagner","orcid":"https://orcid.org/0000-0002-3631-4138"},"institutions":[{"id":"https://openalex.org/I4210100612","display_name":"Georg Simon Ohm University of Applied Sciences Nuremberg","ror":"https://ror.org/00nggaz43","country_code":"DE","type":"education","lineage":["https://openalex.org/I4210100612"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Dominik Wagner","raw_affiliation_strings":["Technische Hochschule N&#x00FC;rnberg Georg Simon Ohm,Germany"],"affiliations":[{"raw_affiliation_string":"Technische Hochschule N&#x00FC;rnberg Georg Simon Ohm,Germany","institution_ids":["https://openalex.org/I4210100612"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5070508803","display_name":"Sebastian P. Bayerl","orcid":"https://orcid.org/0000-0002-3502-9511"},"institutions":[{"id":"https://openalex.org/I4210142109","display_name":"Rosenheim Technical University of Applied Sciences","ror":"https://ror.org/03hbmgt12","country_code":"DE","type":"education","lineage":["https://openalex.org/I4210142109"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Sebastian P. Bayerl","raw_affiliation_strings":["Technische Hochschule Rosenheim,Germany"],"affiliations":[{"raw_affiliation_string":"Technische Hochschule Rosenheim,Germany","institution_ids":["https://openalex.org/I4210142109"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5000472771","display_name":"Florian H\u00f6nig","orcid":"https://orcid.org/0000-0002-8677-3420"},"institutions":[{"id":"https://openalex.org/I4210093428","display_name":"IMU Institut (Germany)","ror":"https://ror.org/005ev0c40","country_code":"DE","type":"company","lineage":["https://openalex.org/I4210093428"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Florian H\u00f6nig","raw_affiliation_strings":["KST Institut GmbH,Germany"],"affiliations":[{"raw_affiliation_string":"KST Institut GmbH,Germany","institution_ids":["https://openalex.org/I4210093428"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5033302750","display_name":"Tobias Bocklet","orcid":"https://orcid.org/0009-0008-7780-8821"},"institutions":[{"id":"https://openalex.org/I4210100612","display_name":"Georg Simon Ohm University of Applied Sciences Nuremberg","ror":"https://ror.org/00nggaz43","country_code":"DE","type":"education","lineage":["https://openalex.org/I4210100612"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Tobias Bocklet","raw_affiliation_strings":["Technische Hochschule N&#x00FC;rnberg Georg Simon Ohm,Germany"],"affiliations":[{"raw_affiliation_string":"Technische Hochschule N&#x00FC;rnberg Georg Simon Ohm,Germany","institution_ids":["https://openalex.org/I4210100612"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5053498662","display_name":"Korbinian Riedhammer","orcid":"https://orcid.org/0000-0003-3582-2154"},"institutions":[{"id":"https://openalex.org/I4210100612","display_name":"Georg Simon Ohm University of Applied Sciences Nuremberg","ror":"https://ror.org/00nggaz43","country_code":"DE","type":"education","lineage":["https://openalex.org/I4210100612"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Korbinian Riedhammer","raw_affiliation_strings":["Technische Hochschule N&#x00FC;rnberg Georg Simon Ohm,Germany"],"affiliations":[{"raw_affiliation_string":"Technische Hochschule N&#x00FC;rnberg Georg Simon Ohm,Germany","institution_ids":["https://openalex.org/I4210100612"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5120685667"],"corresponding_institution_ids":["https://openalex.org/I4210100612"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.81009303,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12684","display_name":"Stuttering Research and Treatment","score":0.9631999731063843,"subfield":{"id":"https://openalex.org/subfields/3203","display_name":"Clinical Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T12684","display_name":"Stuttering Research and Treatment","score":0.9631999731063843,"subfield":{"id":"https://openalex.org/subfields/3203","display_name":"Clinical Psychology"},"field":{"id":"https://openalex.org/fields/32","display_name":"Psychology"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10863","display_name":"Voice and Speech Disorders","score":0.008700000122189522,"subfield":{"id":"https://openalex.org/subfields/2737","display_name":"Physiology"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},{"id":"https://openalex.org/T11358","display_name":"Dysphagia Assessment and Management","score":0.007300000172108412,"subfield":{"id":"https://openalex.org/subfields/3616","display_name":"Speech and Hearing"},"field":{"id":"https://openalex.org/fields/36","display_name":"Health Professions"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/fluency","display_name":"Fluency","score":0.682200014591217},{"id":"https://openalex.org/keywords/transcription","display_name":"Transcription (linguistics)","score":0.541100025177002},{"id":"https://openalex.org/keywords/phonetic-transcription","display_name":"Phonetic transcription","score":0.5078999996185303},{"id":"https://openalex.org/keywords/german","display_name":"German","score":0.4790000021457672},{"id":"https://openalex.org/keywords/adaptation","display_name":"Adaptation (eye)","score":0.4424999952316284},{"id":"https://openalex.org/keywords/lexical-access","display_name":"Lexical access","score":0.4318999946117401},{"id":"https://openalex.org/keywords/stuttering","display_name":"Stuttering","score":0.4092999994754791},{"id":"https://openalex.org/keywords/natural","display_name":"Natural (archaeology)","score":0.39089998602867126}],"concepts":[{"id":"https://openalex.org/C2777413886","wikidata":"https://www.wikidata.org/wiki/Q3276013","display_name":"Fluency","level":2,"score":0.682200014591217},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.6614999771118164},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6305999755859375},{"id":"https://openalex.org/C179926584","wikidata":"https://www.wikidata.org/wiki/Q207714","display_name":"Transcription (linguistics)","level":2,"score":0.541100025177002},{"id":"https://openalex.org/C2777853878","wikidata":"https://www.wikidata.org/wiki/Q743569","display_name":"Phonetic transcription","level":2,"score":0.5078999996185303},{"id":"https://openalex.org/C154775046","wikidata":"https://www.wikidata.org/wiki/Q188","display_name":"German","level":2,"score":0.4790000021457672},{"id":"https://openalex.org/C139807058","wikidata":"https://www.wikidata.org/wiki/Q352374","display_name":"Adaptation (eye)","level":2,"score":0.4424999952316284},{"id":"https://openalex.org/C2987487971","wikidata":"https://www.wikidata.org/wiki/Q8096","display_name":"Lexical access","level":3,"score":0.4318999946117401},{"id":"https://openalex.org/C2781371259","wikidata":"https://www.wikidata.org/wiki/Q186676","display_name":"Stuttering","level":2,"score":0.4092999994754791},{"id":"https://openalex.org/C2776608160","wikidata":"https://www.wikidata.org/wiki/Q4785462","display_name":"Natural (archaeology)","level":2,"score":0.39089998602867126},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.3901999890804291},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3833000063896179},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3562999963760376},{"id":"https://openalex.org/C108154423","wikidata":"https://www.wikidata.org/wiki/Q1469792","display_name":"Salience (neuroscience)","level":2,"score":0.32910001277923584},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.31130000948905945},{"id":"https://openalex.org/C59656382","wikidata":"https://www.wikidata.org/wiki/Q191536","display_name":"Conjunction (astronomy)","level":2,"score":0.3077000081539154},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.3037000000476837},{"id":"https://openalex.org/C180747234","wikidata":"https://www.wikidata.org/wiki/Q23373","display_name":"Cognitive psychology","level":1,"score":0.30250000953674316},{"id":"https://openalex.org/C542774811","wikidata":"https://www.wikidata.org/wiki/Q10880526","display_name":"Prosody","level":2,"score":0.2881999909877777},{"id":"https://openalex.org/C186644900","wikidata":"https://www.wikidata.org/wiki/Q194152","display_name":"Parsing","level":2,"score":0.28690001368522644},{"id":"https://openalex.org/C137584468","wikidata":"https://www.wikidata.org/wiki/Q35395","display_name":"Phonetics","level":2,"score":0.28439998626708984},{"id":"https://openalex.org/C176982825","wikidata":"https://www.wikidata.org/wiki/Q835922","display_name":"Lexical analysis","level":2,"score":0.28279998898506165},{"id":"https://openalex.org/C164614171","wikidata":"https://www.wikidata.org/wiki/Q5204775","display_name":"DECIPHER","level":2,"score":0.27129998803138733},{"id":"https://openalex.org/C23224414","wikidata":"https://www.wikidata.org/wiki/Q176769","display_name":"Hidden Markov model","level":2,"score":0.2621999979019165},{"id":"https://openalex.org/C46312422","wikidata":"https://www.wikidata.org/wiki/Q11024","display_name":"Communication","level":1,"score":0.25130000710487366}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/asru65441.2025.11434655","is_oa":false,"landing_page_url":"https://doi.org/10.1109/asru65441.2025.11434655","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2025 IEEE Automatic Speech Recognition and Understanding Workshop (ASRU)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.6131540536880493,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":19,"referenced_works":["https://openalex.org/W187723304","https://openalex.org/W2078816101","https://openalex.org/W2714176837","https://openalex.org/W3175745257","https://openalex.org/W3197433369","https://openalex.org/W3198298452","https://openalex.org/W3205665014","https://openalex.org/W4224917162","https://openalex.org/W4225369246","https://openalex.org/W4366549322","https://openalex.org/W4372260522","https://openalex.org/W4372266801","https://openalex.org/W4385987638","https://openalex.org/W4391021769","https://openalex.org/W4402111647","https://openalex.org/W4402111741","https://openalex.org/W4402112052","https://openalex.org/W4406461753","https://openalex.org/W4408565363"],"related_works":[],"abstract_inverted_index":{"Automatic":[0],"transcription":[1],"of":[2,104],"stuttered":[3,61],"speech":[4,14,62],"remains":[5],"a":[6,37,75],"challenge,":[7],"even":[8],"for":[9,93,108],"modern":[10],"end-to-end":[11],"(E2E)":[12],"automatic":[13],"recognition":[15],"(ASR)":[16],"frameworks.":[17],"Dysfluencies":[18],"and":[19,32,44,57,69],"fluency-shaping":[20],"artifacts":[21],"are":[22],"often":[23],"overlooked,":[24],"resulting":[25],"in":[26,115],"non-verbatim":[27],"transcriptions":[28],"with":[29,79],"limited":[30],"clinical":[31],"research":[33],"value.":[34],"We":[35],"propose":[36],"parameter-efficient":[38],"adaptation":[39,106],"method":[40],"to":[41],"decode":[42],"dysfluencies":[43],"fluency":[45],"modifications":[46],"as":[47],"special":[48],"tokens":[49],"within":[50],"transcriptions,":[51],"evaluated":[52],"on":[53,96],"simulated":[54],"(LibriStutter,":[55],"English)":[56],"natural":[58],"(KSoF,":[59],"German)":[60],"datasets.":[63],"To":[64],"mitigate":[65],"ASR":[66,110],"performance":[67,95],"disparities":[68],"bias":[70],"towards":[71],"English,":[72],"we":[73],"introduce":[74],"multi-step":[76],"fine-tuning":[77],"strategy":[78],"language-adaptive":[80],"pretraining.":[81],"Tokenization":[82],"analysis":[83],"further":[84],"highlights":[85],"the":[86,102],"tokenizer\u2019s":[87],"English-centric":[88],"bias,":[89],"which":[90],"poses":[91],"challenges":[92],"improving":[94],"German":[97],"data.":[98],"Our":[99],"findings":[100],"demonstrate":[101],"effectiveness":[103],"lightweight":[105],"techniques":[107],"dysfluency-aware":[109],"while":[111],"exposing":[112],"key":[113],"limitations":[114],"multilingual":[116],"E2E":[117],"systems.":[118]},"counts_by_year":[],"updated_date":"2026-04-03T16:44:17.987007","created_date":"2026-04-03T00:00:00"}
