{"id":"https://openalex.org/W4401246677","doi":"https://doi.org/10.1109/taslp.2024.3436618","title":"SpeechPrompt: Prompting Speech Language Models for Speech Processing Tasks","display_name":"SpeechPrompt: Prompting Speech Language Models for Speech Processing Tasks","publication_year":2024,"publication_date":"2024-01-01","ids":{"openalex":"https://openalex.org/W4401246677","doi":"https://doi.org/10.1109/taslp.2024.3436618"},"language":"en","primary_location":{"id":"doi:10.1109/taslp.2024.3436618","is_oa":false,"landing_page_url":"https://doi.org/10.1109/taslp.2024.3436618","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"},"type":"article","indexed_in":["arxiv","crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2408.13040","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5016340997","display_name":"Kai-Wei Chang","orcid":"https://orcid.org/0009-0001-1562-7282"},"institutions":[{"id":"https://openalex.org/I16733864","display_name":"National Taiwan University","ror":"https://ror.org/05bqach95","country_code":"TW","type":"education","lineage":["https://openalex.org/I16733864"]}],"countries":["TW"],"is_corresponding":true,"raw_author_name":"Kai-Wei Chang","raw_affiliation_strings":["Graduate Institute of Communication Engineering, National Taiwan University, Taipei City, Taiwan"],"affiliations":[{"raw_affiliation_string":"Graduate Institute of Communication Engineering, National Taiwan University, Taipei City, Taiwan","institution_ids":["https://openalex.org/I16733864"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101474128","display_name":"Haibin Wu","orcid":"https://orcid.org/0000-0001-7166-5534"},"institutions":[{"id":"https://openalex.org/I16733864","display_name":"National Taiwan University","ror":"https://ror.org/05bqach95","country_code":"TW","type":"education","lineage":["https://openalex.org/I16733864"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Haibin Wu","raw_affiliation_strings":["Graduate Institute of Communication Engineering, National Taiwan University, Taipei City, Taiwan"],"affiliations":[{"raw_affiliation_string":"Graduate Institute of Communication Engineering, National Taiwan University, Taipei City, Taiwan","institution_ids":["https://openalex.org/I16733864"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5076748207","display_name":"Yu-Kai Wang","orcid":null},"institutions":[{"id":"https://openalex.org/I74973139","display_name":"Carnegie Mellon University","ror":"https://ror.org/05x2bcf33","country_code":"US","type":"education","lineage":["https://openalex.org/I74973139"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yu-Kai Wang","raw_affiliation_strings":["Carnegie Mellon University, Pittsburgh, PA, USA"],"affiliations":[{"raw_affiliation_string":"Carnegie Mellon University, Pittsburgh, PA, USA","institution_ids":["https://openalex.org/I74973139"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5078327190","display_name":"Yuan-Kuei Wu","orcid":"https://orcid.org/0000-0002-3044-8709"},"institutions":[{"id":"https://openalex.org/I16733864","display_name":"National Taiwan University","ror":"https://ror.org/05bqach95","country_code":"TW","type":"education","lineage":["https://openalex.org/I16733864"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Yuan-Kuei Wu","raw_affiliation_strings":["Graduate Institute of Communication Engineering, National Taiwan University, Taipei City, Taiwan"],"affiliations":[{"raw_affiliation_string":"Graduate Institute of Communication Engineering, National Taiwan University, Taipei City, Taiwan","institution_ids":["https://openalex.org/I16733864"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101501252","display_name":"Hua Shen","orcid":"https://orcid.org/0000-0002-4928-525X"},"institutions":[{"id":"https://openalex.org/I27837315","display_name":"University of Michigan","ror":"https://ror.org/00jmfr291","country_code":"US","type":"education","lineage":["https://openalex.org/I27837315"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Hua Shen","raw_affiliation_strings":["University of Michigan, Ann Arbor, MI, USA"],"affiliations":[{"raw_affiliation_string":"University of Michigan, Ann Arbor, MI, USA","institution_ids":["https://openalex.org/I27837315"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5002538444","display_name":"Wei\u2010Cheng Tseng","orcid":"https://orcid.org/0000-0003-3528-8633"},"institutions":[{"id":"https://openalex.org/I86519309","display_name":"The University of Texas at Austin","ror":"https://ror.org/00hj54h04","country_code":"US","type":"education","lineage":["https://openalex.org/I86519309"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Wei-Cheng Tseng","raw_affiliation_strings":["University of Texas at Austin, Austin, TX, USA"],"affiliations":[{"raw_affiliation_string":"University of Texas at Austin, Austin, TX, USA","institution_ids":["https://openalex.org/I86519309"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5049287399","display_name":"Iu-thing Kang","orcid":null},"institutions":[{"id":"https://openalex.org/I4210148979","display_name":"MediaTek (Taiwan)","ror":"https://ror.org/05g9jck81","country_code":"TW","type":"company","lineage":["https://openalex.org/I4210148979"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Iu-Thing Kang","raw_affiliation_strings":["MediaTek, Hsinchu, Taiwan"],"affiliations":[{"raw_affiliation_string":"MediaTek, Hsinchu, Taiwan","institution_ids":["https://openalex.org/I4210148979"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5029566548","display_name":"Shang-Wen Li","orcid":"https://orcid.org/0000-0003-0656-9874"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shang-Wen Li","raw_affiliation_strings":["FAIR, Meta, Menlo Park, CA, USA"],"affiliations":[{"raw_affiliation_string":"FAIR, Meta, Menlo Park, CA, USA","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5040508737","display_name":"Hung-yi Lee","orcid":"https://orcid.org/0000-0002-9654-5747"},"institutions":[{"id":"https://openalex.org/I16733864","display_name":"National Taiwan University","ror":"https://ror.org/05bqach95","country_code":"TW","type":"education","lineage":["https://openalex.org/I16733864"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Hung-Yi Lee","raw_affiliation_strings":["Graduate Institute of Communication Engineering, National Taiwan University, Taipei City, Taiwan"],"affiliations":[{"raw_affiliation_string":"Graduate Institute of Communication Engineering, National Taiwan University, Taipei City, Taiwan","institution_ids":["https://openalex.org/I16733864"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":9,"corresponding_author_ids":["https://openalex.org/A5016340997"],"corresponding_institution_ids":["https://openalex.org/I16733864"],"apc_list":null,"apc_paid":null,"fwci":1.7227,"has_fulltext":true,"cited_by_count":5,"citation_normalized_percentile":{"value":0.86652667,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":97,"max":98},"biblio":{"volume":"32","issue":null,"first_page":"3730","last_page":"3744"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.9994999766349792,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.6472387909889221},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.5466998815536499},{"id":"https://openalex.org/keywords/speech-processing","display_name":"Speech processing","score":0.48744824528694153},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.39231887459754944}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6472387909889221},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.5466998815536499},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.48744824528694153},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.39231887459754944}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1109/taslp.2024.3436618","is_oa":false,"landing_page_url":"https://doi.org/10.1109/taslp.2024.3436618","pdf_url":null,"source":{"id":"https://openalex.org/S4210169297","display_name":"IEEE/ACM Transactions on Audio Speech and Language Processing","issn_l":"2329-9290","issn":["2329-9290","2329-9304"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310319808","host_organization_name":"Institute of Electrical and Electronics Engineers","host_organization_lineage":["https://openalex.org/P4310319808"],"host_organization_lineage_names":["Institute of Electrical and Electronics Engineers"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"IEEE/ACM Transactions on Audio, Speech, and Language Processing","raw_type":"journal-article"},{"id":"pmh:oai:arXiv.org:2408.13040","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2408.13040","pdf_url":"https://arxiv.org/pdf/2408.13040","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"public-domain","license_id":"https://openalex.org/licenses/public-domain","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2408.13040","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2408.13040","pdf_url":"https://arxiv.org/pdf/2408.13040","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"public-domain","license_id":"https://openalex.org/licenses/public-domain","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4401246677.pdf","grobid_xml":"https://content.openalex.org/works/W4401246677.grobid-xml"},"referenced_works_count":78,"referenced_works":["https://openalex.org/W1494198834","https://openalex.org/W2797583228","https://openalex.org/W2951082691","https://openalex.org/W2964243274","https://openalex.org/W2972495969","https://openalex.org/W2972584841","https://openalex.org/W2995181338","https://openalex.org/W3015265920","https://openalex.org/W3016011332","https://openalex.org/W3016181583","https://openalex.org/W3034999214","https://openalex.org/W3041561163","https://openalex.org/W3096251052","https://openalex.org/W3098267758","https://openalex.org/W3108231750","https://openalex.org/W3134187040","https://openalex.org/W3140429000","https://openalex.org/W3153427360","https://openalex.org/W3160747466","https://openalex.org/W3161223924","https://openalex.org/W3172642864","https://openalex.org/W3180374548","https://openalex.org/W3185341429","https://openalex.org/W3188542058","https://openalex.org/W3189296823","https://openalex.org/W3196509775","https://openalex.org/W3197580070","https://openalex.org/W3198217962","https://openalex.org/W3198608154","https://openalex.org/W3209059054","https://openalex.org/W4205991051","https://openalex.org/W4226162428","https://openalex.org/W4281492411","https://openalex.org/W4285250921","https://openalex.org/W4285286749","https://openalex.org/W4287887366","https://openalex.org/W4288089799","https://openalex.org/W4292825791","https://openalex.org/W4295308567","https://openalex.org/W4296070387","https://openalex.org/W4297841405","https://openalex.org/W4297841687","https://openalex.org/W4312651322","https://openalex.org/W4322766882","https://openalex.org/W4372260337","https://openalex.org/W4372270126","https://openalex.org/W4381786045","https://openalex.org/W4385245566","https://openalex.org/W4385567149","https://openalex.org/W4385822683","https://openalex.org/W4385822890","https://openalex.org/W4385823335","https://openalex.org/W4386187806","https://openalex.org/W4391021530","https://openalex.org/W4391021627","https://openalex.org/W4392902623","https://openalex.org/W4392909068","https://openalex.org/W4392909760","https://openalex.org/W4393157525","https://openalex.org/W4394671563","https://openalex.org/W6746278845","https://openalex.org/W6750665317","https://openalex.org/W6752946794","https://openalex.org/W6769627184","https://openalex.org/W6777335856","https://openalex.org/W6780218876","https://openalex.org/W6783867762","https://openalex.org/W6788231366","https://openalex.org/W6790356757","https://openalex.org/W6796456916","https://openalex.org/W6810313920","https://openalex.org/W6810673746","https://openalex.org/W6838356808","https://openalex.org/W6847363464","https://openalex.org/W6852781825","https://openalex.org/W6853530687","https://openalex.org/W6857054612","https://openalex.org/W6917585676"],"related_works":["https://openalex.org/W2981428355","https://openalex.org/W1834994814","https://openalex.org/W2041273198","https://openalex.org/W1599055764","https://openalex.org/W2131711534","https://openalex.org/W2149163000","https://openalex.org/W2962858469","https://openalex.org/W2289873871","https://openalex.org/W2559040841","https://openalex.org/W114661351"],"abstract_inverted_index":{"Prompting":[0],"has":[1,119],"become":[2,78],"a":[3,61,121,188,205,234],"practical":[4],"method":[5,217,227,242],"for":[6,69,130,173],"utilizing":[7],"pre-trained":[8],"language":[9,53,131],"models":[10,54,232],"(LMs).":[11],"This":[12,64],"approach":[13],"offers":[14],"several":[15],"advantages.":[16],"It":[17],"allows":[18],"an":[19],"LM":[20,90],"to":[21,23,55,103,223],"adapt":[22],"new":[24],"tasks":[25,59,86,182,194],"with":[26,233,252],"minimal":[27],"training":[28],"and":[29,38,47,201],"parameter":[30],"updates,":[31],"thus":[32],"achieving":[33],"efficiency":[34],"in":[35,60,72,111,124,247],"both":[36],"storage":[37],"computation.":[39],"Additionally,":[40],"prompting":[41,108,148,208,216,241,263],"modifies":[42],"only":[43,151],"the":[44,49,67,83,89,95,101,105,112,215,224,248,253,259,261],"LM's":[45],"inputs":[46],"harnesses":[48],"generative":[50],"capabilities":[51],"of":[52,85,97,107,114,237],"address":[56],"various":[57],"downstream":[58],"unified":[62,147,207],"manner.":[63],"significantly":[65],"reduces":[66],"need":[68],"human":[70],"labor":[71],"designing":[73],"task-specific":[74],"models.":[75],"These":[76],"advantages":[77],"even":[79],"more":[80],"evident":[81],"as":[82,155,196],"number":[84,236],"served":[87],"by":[88,94],"scales":[91],"up.":[92],"Motivated":[93],"strengths":[96],"prompting,":[98],"we":[99,178,190],"are":[100,142],"first":[102],"explore":[104],"potential":[106],"speech":[109,115,126,140,171,174,180,197,202,255],"LMs":[110,256],"domain":[113],"processing.":[116],"Recently,":[117],"there":[118],"been":[120],"growing":[122],"interest":[123],"converting":[125],"into":[127,170,183,258],"discrete":[128],"units":[129,141],"modeling.":[132],"Our":[133],"pioneer":[134],"research":[135],"demonstrates":[136],"that":[137,165,214],"these":[138],"quantized":[139],"highly":[143],"versatile":[144],"within":[145,204],"our":[146],"framework.":[149,209],"Not":[150],"can":[152,166,191,218],"they":[153,159],"serve":[154],"class":[156],"labels,":[157],"but":[158],"also":[160,243],"contain":[161],"rich":[162],"phonetic":[163],"information":[164],"be":[167],"re-synthesized":[168],"back":[169],"signals":[172],"generation":[175,185,203],"tasks.":[176,186],"Specifically,":[177],"reformulate":[179],"processing":[181],"speech-to-unit":[184],"As":[187],"result,":[189],"seamlessly":[192],"integrate":[193],"such":[195],"classification,":[198],"sequence":[199],"generation,":[200],"single,":[206],"The":[210,240],"experiment":[211],"results":[212,246],"show":[213],"achieve":[219],"competitive":[220],"performance":[221],"compared":[222],"strong":[225],"fine-tuning":[226],"based":[228],"on":[229],"self-supervised":[230],"learning":[231],"similar":[235],"trainable":[238],"parameters.":[239],"shows":[244],"promising":[245],"few-shot":[249],"setting.":[250],"Moreover,":[251],"advanced":[254],"coming":[257],"stage,":[260],"proposed":[262],"framework":[264],"attains":[265],"great":[266],"potential.":[267]},"counts_by_year":[{"year":2025,"cited_by_count":5}],"updated_date":"2026-04-03T22:45:19.894376","created_date":"2025-10-10T00:00:00"}
