{"id":"https://openalex.org/W4399198483","doi":"https://doi.org/10.48550/arxiv.2405.19041","title":"BLSP-KD: Bootstrapping Language-Speech Pre-training via Knowledge Distillation","display_name":"BLSP-KD: Bootstrapping Language-Speech Pre-training via Knowledge Distillation","publication_year":2024,"publication_date":"2024-05-29","ids":{"openalex":"https://openalex.org/W4399198483","doi":"https://doi.org/10.48550/arxiv.2405.19041"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2405.19041","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2405.19041","pdf_url":"https://arxiv.org/pdf/2405.19041","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":null},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2405.19041","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100337719","display_name":"Chen Wang","orcid":"https://orcid.org/0009-0007-5475-1554"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Wang, Chen","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5073673415","display_name":"Minpeng Liao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liao, Minpeng","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100873578","display_name":"Zhongqiang Huang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Huang, Zhongqiang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5100319572","display_name":"Jiajun Zhang","orcid":"https://orcid.org/0000-0001-5293-7434"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Jiajun","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5100337719"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9970999956130981,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9970999956130981,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10201","display_name":"Speech Recognition and Synthesis","score":0.9854999780654907,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9843999743461609,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/bootstrapping","display_name":"Bootstrapping (finance)","score":0.842536985874176},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5916635394096375},{"id":"https://openalex.org/keywords/training","display_name":"Training (meteorology)","score":0.5311557054519653},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.4839744567871094},{"id":"https://openalex.org/keywords/distillation","display_name":"Distillation","score":0.43605300784111023},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.37399035692214966},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.354655921459198},{"id":"https://openalex.org/keywords/econometrics","display_name":"Econometrics","score":0.1967606246471405},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.1679714322090149},{"id":"https://openalex.org/keywords/chemistry","display_name":"Chemistry","score":0.13024282455444336},{"id":"https://openalex.org/keywords/chromatography","display_name":"Chromatography","score":0.10455977916717529},{"id":"https://openalex.org/keywords/geography","display_name":"Geography","score":0.08526235818862915}],"concepts":[{"id":"https://openalex.org/C207609745","wikidata":"https://www.wikidata.org/wiki/Q4944086","display_name":"Bootstrapping (finance)","level":2,"score":0.842536985874176},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5916635394096375},{"id":"https://openalex.org/C2777211547","wikidata":"https://www.wikidata.org/wiki/Q17141490","display_name":"Training (meteorology)","level":2,"score":0.5311557054519653},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4839744567871094},{"id":"https://openalex.org/C204030448","wikidata":"https://www.wikidata.org/wiki/Q101017","display_name":"Distillation","level":2,"score":0.43605300784111023},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.37399035692214966},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.354655921459198},{"id":"https://openalex.org/C149782125","wikidata":"https://www.wikidata.org/wiki/Q160039","display_name":"Econometrics","level":1,"score":0.1967606246471405},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.1679714322090149},{"id":"https://openalex.org/C185592680","wikidata":"https://www.wikidata.org/wiki/Q2329","display_name":"Chemistry","level":0,"score":0.13024282455444336},{"id":"https://openalex.org/C43617362","wikidata":"https://www.wikidata.org/wiki/Q170050","display_name":"Chromatography","level":1,"score":0.10455977916717529},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.08526235818862915},{"id":"https://openalex.org/C153294291","wikidata":"https://www.wikidata.org/wiki/Q25261","display_name":"Meteorology","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2405.19041","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2405.19041","pdf_url":"https://arxiv.org/pdf/2405.19041","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":null},{"id":"doi:10.48550/arxiv.2405.19041","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2405.19041","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2405.19041","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2405.19041","pdf_url":"https://arxiv.org/pdf/2405.19041","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":null},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W1534274833","https://openalex.org/W3117246195","https://openalex.org/W156620619","https://openalex.org/W2616249226","https://openalex.org/W2098233217","https://openalex.org/W2914363205","https://openalex.org/W2997844990","https://openalex.org/W1598221548","https://openalex.org/W2081850291","https://openalex.org/W1963695443"],"abstract_inverted_index":{"Recent":[0],"end-to-end":[1,126],"approaches":[2],"have":[3],"shown":[4],"promise":[5],"in":[6,18],"extending":[7,151],"large":[8],"language":[9,155],"models":[10],"(LLMs)":[11],"to":[12,27,32,86,153],"speech":[13,73,88,114,143],"inputs,":[14],"but":[15],"face":[16],"limitations":[17,52],"directly":[19],"assessing":[20],"and":[21,25,74,128],"optimizing":[22],"alignment":[23,30,61],"quality":[24],"fail":[26],"achieve":[28],"fine-grained":[29,98],"due":[31],"speech-text":[33,60],"length":[34],"mismatch.":[35],"We":[36,100],"introduce":[37,102],"BLSP-KD,":[38],"a":[39,83,106],"novel":[40],"approach":[41,146],"for":[42,72,113,140,150],"Bootstrapping":[43],"Language-Speech":[44],"Pretraining":[45],"via":[46],"Knowledge":[47],"Distillation,":[48],"which":[49],"addresses":[50],"these":[51],"through":[53],"two":[54],"key":[55],"techniques.":[56],"First,":[57],"it":[58,81],"optimizes":[59],"by":[62],"minimizing":[63],"the":[64,67],"divergence":[65],"between":[66],"LLM's":[68],"next-token":[69],"prediction":[70],"distributions":[71],"text":[75,95],"inputs":[76,115],"using":[77],"knowledge":[78,117],"distillation.":[79,118],"Second,":[80],"employs":[82],"continuous-integrate-andfire":[84],"strategy":[85],"segment":[87],"into":[89],"tokens":[90],"that":[91,122],"correspond":[92],"one-to-one":[93],"with":[94,131,142],"tokens,":[96],"enabling":[97],"alignment.":[99],"also":[101],"Partial":[103],"LoRA":[104],"(PLoRA),":[105],"new":[107,148],"adaptation":[108],"method":[109],"supporting":[110],"LLM":[111],"finetuning":[112],"under":[116],"Quantitative":[119],"evaluation":[120],"shows":[121],"BLSP-KD":[123],"outperforms":[124],"previous":[125],"baselines":[127],"cascaded":[129],"systems":[130],"comparable":[132],"scale":[133],"of":[134],"parameters,":[135],"facilitating":[136],"general":[137],"instruction-following":[138],"capabilities":[139],"LLMs":[141,152],"inputs.":[144],"This":[145],"provides":[147],"possibilities":[149],"spoken":[154],"interactions.":[156]},"counts_by_year":[],"updated_date":"2025-11-06T06:51:31.235846","created_date":"2025-10-10T00:00:00"}
