{"id":"https://openalex.org/W4403678733","doi":"https://doi.org/10.1109/icac61394.2024.10718747","title":"SpeechCraft: An Integrated Data Generation Pipeline from Videos for LLM Finetuning","display_name":"SpeechCraft: An Integrated Data Generation Pipeline from Videos for LLM Finetuning","publication_year":2024,"publication_date":"2024-08-28","ids":{"openalex":"https://openalex.org/W4403678733","doi":"https://doi.org/10.1109/icac61394.2024.10718747"},"language":"en","primary_location":{"id":"doi:10.1109/icac61394.2024.10718747","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icac61394.2024.10718747","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 29th International Conference on Automation and Computing (ICAC)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5114373553","display_name":"Jyothi Swaroop Arlagadda N","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jyothi Swaroop Arlagadda N","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5114373554","display_name":"Venkata Sai Mahesh Vuppalapati","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Venkata Sai Mahesh Vuppalapati","raw_affiliation_strings":["Tubi TV,San Fransisco,USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Tubi TV,San Fransisco,USA","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5094067365","display_name":"Shrey Modi","orcid":null},"institutions":[{"id":"https://openalex.org/I59897056","display_name":"California State University, Long Beach","ror":"https://ror.org/0080fxk18","country_code":"US","type":"education","lineage":["https://openalex.org/I59897056"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Shrey Dharmendra Modi","raw_affiliation_strings":["California State University Long Beach,Long Beach,USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"California State University Long Beach,Long Beach,USA","institution_ids":["https://openalex.org/I59897056"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5069304628","display_name":"Rahul Vishwakarma","orcid":null},"institutions":[{"id":"https://openalex.org/I59897056","display_name":"California State University, Long Beach","ror":"https://ror.org/0080fxk18","country_code":"US","type":"education","lineage":["https://openalex.org/I59897056"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Rahul Vishwakarma","raw_affiliation_strings":["California State University Long Beach,Long Beach,USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"California State University Long Beach,Long Beach,USA","institution_ids":["https://openalex.org/I59897056"]}]},{"author_position":"last","author":{"id":null,"display_name":"Heer Shah","orcid":null},"institutions":[{"id":"https://openalex.org/I59897056","display_name":"California State University, Long Beach","ror":"https://ror.org/0080fxk18","country_code":"US","type":"education","lineage":["https://openalex.org/I59897056"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Heer Shah","raw_affiliation_strings":["California State University Long Beach,Long Beach,USA"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"California State University Long Beach,Long Beach,USA","institution_ids":["https://openalex.org/I59897056"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":1.5206,"has_fulltext":false,"cited_by_count":5,"citation_normalized_percentile":{"value":0.85807498,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":99},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"6"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.9678999781608582,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.9678999781608582,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9534000158309937,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7307420969009399},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.6688990592956543},{"id":"https://openalex.org/keywords/operating-system","display_name":"Operating system","score":0.07353624701499939}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7307420969009399},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.6688990592956543},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.07353624701499939}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icac61394.2024.10718747","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icac61394.2024.10718747","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2024 29th International Conference on Automation and Computing (ICAC)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":13,"referenced_works":["https://openalex.org/W2079100340","https://openalex.org/W4254635861","https://openalex.org/W4319780902","https://openalex.org/W4392972103","https://openalex.org/W4400531852","https://openalex.org/W6600284362","https://openalex.org/W6751420435","https://openalex.org/W6778883912","https://openalex.org/W6850625674","https://openalex.org/W6852584927","https://openalex.org/W6861273305","https://openalex.org/W6861961595","https://openalex.org/W6862618468"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052"],"abstract_inverted_index":{"Customizing":[0],"Large":[1],"Language":[2],"Models":[3],"(LLMs)":[4],"for":[5,53,109,140],"specific":[6],"tasks":[7],"demands":[8],"high-quality,":[9],"domain-specific":[10],"datasets.":[11,56],"Existing":[12],"solutions":[13],"often":[14],"struggle":[15],"with":[16,82,124],"extracting":[17],"meaningful":[18],"and":[19,29,46,93,147,188,196,214],"structured":[20,72,117],"data":[21,62,118,131,208,212],"from":[22],"unstructured":[23,67],"video":[24,68,83],"content,":[25],"leading":[26],"to":[27,41,47,96,172,201],"inefficiencies":[28],"limitations":[30],"in":[31,130,143,160,194,211],"LLM":[32,77,110,125,158,174],"training.":[33,78],"This":[34,100],"paper":[35],"is":[36,102],"motivated":[37],"by":[38],"the":[39,76,167,182,190],"need":[40],"address":[42],"these":[43,161],"pain":[44],"points":[45],"develop":[48],"a":[49,59,71,177],"more":[50],"effective":[51],"method":[52,60,179],"generating":[54],"high-quality":[55],"We":[57,150],"present":[58],"of":[61,169,184,192],"generation":[63],"pipeline":[64,155],"that":[65,122,153,180],"transforms":[66],"content":[69],"into":[70,105,119],"format,":[73],"which":[74,127],"improves":[75,181],"Our":[79,163],"approach":[80],"initiates":[81],"processing":[84],"techniques":[85],"such":[86],"as":[87],"object":[88],"detection,":[89],"speech-to-":[90],"text":[91],"transcription,":[92],"sentiment":[94],"analysis":[95],"extract":[97],"crucial":[98],"information.":[99],"information":[101],"then":[103],"refined":[104],"customized":[106],"datasets":[107,171],"optimized":[108],"input.":[111],"Further":[112],"stages":[113],"involve":[114],"adapting":[115],"this":[116],"different":[120],"formats":[121],"align":[123],"architectures,":[126],"enables":[128],"flexibility":[129],"utilization.":[132],"The":[133],"last":[134],"phase":[135],"focus":[136],"on":[137],"fine-tuning":[138],"LLMs":[139,193],"specialized":[141],"applications":[142],"both":[144],"software":[145],"environments":[146],"hardware":[148],"integrations.":[149],"also":[151],"demonstrate":[152],"our":[154,204],"significantly":[156],"enhances":[157],"performance":[159,217],"applications.":[162,220],"research":[164],"findings":[165],"emphasize":[166],"potential":[168],"video-based":[170],"augment":[173],"capabilities,":[175],"suggesting":[176],"scalable":[178],"efficiency":[183],"artificial":[185],"intelligence":[186],"training":[187],"expands":[189],"applicability":[191],"current":[195],"future":[197],"technological":[198],"landscapes.":[199],"Compared":[200],"traditional":[202],"methods,":[203],"solution":[205],"offers":[206],"improved":[207],"quality,":[209],"versatility":[210],"formats,":[213],"superior":[215],"model":[216],"across":[218],"diverse":[219]},"counts_by_year":[{"year":2026,"cited_by_count":3},{"year":2025,"cited_by_count":2}],"updated_date":"2026-06-13T07:54:00.901334","created_date":"2025-10-10T00:00:00"}
