{"id":"https://openalex.org/W4403576851","doi":"https://doi.org/10.1145/3663548.3688494","title":"CARTGPT: Improving CART Captioning using Large Language Models","display_name":"CARTGPT: Improving CART Captioning using Large Language Models","publication_year":2024,"publication_date":"2024-10-20","ids":{"openalex":"https://openalex.org/W4403576851","doi":"https://doi.org/10.1145/3663548.3688494"},"language":"en","primary_location":{"id":"doi:10.1145/3663548.3688494","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3663548.3688494","pdf_url":null,"source":null,"license":"public-domain","license_id":"https://openalex.org/licenses/public-domain","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"The 26th International ACM SIGACCESS Conference on Computers and Accessibility","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5101348472","display_name":"Liang-Yuan Wu","orcid":null},"institutions":[{"id":"https://openalex.org/I27837315","display_name":"University of Michigan","ror":"https://ror.org/00jmfr291","country_code":"US","type":"education","lineage":["https://openalex.org/I27837315"]},{"id":"https://openalex.org/I4210111179","display_name":"Michigan United","ror":"https://ror.org/0291ys696","country_code":"US","type":"nonprofit","lineage":["https://openalex.org/I4210111179"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Liang-Yuan Wu","raw_affiliation_strings":["Computer Science and Engineering, University of Michigan, United States"],"raw_orcid":"https://orcid.org/0009-0008-3081-1134","affiliations":[{"raw_affiliation_string":"Computer Science and Engineering, University of Michigan, United States","institution_ids":["https://openalex.org/I27837315","https://openalex.org/I4210111179"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5114337039","display_name":"Andrea Kleiver","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Andrea Kleiver","raw_affiliation_strings":["Certified CART Captioner, United States"],"raw_orcid":"https://orcid.org/0009-0005-6285-6116","affiliations":[{"raw_affiliation_string":"Certified CART Captioner, United States","institution_ids":[]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5103096161","display_name":"Dhruv Jain","orcid":"https://orcid.org/0000-0001-6176-968X"},"institutions":[{"id":"https://openalex.org/I27837315","display_name":"University of Michigan","ror":"https://ror.org/00jmfr291","country_code":"US","type":"education","lineage":["https://openalex.org/I27837315"]},{"id":"https://openalex.org/I4210111179","display_name":"Michigan United","ror":"https://ror.org/0291ys696","country_code":"US","type":"nonprofit","lineage":["https://openalex.org/I4210111179"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Dhruv Jain","raw_affiliation_strings":["Computer Science and Engineering, University of Michigan, United States"],"raw_orcid":"https://orcid.org/0000-0001-6176-968X","affiliations":[{"raw_affiliation_string":"Computer Science and Engineering, University of Michigan, United States","institution_ids":["https://openalex.org/I27837315","https://openalex.org/I4210111179"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5101348472"],"corresponding_institution_ids":["https://openalex.org/I27837315","https://openalex.org/I4210111179"],"apc_list":null,"apc_paid":null,"fwci":0.6623,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.75687633,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":95,"max":96},"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9976000189781189,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9961000084877014,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/closed-captioning","display_name":"Closed captioning","score":0.8494265079498291},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8285989761352539},{"id":"https://openalex.org/keywords/cart","display_name":"Cart","score":0.8106671571731567},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.6202508211135864},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4883084297180176},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.41673406958580017},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.0731172263622284}],"concepts":[{"id":"https://openalex.org/C157657479","wikidata":"https://www.wikidata.org/wiki/Q2367247","display_name":"Closed captioning","level":3,"score":0.8494265079498291},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8285989761352539},{"id":"https://openalex.org/C2777275308","wikidata":"https://www.wikidata.org/wiki/Q234668","display_name":"Cart","level":2,"score":0.8106671571731567},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.6202508211135864},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4883084297180176},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.41673406958580017},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.0731172263622284},{"id":"https://openalex.org/C78519656","wikidata":"https://www.wikidata.org/wiki/Q101333","display_name":"Mechanical engineering","level":1,"score":0.0},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3663548.3688494","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3663548.3688494","pdf_url":null,"source":null,"license":"public-domain","license_id":"https://openalex.org/licenses/public-domain","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"The 26th International ACM SIGACCESS Conference on Computers and Accessibility","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":17,"referenced_works":["https://openalex.org/W264645860","https://openalex.org/W1494198834","https://openalex.org/W1971631540","https://openalex.org/W2010246411","https://openalex.org/W2094954292","https://openalex.org/W2290318471","https://openalex.org/W2548398025","https://openalex.org/W2799473636","https://openalex.org/W2964539095","https://openalex.org/W3023626240","https://openalex.org/W3046375318","https://openalex.org/W3101648800","https://openalex.org/W3160638507","https://openalex.org/W3160766462","https://openalex.org/W4210764005","https://openalex.org/W4283012421","https://openalex.org/W4385128155"],"related_works":["https://openalex.org/W4210416330","https://openalex.org/W4400093351","https://openalex.org/W2775506363","https://openalex.org/W3088136942","https://openalex.org/W4290852288","https://openalex.org/W2949362007","https://openalex.org/W4283207562","https://openalex.org/W2963177403","https://openalex.org/W2330246314","https://openalex.org/W3204019825"],"abstract_inverted_index":{"Communication":[0],"Access":[1],"Realtime":[2],"Translation":[3],"(CART)":[4],"is":[5],"a":[6,30,73,101,115,129],"commonly":[7],"used":[8,12],"real-time":[9],"captioning":[10,54],"technology":[11],"by":[13,38],"deaf":[14],"and":[15,26,90,99,128],"hard":[16],"of":[17,33,63,145],"hearing":[18],"(DHH)":[19],"people,":[20],"due":[21],"to":[22,28,75,105],"its":[23],"accuracy,":[24],"reliability,":[25],"ability":[27],"provide":[29],"holistic":[31],"view":[32],"the":[34,52,61,143],"conversational":[35],"environment":[36],"(e.g.,":[37,47],"displaying":[39],"speaker":[40],"names).":[41],"However,":[42],"in":[43,79,86,109],"many":[44],"real-world":[45],"situations":[46],"noisy":[48,116],"environments,":[49],"long":[50],"meetings),":[51],"CART":[53,77,88,125],"accuracy":[55],"can":[56],"considerably":[57],"decline,":[58],"thereby":[59],"affecting":[60],"comprehension":[62],"DHH":[64,139],"people.":[65],"In":[66],"this":[67],"work-in-progress":[68],"paper,":[69],"we":[70],"introduce":[71],"CARTGPT,":[72],"system":[74,122],"assist":[76],"captioners":[78],"improving":[80],"their":[81],"transcription":[82],"accuracy.":[83],"CARTGPT":[84],"takes":[85],"errored":[87],"captions":[89,96,108],"inaccurate":[91],"automatic":[92],"speech":[93,117],"recognition":[94],"(ASR)":[95],"as":[97],"input":[98],"uses":[100],"large":[102],"language":[103],"model":[104,132],"generate":[106],"corrected":[107],"real-time.":[110],"We":[111],"quantified":[112],"performance":[113],"on":[114],"dataset,":[118],"showing":[119],"that":[120],"our":[121,146],"outperforms":[123],"both":[124],"(+5.6%":[126],"accuracy)":[127],"state-of-the-art":[130],"ASR":[131],"(+17.3%).":[133],"A":[134],"preliminary":[135],"evaluation":[136],"with":[137],"three":[138],"users":[140],"further":[141],"demonstrates":[142],"promise":[144],"approach.":[147]},"counts_by_year":[{"year":2025,"cited_by_count":2}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
