{"id":"https://openalex.org/W7152447921","doi":"https://doi.org/10.48550/arxiv.2604.06205","title":"Tool-MCoT: Tool Augmented Multimodal Chain-of-Thought for Content Safety Moderation","display_name":"Tool-MCoT: Tool Augmented Multimodal Chain-of-Thought for Content Safety Moderation","publication_year":2026,"publication_date":"2026-03-15","ids":{"openalex":"https://openalex.org/W7152447921","doi":"https://doi.org/10.48550/arxiv.2604.06205"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.06205","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.06205","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.06205","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5133260958","display_name":"Shutong Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Shutong","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5128662286","display_name":"Dylan Zhou","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhou, Dylan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5109012562","display_name":"Yinxiao Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Yinxiao","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133307819","display_name":"Yang Yang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Yang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5133276307","display_name":"Huiwen Luo","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Luo, Huiwen","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5133259970","display_name":"Wenfei Zou","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zou, Wenfei","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":6,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12262","display_name":"Hate Speech and Cyberbullying Detection","score":0.9437999725341797,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12262","display_name":"Hate Speech and Cyberbullying Detection","score":0.9437999725341797,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.013299999758601189,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11147","display_name":"Misinformation and Its Impacts","score":0.01209999993443489,"subfield":{"id":"https://openalex.org/subfields/3312","display_name":"Sociology and Political Science"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/moderation","display_name":"Moderation","score":0.6955000162124634},{"id":"https://openalex.org/keywords/scalability","display_name":"Scalability","score":0.6342999935150146},{"id":"https://openalex.org/keywords/inference","display_name":"Inference","score":0.516700029373169},{"id":"https://openalex.org/keywords/latency","display_name":"Latency (audio)","score":0.4945000112056732},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.4537999927997589},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.39410001039505005}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7605000138282776},{"id":"https://openalex.org/C93225998","wikidata":"https://www.wikidata.org/wiki/Q1941972","display_name":"Moderation","level":2,"score":0.6955000162124634},{"id":"https://openalex.org/C48044578","wikidata":"https://www.wikidata.org/wiki/Q727490","display_name":"Scalability","level":2,"score":0.6342999935150146},{"id":"https://openalex.org/C2776214188","wikidata":"https://www.wikidata.org/wiki/Q408386","display_name":"Inference","level":2,"score":0.516700029373169},{"id":"https://openalex.org/C82876162","wikidata":"https://www.wikidata.org/wiki/Q17096504","display_name":"Latency (audio)","level":2,"score":0.4945000112056732},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.4537999927997589},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.39980000257492065},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.39410001039505005},{"id":"https://openalex.org/C174348530","wikidata":"https://www.wikidata.org/wiki/Q188635","display_name":"Bridging (networking)","level":2,"score":0.3124000132083893},{"id":"https://openalex.org/C201025465","wikidata":"https://www.wikidata.org/wiki/Q11248500","display_name":"User experience design","level":2,"score":0.28360000252723694},{"id":"https://openalex.org/C2778152352","wikidata":"https://www.wikidata.org/wiki/Q5165061","display_name":"Content (measure theory)","level":2,"score":0.2824999988079071},{"id":"https://openalex.org/C49774154","wikidata":"https://www.wikidata.org/wiki/Q131765","display_name":"Multimedia","level":1,"score":0.27649998664855957},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2694999873638153},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.26019999384880066}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.06205","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.06205","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.06205","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.06205","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"display_name":"Peace, Justice and strong institutions","score":0.7008842825889587,"id":"https://metadata.un.org/sdg/16"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"The":[0],"growth":[1],"of":[2],"online":[3],"platforms":[4],"and":[5,33,87,119],"user":[6],"content":[7,10,54],"requires":[8],"strong":[9],"moderation":[11,56,117],"systems":[12],"that":[13,73,92,103],"can":[14,76,106],"handle":[15],"complex":[16],"inputs":[17],"from":[18],"various":[19],"media":[20],"types.":[21],"While":[22],"large":[23],"language":[24,49],"models":[25],"(LLMs)":[26],"are":[27],"effective,":[28],"their":[29],"high":[30],"computational":[31],"cost":[32],"latency":[34],"present":[35],"significant":[36,97],"challenges":[37],"for":[38,53],"scalable":[39],"deployment.":[40],"To":[41],"address":[42],"this,":[43],"we":[44,71,101],"introduce":[45],"Tool-MCoT,":[46],"a":[47,114],"small":[48],"model":[50,63,105],"(SLM)":[51],"fine-tuned":[52,94],"safety":[55],"leveraging":[57],"external":[58],"framework.":[59],"By":[60],"training":[61],"our":[62],"on":[64],"tool-augmented":[65],"chain-of-thought":[66],"data":[67],"generated":[68],"by":[69,122],"LLM,":[70],"demonstrate":[72],"the":[74,93,104],"SLM":[75,95],"learn":[77,107],"to":[78,83,108],"effectively":[79],"utilize":[80],"these":[81,110],"tools":[82,111,124],"improve":[84],"its":[85],"reasoning":[86],"decision-making.":[88],"Our":[89],"experiments":[90],"show":[91,102],"achieves":[96],"performance":[98],"gains.":[99],"Furthermore,":[100],"use":[109],"selectively,":[112],"achieving":[113],"balance":[115],"between":[116],"accuracy":[118],"inference":[120],"efficiency":[121],"calling":[123],"only":[125],"when":[126],"necessary.":[127]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-04-10T00:00:00"}
