{"id":"https://openalex.org/W7125265681","doi":"https://doi.org/10.48550/arxiv.2601.12491","title":"VASTU: Value-Aligned Social Toolkit for Online Content Curation","display_name":"VASTU: Value-Aligned Social Toolkit for Online Content Curation","publication_year":2026,"publication_date":"2026-01-18","ids":{"openalex":"https://openalex.org/W7125265681","doi":"https://doi.org/10.48550/arxiv.2601.12491"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2601.12491","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2601.12491","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2601.12491","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5123514388","display_name":"Agam Goyal","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Goyal, Agam","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5114337814","display_name":"Xianyang Zhan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhan, Xianyang","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5123457925","display_name":"Charlotte Lambert","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lambert, Charlotte","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5123505220","display_name":"Koustuv Saha","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Saha, Koustuv","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5123537496","display_name":"Eshwar Chandrasekharan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chandrasekharan, Eshwar","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11147","display_name":"Misinformation and Its Impacts","score":0.25110000371932983,"subfield":{"id":"https://openalex.org/subfields/3312","display_name":"Sociology and Political Science"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11147","display_name":"Misinformation and Its Impacts","score":0.25110000371932983,"subfield":{"id":"https://openalex.org/subfields/3312","display_name":"Sociology and Political Science"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11273","display_name":"Advanced Graph Neural Networks","score":0.10949999839067459,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.09059999883174896,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/sociotechnical-system","display_name":"Sociotechnical system","score":0.5879999995231628},{"id":"https://openalex.org/keywords/benchmark","display_name":"Benchmark (surveying)","score":0.4925000071525574},{"id":"https://openalex.org/keywords/ranking","display_name":"Ranking (information retrieval)","score":0.47679999470710754},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.4196000099182129},{"id":"https://openalex.org/keywords/social-media","display_name":"Social media","score":0.40959998965263367},{"id":"https://openalex.org/keywords/online-community","display_name":"Online community","score":0.36320000886917114},{"id":"https://openalex.org/keywords/learning-to-rank","display_name":"Learning to rank","score":0.33239999413490295},{"id":"https://openalex.org/keywords/content-analysis","display_name":"Content analysis","score":0.3294999897480011}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6855000257492065},{"id":"https://openalex.org/C127627568","wikidata":"https://www.wikidata.org/wiki/Q1639361","display_name":"Sociotechnical system","level":2,"score":0.5879999995231628},{"id":"https://openalex.org/C185798385","wikidata":"https://www.wikidata.org/wiki/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.4925000071525574},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.49140000343322754},{"id":"https://openalex.org/C189430467","wikidata":"https://www.wikidata.org/wiki/Q7293293","display_name":"Ranking (information retrieval)","level":2,"score":0.47679999470710754},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.4410000145435333},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.4196000099182129},{"id":"https://openalex.org/C518677369","wikidata":"https://www.wikidata.org/wiki/Q202833","display_name":"Social media","level":2,"score":0.40959998965263367},{"id":"https://openalex.org/C2778165684","wikidata":"https://www.wikidata.org/wiki/Q6576792","display_name":"Online community","level":2,"score":0.36320000886917114},{"id":"https://openalex.org/C86037889","wikidata":"https://www.wikidata.org/wiki/Q4330127","display_name":"Learning to rank","level":3,"score":0.33239999413490295},{"id":"https://openalex.org/C162446236","wikidata":"https://www.wikidata.org/wiki/Q653137","display_name":"Content analysis","level":2,"score":0.3294999897480011},{"id":"https://openalex.org/C93225998","wikidata":"https://www.wikidata.org/wiki/Q1941972","display_name":"Moderation","level":2,"score":0.3292999863624573},{"id":"https://openalex.org/C91632574","wikidata":"https://www.wikidata.org/wiki/Q15088675","display_name":"Data curation","level":2,"score":0.32420000433921814},{"id":"https://openalex.org/C2775953033","wikidata":"https://www.wikidata.org/wiki/Q5276060","display_name":"Digital curation","level":2,"score":0.3124000132083893},{"id":"https://openalex.org/C56739046","wikidata":"https://www.wikidata.org/wiki/Q192060","display_name":"Knowledge management","level":1,"score":0.30090001225471497},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.29980000853538513},{"id":"https://openalex.org/C4554734","wikidata":"https://www.wikidata.org/wiki/Q593744","display_name":"Knowledge base","level":2,"score":0.2888999879360199},{"id":"https://openalex.org/C101293273","wikidata":"https://www.wikidata.org/wiki/Q579716","display_name":"User-generated content","level":3,"score":0.28540000319480896},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.28279998898506165},{"id":"https://openalex.org/C79416737","wikidata":"https://www.wikidata.org/wiki/Q2305519","display_name":"Social learning","level":2,"score":0.2700999975204468},{"id":"https://openalex.org/C126349790","wikidata":"https://www.wikidata.org/wiki/Q905036","display_name":"Computational sociology","level":2,"score":0.2606000006198883},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.2565999925136566}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2601.12491","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2601.12491","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2601.12491","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2601.12491","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"score":0.7625739574432373,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Detecting":[0],"what":[1],"content":[2,18],"communities":[3],"value":[4],"is":[5],"a":[6,57,72,173],"foundational":[7],"challenge":[8],"for":[9,53,62],"social":[10],"computing":[11],"systems":[12],"--":[13],"from":[14,77],"feed":[15],"curation":[16],"and":[17,23,35,59,87,99,149],"ranking":[19],"to":[20,66,176],"moderation":[21],"tools":[22],"personalized":[24],"recommendation":[25],"systems.":[26,182],"Yet":[27],"existing":[28],"approaches":[29,65],"remain":[30],"fragmented":[31],"across":[32],"methodological":[33],"paradigms,":[34],"it":[36],"remains":[37],"unclear":[38],"which":[39],"methods":[40],"best":[41],"capture":[42],"community-specific":[43,106,112],"notions":[44],"of":[45,74],"value.":[46],"We":[47,109],"introduce":[48],"VASTU":[49,70],"(Value-Aligned":[50],"Social":[51],"Toolkit":[52],"Online":[54],"Content":[55],"Curation),":[56],"benchmark":[58,175],"evaluation":[60],"framework":[61],"systematically":[63],"comparing":[64],"detecting":[67],"community-valued":[68],"content.":[69],"includes":[71],"dataset":[73],"75,000":[75],"comments":[76],"15":[78],"diverse":[79],"Reddit":[80],"communities,":[81],"annotated":[82],"with":[83,118],"community":[84,162],"approval":[85],"labels":[86],"rich":[88],"linguistic":[89],"features.":[90],"Using":[91],"VASTU,":[92,170],"we":[93,171],"evaluate":[94],"feature-based":[95],"models,":[96],"transformers,":[97],"prompted":[98,134],"fine-tuned":[100,119,128],"language":[101],"models":[102,113,151],"under":[103],"global":[104,116],"versus":[105],"training":[107],"regimes.":[108],"find":[110],"that":[111],"consistently":[114],"outperform":[115,133],"approaches,":[117],"transformers":[120],"achieving":[121],"the":[122,153],"strongest":[123],"performance":[124],"(0.72":[125],"AUROC).":[126],"Notably,":[127],"SLMs":[129],"(0.65":[130],"AUROC)":[131,137],"substantially":[132],"LLMs":[135],"(0.60":[136],"despite":[138],"being":[139],"100":[140],"times":[141],"smaller.":[142],"Counterintuitively,":[143],"chain-of-thought":[144],"prompting":[145],"provides":[146],"no":[147],"benefit,":[148],"reasoning":[150],"perform":[152],"worst":[154],"(0.53":[155],"AUROC),":[156],"suggesting":[157],"this":[158],"task":[159],"requires":[160],"learning":[161],"norms":[163],"rather":[164],"than":[165],"test-time":[166],"reasoning.":[167],"By":[168],"releasing":[169],"provide":[172],"standardized":[174],"advance":[177],"research":[178],"on":[179],"value-aligned":[180],"sociotechnical":[181]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-01-22T00:00:00"}
