{"id":"https://openalex.org/W4416575445","doi":"https://doi.org/10.3390/computers14120508","title":"NewsSumm: The World\u2019s Largest Human-Annotated Multi-Document News Summarization Dataset for Indian English","display_name":"NewsSumm: The World\u2019s Largest Human-Annotated Multi-Document News Summarization Dataset for Indian English","publication_year":2025,"publication_date":"2025-11-23","ids":{"openalex":"https://openalex.org/W4416575445","doi":"https://doi.org/10.3390/computers14120508"},"language":"en","primary_location":{"id":"doi:10.3390/computers14120508","is_oa":true,"landing_page_url":"https://doi.org/10.3390/computers14120508","pdf_url":"https://www.mdpi.com/2073-431X/14/12/508/pdf?version=1763975771","source":{"id":"https://openalex.org/S4210228075","display_name":"Computers","issn_l":"2073-431X","issn":["2073-431X"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Computers","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.mdpi.com/2073-431X/14/12/508/pdf?version=1763975771","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5075272402","display_name":"Manish Motghare","orcid":"https://orcid.org/0000-0002-3239-0205"},"institutions":[{"id":"https://openalex.org/I133978751","display_name":"Rashtrasant Tukadoji Maharaj Nagpur University","ror":"https://ror.org/04esgv207","country_code":"IN","type":"education","lineage":["https://openalex.org/I133978751"]}],"countries":["IN"],"is_corresponding":true,"raw_author_name":"Manish Motghare","raw_affiliation_strings":["Shri Ramdeobaba College of Engineering and Management, Affiliated to Rashtrasant Tukdoji Maharaj Nagpur University, Nagpur 440013, India"],"affiliations":[{"raw_affiliation_string":"Shri Ramdeobaba College of Engineering and Management, Affiliated to Rashtrasant Tukdoji Maharaj Nagpur University, Nagpur 440013, India","institution_ids":["https://openalex.org/I133978751"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5035107404","display_name":"Megha Agarwal","orcid":"https://orcid.org/0000-0003-3434-6555"},"institutions":[{"id":"https://openalex.org/I97018004","display_name":"Stanford University","ror":"https://ror.org/00f54p054","country_code":"US","type":"education","lineage":["https://openalex.org/I97018004"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Megha Agarwal","raw_affiliation_strings":["School of Medicine, Stanford University, Stanford, CA 94305, USA"],"affiliations":[{"raw_affiliation_string":"School of Medicine, Stanford University, Stanford, CA 94305, USA","institution_ids":["https://openalex.org/I97018004"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5100911461","display_name":"Avinash Agrawal","orcid":null},"institutions":[{"id":"https://openalex.org/I133978751","display_name":"Rashtrasant Tukadoji Maharaj Nagpur University","ror":"https://ror.org/04esgv207","country_code":"IN","type":"education","lineage":["https://openalex.org/I133978751"]},{"id":"https://openalex.org/I4210164862","display_name":"Artificial Intelligence in Medicine (Canada)","ror":"https://ror.org/05p590m36","country_code":"CA","type":"company","lineage":["https://openalex.org/I4210164862"]}],"countries":["CA","IN"],"is_corresponding":false,"raw_author_name":"Avinash Agrawal","raw_affiliation_strings":["Department of Artificial Intelligence and Cyber Security, Ramdeobaba University, Nagpur 440013, India","Shri Ramdeobaba College of Engineering and Management, Affiliated to Rashtrasant Tukdoji Maharaj Nagpur University, Nagpur 440013, India"],"affiliations":[{"raw_affiliation_string":"Department of Artificial Intelligence and Cyber Security, Ramdeobaba University, Nagpur 440013, India","institution_ids":["https://openalex.org/I4210164862"]},{"raw_affiliation_string":"Shri Ramdeobaba College of Engineering and Management, Affiliated to Rashtrasant Tukdoji Maharaj Nagpur University, Nagpur 440013, India","institution_ids":["https://openalex.org/I133978751"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5035107404","https://openalex.org/A5075272402"],"corresponding_institution_ids":["https://openalex.org/I133978751","https://openalex.org/I97018004"],"apc_list":{"value":1600,"currency":"CHF","value_usd":1732},"apc_paid":{"value":1600,"currency":"CHF","value_usd":1732},"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.19080579,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"14","issue":"12","first_page":"508","last_page":"508"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.7008000016212463,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.7008000016212463,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11147","display_name":"Misinformation and Its Impacts","score":0.03700000047683716,"subfield":{"id":"https://openalex.org/subfields/3312","display_name":"Sociology and Political Science"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T13910","display_name":"Computational and Text Analysis Methods","score":0.03400000184774399,"subfield":{"id":"https://openalex.org/subfields/3300","display_name":"General Social Sciences"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/automatic-summarization","display_name":"Automatic summarization","score":0.8069999814033508},{"id":"https://openalex.org/keywords/newspaper","display_name":"Newspaper","score":0.7117000222206116},{"id":"https://openalex.org/keywords/timeline","display_name":"Timeline","score":0.614799976348877},{"id":"https://openalex.org/keywords/journalism","display_name":"Journalism","score":0.5181999802589417},{"id":"https://openalex.org/keywords/consistency","display_name":"Consistency (knowledge bases)","score":0.4810999929904938},{"id":"https://openalex.org/keywords/scale","display_name":"Scale (ratio)","score":0.47369998693466187},{"id":"https://openalex.org/keywords/indian-english","display_name":"Indian English","score":0.4629000127315521},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.44350001215934753},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.41839998960494995}],"concepts":[{"id":"https://openalex.org/C170858558","wikidata":"https://www.wikidata.org/wiki/Q1394144","display_name":"Automatic summarization","level":2,"score":0.8069999814033508},{"id":"https://openalex.org/C201280247","wikidata":"https://www.wikidata.org/wiki/Q11032","display_name":"Newspaper","level":2,"score":0.7117000222206116},{"id":"https://openalex.org/C4438859","wikidata":"https://www.wikidata.org/wiki/Q186117","display_name":"Timeline","level":2,"score":0.614799976348877},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5934000015258789},{"id":"https://openalex.org/C119513131","wikidata":"https://www.wikidata.org/wiki/Q11030","display_name":"Journalism","level":2,"score":0.5181999802589417},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.49729999899864197},{"id":"https://openalex.org/C2776436953","wikidata":"https://www.wikidata.org/wiki/Q5163215","display_name":"Consistency (knowledge bases)","level":2,"score":0.4810999929904938},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.47369998693466187},{"id":"https://openalex.org/C2778443833","wikidata":"https://www.wikidata.org/wiki/Q1348800","display_name":"Indian English","level":2,"score":0.4629000127315521},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.44350001215934753},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.41839998960494995},{"id":"https://openalex.org/C157657479","wikidata":"https://www.wikidata.org/wiki/Q2367247","display_name":"Closed captioning","level":3,"score":0.40470001101493835},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.40450000762939453},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.38989999890327454},{"id":"https://openalex.org/C139807058","wikidata":"https://www.wikidata.org/wiki/Q352374","display_name":"Adaptation (eye)","level":2,"score":0.38909998536109924},{"id":"https://openalex.org/C2776434776","wikidata":"https://www.wikidata.org/wiki/Q19246213","display_name":"Domain adaptation","level":3,"score":0.3822999894618988},{"id":"https://openalex.org/C206345919","wikidata":"https://www.wikidata.org/wiki/Q20380951","display_name":"Resource (disambiguation)","level":2,"score":0.36239999532699585},{"id":"https://openalex.org/C177212765","wikidata":"https://www.wikidata.org/wiki/Q627335","display_name":"Workflow","level":2,"score":0.35929998755455017},{"id":"https://openalex.org/C2987496018","wikidata":"https://www.wikidata.org/wiki/Q1860","display_name":"English language","level":2,"score":0.35010001063346863},{"id":"https://openalex.org/C134714966","wikidata":"https://www.wikidata.org/wiki/Q6934448","display_name":"Multi-document summarization","level":3,"score":0.3375999927520752},{"id":"https://openalex.org/C158154518","wikidata":"https://www.wikidata.org/wiki/Q7310970","display_name":"Relevance (law)","level":2,"score":0.3327000141143799},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.32260000705718994},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.3147999942302704},{"id":"https://openalex.org/C2775924081","wikidata":"https://www.wikidata.org/wiki/Q55608371","display_name":"Control (management)","level":2,"score":0.31349998712539673},{"id":"https://openalex.org/C4679612","wikidata":"https://www.wikidata.org/wiki/Q866298","display_name":"Aggregate (composite)","level":2,"score":0.31060001254081726},{"id":"https://openalex.org/C95457728","wikidata":"https://www.wikidata.org/wiki/Q309","display_name":"History","level":0,"score":0.28929999470710754},{"id":"https://openalex.org/C531593650","wikidata":"https://www.wikidata.org/wiki/Q7167","display_name":"Colonialism","level":2,"score":0.27070000767707825},{"id":"https://openalex.org/C2776331648","wikidata":"https://www.wikidata.org/wiki/Q3545554","display_name":"World Englishes","level":2,"score":0.26669999957084656},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.2630999982357025},{"id":"https://openalex.org/C75165309","wikidata":"https://www.wikidata.org/wiki/Q2258979","display_name":"Search engine indexing","level":2,"score":0.26109999418258667},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.2517000138759613}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.3390/computers14120508","is_oa":true,"landing_page_url":"https://doi.org/10.3390/computers14120508","pdf_url":"https://www.mdpi.com/2073-431X/14/12/508/pdf?version=1763975771","source":{"id":"https://openalex.org/S4210228075","display_name":"Computers","issn_l":"2073-431X","issn":["2073-431X"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Computers","raw_type":"journal-article"},{"id":"pmh:doi:10.5281/zenodo.17670864","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Dataset"},{"id":"pmh:oai:doaj.org/article:393e9b44ddcf4ef6a6dd2ffdccc59a18","is_oa":true,"landing_page_url":"https://doaj.org/article/393e9b44ddcf4ef6a6dd2ffdccc59a18","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Computers, Vol 14, Iss 12, p 508 (2025)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.3390/computers14120508","is_oa":true,"landing_page_url":"https://doi.org/10.3390/computers14120508","pdf_url":"https://www.mdpi.com/2073-431X/14/12/508/pdf?version=1763975771","source":{"id":"https://openalex.org/S4210228075","display_name":"Computers","issn_l":"2073-431X","issn":["2073-431X"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Computers","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4416575445.pdf","grobid_xml":"https://content.openalex.org/works/W4416575445.grobid-xml"},"referenced_works_count":45,"referenced_works":["https://openalex.org/W2888482885","https://openalex.org/W2897803242","https://openalex.org/W2962849707","https://openalex.org/W2962965405","https://openalex.org/W2963926728","https://openalex.org/W2989743967","https://openalex.org/W3034188538","https://openalex.org/W3034999214","https://openalex.org/W3102645206","https://openalex.org/W3104404235","https://openalex.org/W3106234277","https://openalex.org/W3139403840","https://openalex.org/W3159259047","https://openalex.org/W3170432046","https://openalex.org/W3171639395","https://openalex.org/W3186721116","https://openalex.org/W3187773669","https://openalex.org/W3211758383","https://openalex.org/W4210265293","https://openalex.org/W4285141065","https://openalex.org/W4382202377","https://openalex.org/W4389524002","https://openalex.org/W4399489935","https://openalex.org/W4399803256","https://openalex.org/W4400529019","https://openalex.org/W4401042307","https://openalex.org/W4401042316","https://openalex.org/W4401042674","https://openalex.org/W4401043242","https://openalex.org/W4401046976","https://openalex.org/W4401244769","https://openalex.org/W4402227266","https://openalex.org/W4402638397","https://openalex.org/W4402640274","https://openalex.org/W4403544979","https://openalex.org/W4407085283","https://openalex.org/W4410949762","https://openalex.org/W4410987728","https://openalex.org/W4411119344","https://openalex.org/W4412162018","https://openalex.org/W4412393339","https://openalex.org/W4412393348","https://openalex.org/W4412841376","https://openalex.org/W4414091824","https://openalex.org/W4416028060"],"related_works":[],"abstract_inverted_index":{"The":[0],"rapid":[1],"growth":[2],"of":[3,32],"digital":[4],"journalism":[5],"has":[6],"heightened":[7],"the":[8,58,73],"need":[9],"for":[10,63,167,178],"reliable":[11],"multi-document":[12],"summarization":[13],"(MDS)":[14],"systems,":[15],"particularly":[16],"in":[17,170],"underrepresented,":[18],"low-resource,":[19],"and":[20,42,85,111,115,120,127,136,143,175,181],"culturally":[21],"distinct":[22],"contexts.":[23],"However,":[24],"current":[25],"progress":[26],"is":[27],"hindered":[28],"by":[29,45,67],"a":[30,164],"lack":[31],"large-scale,":[33],"high-quality":[34],"non-Western":[35],"datasets.":[36],"Existing":[37],"benchmarks\u2014such":[38],"as":[39,147,149],"CNN/DailyMail,":[40],"XSum,":[41],"MultiNews\u2014are":[43],"limited":[44],"language,":[46],"regional":[47],"focus,":[48],"or":[49],"reliance":[50],"on":[51,139],"noisy,":[52],"auto-generated":[53],"summaries.":[54,104],"We":[55,105],"introduce":[56],"NewsSumm,":[57,140],"largest":[59],"human-annotated":[60],"MDS":[61],"dataset":[62,155],"Indian":[64,78,179],"English,":[65],"curated":[66],"over":[68,94],"14,000":[69],"expert":[70],"annotators":[71],"through":[72],"Suvidha":[74],"Foundation.":[75],"Spanning":[76],"36":[77],"English":[79,180],"newspapers":[80],"from":[81],"2000":[82],"to":[83],"2025":[84],"covering":[86],"more":[87],"than":[88],"20":[89],"topical":[90],"categories,":[91],"NewsSumm":[92,154,162],"includes":[93],"317,498":[95],"articles":[96],"paired":[97],"with":[98],"factually":[99],"accurate,":[100],"professionally":[101],"written":[102],"abstractive":[103],"detail":[106],"its":[107,125],"robust":[108],"collection,":[109],"annotation,":[110],"quality":[112],"control":[113],"pipelines,":[114],"present":[116],"extensive":[117],"statistical,":[118],"linguistic,":[119],"temporal":[121],"analyses":[122],"that":[123],"underscore":[124],"scale":[126],"diversity.":[128],"To":[129],"establish":[130],"benchmarks,":[131],"we":[132],"evaluate":[133],"PEGASUS,":[134],"BART,":[135],"T5":[137],"models":[138],"reporting":[141],"aggregate":[142],"category-specific":[144],"ROUGE":[145],"scores,":[146],"well":[148],"factual":[150],"consistency":[151],"metrics.":[152],"All":[153],"materials":[156],"are":[157],"openly":[158],"released":[159],"via":[160],"Zenodo.":[161],"offers":[163],"foundational":[165],"resource":[166],"advancing":[168],"research":[169],"summarization,":[171],"factuality,":[172],"timeline":[173],"synthesis,":[174],"domain":[176],"adaptation":[177],"other":[182],"low-resource":[183],"language":[184],"settings.":[185]},"counts_by_year":[],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2025-11-25T00:00:00"}
