{"id":"https://openalex.org/W4200237457","doi":"https://doi.org/10.1162/coli_a_00429","title":"Novelty Detection: A Perspective from Natural Language Processing","display_name":"Novelty Detection: A Perspective from Natural Language Processing","publication_year":2021,"publication_date":"2021-12-22","ids":{"openalex":"https://openalex.org/W4200237457","doi":"https://doi.org/10.1162/coli_a_00429"},"language":"en","primary_location":{"id":"doi:10.1162/coli_a_00429","is_oa":true,"landing_page_url":"https://doi.org/10.1162/coli_a_00429","pdf_url":"https://direct.mit.edu/coli/article-pdf/48/1/77/2006641/coli_a_00429.pdf","source":{"id":"https://openalex.org/S155526855","display_name":"Computational Linguistics","issn_l":"0891-2017","issn":["0891-2017","1530-9312"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320244","host_organization_name":"Association for Computational Linguistics","host_organization_lineage":["https://openalex.org/P4310320244"],"host_organization_lineage_names":["Association for Computational Linguistics"],"type":"journal"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Computational Linguistics","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"diamond","oa_url":"https://direct.mit.edu/coli/article-pdf/48/1/77/2006641/coli_a_00429.pdf","any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5081072666","display_name":"Tirthankar Ghosal","orcid":"https://orcid.org/0000-0002-2358-522X"},"institutions":[{"id":"https://openalex.org/I21250087","display_name":"Charles University","ror":"https://ror.org/024d6js02","country_code":"CZ","type":"education","lineage":["https://openalex.org/I21250087"]}],"countries":["CZ"],"is_corresponding":true,"raw_author_name":"Tirthankar Ghosal","raw_affiliation_strings":["Institute of Formal and Applied Linguistics, Faculty of Mathematics and Physics, Charles University, Prague, Czech Republic. ghosal@ufal.mff.cuni.cz"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Institute of Formal and Applied Linguistics, Faculty of Mathematics and Physics, Charles University, Prague, Czech Republic. ghosal@ufal.mff.cuni.cz","institution_ids":["https://openalex.org/I21250087"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5050294127","display_name":"Tanik Saikh","orcid":null},"institutions":[{"id":"https://openalex.org/I132153292","display_name":"Indian Institute of Technology Patna","ror":"https://ror.org/01ft5vz71","country_code":"IN","type":"education","lineage":["https://openalex.org/I132153292"]}],"countries":["IN"],"is_corresponding":true,"raw_author_name":"Tanik Saikh","raw_affiliation_strings":["Department of Computer Science and Engineering, Indian Institute of Technology Patna, Patna, India. 1821cs08@iitp.ac.in"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, Indian Institute of Technology Patna, Patna, India. 1821cs08@iitp.ac.in","institution_ids":["https://openalex.org/I132153292"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5002622744","display_name":"Tameesh Biswas","orcid":null},"institutions":[{"id":"https://openalex.org/I132153292","display_name":"Indian Institute of Technology Patna","ror":"https://ror.org/01ft5vz71","country_code":"IN","type":"education","lineage":["https://openalex.org/I132153292"]}],"countries":["IN"],"is_corresponding":true,"raw_author_name":"Tameesh Biswas","raw_affiliation_strings":["Department of Computer Science and Engineering, Indian Institute of Technology Patna, Patna, India. biswas.cs16@iitp.ac.in"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, Indian Institute of Technology Patna, Patna, India. biswas.cs16@iitp.ac.in","institution_ids":["https://openalex.org/I132153292"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5085370631","display_name":"Asif Ekbal","orcid":"https://orcid.org/0000-0003-3612-8834"},"institutions":[{"id":"https://openalex.org/I132153292","display_name":"Indian Institute of Technology Patna","ror":"https://ror.org/01ft5vz71","country_code":"IN","type":"education","lineage":["https://openalex.org/I132153292"]}],"countries":["IN"],"is_corresponding":true,"raw_author_name":"Asif Ekbal","raw_affiliation_strings":["Department of Computer Science and Engineering, Indian Institute of Technology Patna, Patna, India. asif@iitp.ac.in"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, Indian Institute of Technology Patna, Patna, India. asif@iitp.ac.in","institution_ids":["https://openalex.org/I132153292"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5065100828","display_name":"Pushpak Bhattacharyya","orcid":"https://orcid.org/0000-0001-5319-5508"},"institutions":[{"id":"https://openalex.org/I162827531","display_name":"Indian Institute of Technology Bombay","ror":"https://ror.org/02qyf5152","country_code":"IN","type":"education","lineage":["https://openalex.org/I162827531"]}],"countries":["IN"],"is_corresponding":true,"raw_author_name":"Pushpak Bhattacharyya","raw_affiliation_strings":["Department of Computer Science and Engineering, Indian Institute of Technology Bombay, Powai, India. pb@cse.iitb.ac.in"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Department of Computer Science and Engineering, Indian Institute of Technology Bombay, Powai, India. pb@cse.iitb.ac.in","institution_ids":["https://openalex.org/I162827531"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5002622744","https://openalex.org/A5050294127","https://openalex.org/A5065100828","https://openalex.org/A5081072666","https://openalex.org/A5085370631"],"corresponding_institution_ids":["https://openalex.org/I132153292","https://openalex.org/I162827531","https://openalex.org/I21250087"],"apc_list":null,"apc_paid":null,"fwci":1.3991,"has_fulltext":false,"cited_by_count":21,"citation_normalized_percentile":{"value":0.85383384,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":97,"max":99},"biblio":{"volume":"48","issue":"1","first_page":"77","last_page":"117"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11147","display_name":"Misinformation and Its Impacts","score":0.9968000054359436,"subfield":{"id":"https://openalex.org/subfields/3312","display_name":"Sociology and Political Science"},"field":{"id":"https://openalex.org/fields/33","display_name":"Social Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.9926000237464905,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/novelty","display_name":"Novelty","score":0.8787839412689209},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8170046806335449},{"id":"https://openalex.org/keywords/redundancy","display_name":"Redundancy (engineering)","score":0.6157442331314087},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5584819912910461},{"id":"https://openalex.org/keywords/novelty-detection","display_name":"Novelty detection","score":0.5521805882453918},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5488836765289307},{"id":"https://openalex.org/keywords/curiosity","display_name":"Curiosity","score":0.47724318504333496},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.44101932644844055},{"id":"https://openalex.org/keywords/natural-language","display_name":"Natural language","score":0.43308165669441223},{"id":"https://openalex.org/keywords/psychology","display_name":"Psychology","score":0.11784449219703674}],"concepts":[{"id":"https://openalex.org/C2778738651","wikidata":"https://www.wikidata.org/wiki/Q16546687","display_name":"Novelty","level":2,"score":0.8787839412689209},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8170046806335449},{"id":"https://openalex.org/C152124472","wikidata":"https://www.wikidata.org/wiki/Q1204361","display_name":"Redundancy (engineering)","level":2,"score":0.6157442331314087},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5584819912910461},{"id":"https://openalex.org/C2778924833","wikidata":"https://www.wikidata.org/wiki/Q7064603","display_name":"Novelty detection","level":3,"score":0.5521805882453918},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5488836765289307},{"id":"https://openalex.org/C33435437","wikidata":"https://www.wikidata.org/wiki/Q366791","display_name":"Curiosity","level":2,"score":0.47724318504333496},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.44101932644844055},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.43308165669441223},{"id":"https://openalex.org/C15744967","wikidata":"https://www.wikidata.org/wiki/Q9418","display_name":"Psychology","level":0,"score":0.11784449219703674},{"id":"https://openalex.org/C77805123","wikidata":"https://www.wikidata.org/wiki/Q161272","display_name":"Social psychology","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.1162/coli_a_00429","is_oa":true,"landing_page_url":"https://doi.org/10.1162/coli_a_00429","pdf_url":"https://direct.mit.edu/coli/article-pdf/48/1/77/2006641/coli_a_00429.pdf","source":{"id":"https://openalex.org/S155526855","display_name":"Computational Linguistics","issn_l":"0891-2017","issn":["0891-2017","1530-9312"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320244","host_organization_name":"Association for Computational Linguistics","host_organization_lineage":["https://openalex.org/P4310320244"],"host_organization_lineage_names":["Association for Computational Linguistics"],"type":"journal"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Computational Linguistics","raw_type":"journal-article"},{"id":"pmh:oai:doaj.org/article:8b650e5c1cf9491eb63b54c0552edd05","is_oa":false,"landing_page_url":"https://doaj.org/article/8b650e5c1cf9491eb63b54c0552edd05","pdf_url":null,"source":{"id":"https://openalex.org/S4306401280","display_name":"DOAJ (DOAJ: Directory of Open Access Journals)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Computational Linguistics, Vol 48, Iss 1, Pp 77-117 (2022)","raw_type":"article"}],"best_oa_location":{"id":"doi:10.1162/coli_a_00429","is_oa":true,"landing_page_url":"https://doi.org/10.1162/coli_a_00429","pdf_url":"https://direct.mit.edu/coli/article-pdf/48/1/77/2006641/coli_a_00429.pdf","source":{"id":"https://openalex.org/S155526855","display_name":"Computational Linguistics","issn_l":"0891-2017","issn":["0891-2017","1530-9312"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310320244","host_organization_name":"Association for Computational Linguistics","host_organization_lineage":["https://openalex.org/P4310320244"],"host_organization_lineage_names":["Association for Computational Linguistics"],"type":"journal"},"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Computational Linguistics","raw_type":"journal-article"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/9","display_name":"Industry, innovation and infrastructure","score":0.46000000834465027}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":98,"referenced_works":["https://openalex.org/W53209706","https://openalex.org/W59585178","https://openalex.org/W1556151859","https://openalex.org/W1593271688","https://openalex.org/W1840435438","https://openalex.org/W1853530259","https://openalex.org/W1965160072","https://openalex.org/W1967925097","https://openalex.org/W1975879668","https://openalex.org/W1981825277","https://openalex.org/W1988107199","https://openalex.org/W1998224037","https://openalex.org/W2005449026","https://openalex.org/W2007760849","https://openalex.org/W2008127487","https://openalex.org/W2024190996","https://openalex.org/W2026430213","https://openalex.org/W2037022968","https://openalex.org/W2046166484","https://openalex.org/W2053959297","https://openalex.org/W2054090049","https://openalex.org/W2055294489","https://openalex.org/W2063392212","https://openalex.org/W2072284402","https://openalex.org/W2080527295","https://openalex.org/W2081798681","https://openalex.org/W2120401584","https://openalex.org/W2130158090","https://openalex.org/W2132314908","https://openalex.org/W2147528976","https://openalex.org/W2340802269","https://openalex.org/W2413794162","https://openalex.org/W2483327705","https://openalex.org/W2554309530","https://openalex.org/W2561570350","https://openalex.org/W2608787653","https://openalex.org/W2889787757","https://openalex.org/W2891177506","https://openalex.org/W2911458603","https://openalex.org/W2913045527","https://openalex.org/W2930957955","https://openalex.org/W2939380783","https://openalex.org/W2958100576","https://openalex.org/W2963241825","https://openalex.org/W2963341956","https://openalex.org/W2963691697","https://openalex.org/W2963748441","https://openalex.org/W2970716846","https://openalex.org/W2977407151","https://openalex.org/W2978797712","https://openalex.org/W2985347336","https://openalex.org/W2999905431","https://openalex.org/W3019155698","https://openalex.org/W3034906811","https://openalex.org/W3038033387","https://openalex.org/W3099023595","https://openalex.org/W3105698638","https://openalex.org/W3138773240","https://openalex.org/W3163479207","https://openalex.org/W3168921237","https://openalex.org/W3215701813","https://openalex.org/W4211148418","https://openalex.org/W4234917632","https://openalex.org/W4238634189","https://openalex.org/W4256300792","https://openalex.org/W4288089799","https://openalex.org/W4391156274","https://openalex.org/W6602621892","https://openalex.org/W6602946292","https://openalex.org/W6628071668","https://openalex.org/W6628747921","https://openalex.org/W6631501603","https://openalex.org/W6631810562","https://openalex.org/W6636593524","https://openalex.org/W6638733343","https://openalex.org/W6640462745","https://openalex.org/W6652326330","https://openalex.org/W6671567030","https://openalex.org/W6672741457","https://openalex.org/W6673599689","https://openalex.org/W6677878908","https://openalex.org/W6679434410","https://openalex.org/W6682631176","https://openalex.org/W6712720983","https://openalex.org/W6729904285","https://openalex.org/W6732303435","https://openalex.org/W6737236263","https://openalex.org/W6737661356","https://openalex.org/W6745573522","https://openalex.org/W6745819962","https://openalex.org/W6748139772","https://openalex.org/W6753082878","https://openalex.org/W6764670288","https://openalex.org/W6769627184","https://openalex.org/W6778883912","https://openalex.org/W6795226064","https://openalex.org/W6845762469","https://openalex.org/W6898505805"],"related_works":["https://openalex.org/W3039898216","https://openalex.org/W2185422427","https://openalex.org/W2478680874","https://openalex.org/W2064636555","https://openalex.org/W2585503716","https://openalex.org/W1939982668","https://openalex.org/W2105014086","https://openalex.org/W2076090200","https://openalex.org/W3025682415","https://openalex.org/W2081173909"],"abstract_inverted_index":{"Abstract":[0],"The":[1,142],"quest":[2],"for":[3,16,180,283,300],"new":[4,43,89],"information":[5,44,61,90,128],"is":[6,51,67,107,151,226],"an":[7,68],"inborn":[8],"human":[9,17],"trait":[10],"and":[11,19,82,91,103,153,184,211,252,274,278,294],"has":[12,41],"always":[13],"been":[14],"quintessential":[15],"survival":[18],"progress.":[20],"Novelty":[21,34],"drives":[22,27],"curiosity,":[23],"which":[24],"in":[25,129,169],"turn":[26],"innovation.":[28],"In":[29,171],"Natural":[30],"Language":[31],"Processing":[32],"(NLP),":[33],"Detection":[35],"refers":[36],"to":[37,45,49,87,158,161,205,267,297],"finding":[38],"text":[39,106,112],"that":[40,221],"some":[42],"offer":[46],"with":[47,207,265],"respect":[48,266],"whatever":[50],"earlier":[52,178],"seen":[53],"or":[54,240],"known.":[55],"With":[56],"the":[57,64,77,100,111,120,146,149,163,166,193,197,213,245,268,272,276,298],"exponential":[58],"growth":[59],"of":[60,71,76,125,148,165,189,199,215,271,280],"all":[62],"across":[63,253],"Web,":[65],"there":[66],"accompanying":[69],"menace":[70],"redundancy.":[72],"A":[73],"considerable":[74],"portion":[75],"Web":[78],"contents":[79],"are":[80],"duplicates,":[81],"we":[83,174],"need":[84,157],"efficient":[85],"mechanisms":[86],"retain":[88],"filter":[92],"out":[93],"redundant":[94],"information.":[95,122],"However,":[96],"detecting":[97],"redundancy":[98],"at":[99],"semantic":[101],"level":[102],"identifying":[104,231],"novel":[105],"not":[108,139],"straightforward":[109],"because":[110],"may":[113,132],"have":[114,133],"less":[115],"lexical":[116],"overlap":[117],"yet":[118],"convey":[119],"same":[121],"On":[123],"top":[124],"that,":[126],"non-novel/redundant":[127],"a":[130,186,222],"document":[131],"assimilated":[134],"from":[135],"multiple":[136,208],"source":[137,209],"documents,":[138,152],"just":[140],"one.":[141],"problem":[143],"surmounts":[144],"when":[145],"subject":[147],"discourse":[150],"numerous":[154],"prior":[155],"documents":[156],"be":[159],"processed":[160],"ascertain":[162],"novelty/non-novelty":[164],"current":[167,217],"one":[168,227],"concern.":[170],"this":[172],"work,":[173],"build":[175],"upon":[176],"our":[177,190,216,263,281,289],"investigations":[179],"document-level":[181,304],"novelty":[182,305],"detection":[183],"present":[185,212,288],"comprehensive":[187],"account":[188],"efforts":[191],"toward":[192,230],"problem.":[194],"We":[195,219,260,286],"explore":[196],"role":[198],"pre-trained":[200],"Textual":[201],"Entailment":[202],"(TE)":[203],"models":[204],"deal":[206],"contexts":[210],"outcome":[214],"investigations.":[218,285],"argue":[220],"multipremise":[223],"entailment":[224],"task":[225],"close":[228],"approximation":[229],"semantic-level":[232],"non-novelty.":[233],"Our":[234],"recent":[235],"approach":[236,282],"either":[237],"performs":[238],"comparably":[239],"achieves":[241],"significant":[242],"improvement":[243],"over":[244],"latest":[246],"reported":[247],"results":[248],"on":[249,303],"several":[250,254,295],"datasets":[251],"related":[255],"tasks":[256],"(paraphrasing,":[257],"plagiarism,":[258],"rewrite).":[259],"critically":[261],"analyze":[262],"performance":[264],"existing":[269],"state":[270],"art":[273],"show":[275],"superiority":[277],"promise":[279],"future":[284],"also":[287],"enhanced":[290],"dataset":[291],"TAP-DLND":[292],"2.0":[293],"baselines":[296],"community":[299],"further":[301],"research":[302],"detection.":[306]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":9},{"year":2024,"cited_by_count":6},{"year":2023,"cited_by_count":4}],"updated_date":"2026-05-23T08:51:43.019350","created_date":"2025-10-10T00:00:00"}
