{"id":"https://openalex.org/W7148854624","doi":"https://doi.org/10.48550/arxiv.2604.02171","title":"Do Lexical and Contextual Coreference Resolution Systems Degrade Differently under Mention Noise? An Empirical Study on Scientific Software Mentions","display_name":"Do Lexical and Contextual Coreference Resolution Systems Degrade Differently under Mention Noise? An Empirical Study on Scientific Software Mentions","publication_year":2026,"publication_date":"2026-04-02","ids":{"openalex":"https://openalex.org/W7148854624","doi":"https://doi.org/10.48550/arxiv.2604.02171"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.02171","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.02171","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.02171","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5004409666","display_name":"Atilla Kaan Alkan","orcid":"https://orcid.org/0000-0001-7964-4420"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Alkan, Atilla Kaan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5015041023","display_name":"F\u00e9lix Gr\u00e8zes","orcid":"https://orcid.org/0000-0001-8714-7774"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Grezes, Felix","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5065802298","display_name":"Jennifer Bartlett","orcid":"https://orcid.org/0000-0001-7394-4545"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bartlett, Jennifer Lynn","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5132883492","display_name":"Anna Kelbert","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kelbert, Anna","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5077248923","display_name":"Kelly E. Lockhart","orcid":"https://orcid.org/0000-0002-8130-1440"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lockhart, Kelly","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5070393798","display_name":"Alberto Accomazzi","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Accomazzi, Alberto","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5004409666"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.5375000238418579,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.5375000238418579,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.27320000529289246,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.03620000183582306,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/coreference","display_name":"Coreference","score":0.9039999842643738},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.6208000183105469},{"id":"https://openalex.org/keywords/noise","display_name":"Noise (video)","score":0.5382999777793884},{"id":"https://openalex.org/keywords/empirical-research","display_name":"Empirical research","score":0.5088000297546387},{"id":"https://openalex.org/keywords/point","display_name":"Point (geometry)","score":0.48969998955726624},{"id":"https://openalex.org/keywords/software","display_name":"Software","score":0.48030000925064087},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.4717000126838684},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.459199994802475},{"id":"https://openalex.org/keywords/selection","display_name":"Selection (genetic algorithm)","score":0.4196999967098236}],"concepts":[{"id":"https://openalex.org/C28076734","wikidata":"https://www.wikidata.org/wiki/Q63087","display_name":"Coreference","level":3,"score":0.9039999842643738},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7458999752998352},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.6208000183105469},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5444999933242798},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.5382999777793884},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5238000154495239},{"id":"https://openalex.org/C120936955","wikidata":"https://www.wikidata.org/wiki/Q2155640","display_name":"Empirical research","level":2,"score":0.5088000297546387},{"id":"https://openalex.org/C28719098","wikidata":"https://www.wikidata.org/wiki/Q44946","display_name":"Point (geometry)","level":2,"score":0.48969998955726624},{"id":"https://openalex.org/C2777904410","wikidata":"https://www.wikidata.org/wiki/Q7397","display_name":"Software","level":2,"score":0.48030000925064087},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.4717000126838684},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.459199994802475},{"id":"https://openalex.org/C81917197","wikidata":"https://www.wikidata.org/wiki/Q628760","display_name":"Selection (genetic algorithm)","level":2,"score":0.4196999967098236},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.41760000586509705},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.38909998536109924},{"id":"https://openalex.org/C138268822","wikidata":"https://www.wikidata.org/wiki/Q1051925","display_name":"Resolution (logic)","level":2,"score":0.387800008058548},{"id":"https://openalex.org/C58166","wikidata":"https://www.wikidata.org/wiki/Q224821","display_name":"Fuzzy logic","level":2,"score":0.3781000077724457},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.35989999771118164},{"id":"https://openalex.org/C184337299","wikidata":"https://www.wikidata.org/wiki/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.33809998631477356},{"id":"https://openalex.org/C12725497","wikidata":"https://www.wikidata.org/wiki/Q810247","display_name":"Baseline (sea)","level":2,"score":0.3330000042915344},{"id":"https://openalex.org/C2779662365","wikidata":"https://www.wikidata.org/wiki/Q5416694","display_name":"Event (particle physics)","level":2,"score":0.32670000195503235},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.326200008392334},{"id":"https://openalex.org/C149091818","wikidata":"https://www.wikidata.org/wiki/Q2429814","display_name":"Software system","level":3,"score":0.3160000145435333},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3125},{"id":"https://openalex.org/C547195049","wikidata":"https://www.wikidata.org/wiki/Q1725664","display_name":"Terminology","level":2,"score":0.2994999885559082},{"id":"https://openalex.org/C64543145","wikidata":"https://www.wikidata.org/wiki/Q162942","display_name":"Intersection (aeronautics)","level":2,"score":0.28200000524520874},{"id":"https://openalex.org/C177212765","wikidata":"https://www.wikidata.org/wiki/Q627335","display_name":"Workflow","level":2,"score":0.2806999981403351},{"id":"https://openalex.org/C155092808","wikidata":"https://www.wikidata.org/wiki/Q182557","display_name":"Computational linguistics","level":2,"score":0.2621000111103058},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.2583000063896179}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.02171","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.02171","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.02171","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.02171","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"We":[0,25,180],"present":[1],"our":[2,17,182],"participation":[3],"in":[4],"the":[5,68,74,83,147,165,169,174,177],"SOMD":[6],"2026":[7],"shared":[8],"task":[9],"on":[10,67,188],"cross-document":[11],"software":[12,79],"mention":[13,120,171],"coreference":[14],"resolution,":[15],"where":[16],"systems":[18],"ranked":[19],"second":[20],"across":[21,52],"all":[22,53],"three":[23],"subtasks.":[24],"compare":[26],"two":[27],"fine-tuning-free":[28],"approaches:":[29],"Fuzzy":[30],"Matching":[31],"(FM),":[32],"a":[33],"lexical":[34],"string-similarity":[35],"method,":[36],"and":[37,45,173],"Context":[38],"Aware":[39],"Representations":[40],"(CAR),":[41],"which":[42,81],"combines":[43],"mention-level":[44],"document-level":[46],"embeddings.":[47],"Both":[48],"achieve":[49],"competitive":[50],"performance":[51],"subtasks":[54],"(CoNLL":[55],"F1":[56,105],"of":[57,78,168,176],"0.94-0.96),":[58],"with":[59,73,137],"CAR":[60,101,141,146],"consistently":[61],"outperforming":[62],"FM":[63,122,134],"by":[64,163],"1":[65],"point":[66],"official":[69],"test":[70],"set,":[71],"consistent":[72],"high":[75],"surface":[76],"regularity":[77],"names,":[80],"reduces":[82],"need":[84],"for":[85,116],"complex":[86],"semantic":[87],"reasoning.":[88],"A":[89],"controlled":[90],"noise-injection":[91],"study":[92],"reveals":[93],"complementary":[94],"failure":[95],"modes:":[96],"as":[97],"boundary":[98],"noise":[99,166],"increases,":[100],"loses":[102],"only":[103],"0.07":[104],"points":[106],"from":[107],"clean":[108],"to":[109,114,184],"fully":[110],"corrupted":[111],"input,":[112],"compared":[113],"0.20":[115],"FM,":[117],"whereas":[118,140],"under":[119],"substitution,":[121],"degrades":[123],"more":[124,148],"gracefully":[125],"(0.52":[126],"vs.":[127],"0.63).":[128],"Our":[129],"inference-time":[130],"analysis":[131],"shows":[132],"that":[133,157],"scales":[135,142],"superlinearly":[136],"corpus":[138],"size,":[139],"approximately":[143],"linearly,":[144],"making":[145],"efficient":[149],"choice":[150],"at":[151],"large":[152],"scale.":[153],"These":[154],"findings":[155],"suggest":[156],"system":[158],"selection":[159],"should":[160],"be":[161],"informed":[162],"both":[164],"profile":[167],"upstream":[170],"detector":[172],"scale":[175],"target":[178],"corpus.":[179],"release":[181],"code":[183],"support":[185],"future":[186],"work":[187],"this":[189],"underexplored":[190],"task.":[191]},"counts_by_year":[],"updated_date":"2026-04-04T06:15:33.020886","created_date":"2026-04-04T00:00:00"}
