{"id":"https://openalex.org/W4416036657","doi":"https://doi.org/10.18653/v1/2025.emnlp-main.426","title":"When Annotators Disagree, Topology Explains: Mapper, a Topological Tool for Exploring Text Embedding Geometry and Ambiguity","display_name":"When Annotators Disagree, Topology Explains: Mapper, a Topological Tool for Exploring Text Embedding Geometry and Ambiguity","publication_year":2025,"publication_date":"2025-01-01","ids":{"openalex":"https://openalex.org/W4416036657","doi":"https://doi.org/10.18653/v1/2025.emnlp-main.426"},"language":null,"primary_location":{"id":"doi:10.18653/v1/2025.emnlp-main.426","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.emnlp-main.426","pdf_url":"https://aclanthology.org/2025.emnlp-main.426.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://aclanthology.org/2025.emnlp-main.426.pdf","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5120282347","display_name":"Nisrine Rair","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Nisrine Rair","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5066371865","display_name":"Alban Goupil","orcid":"https://orcid.org/0000-0003-4308-9968"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Alban Goupil","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5082749809","display_name":"Val\u00e9riu Vrabie","orcid":"https://orcid.org/0000-0003-4249-0207"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Valeriu Vrabie","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5120282348","display_name":"Emmanuel Chochoy","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Emmanuel Chochoy","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5120282347"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.35258265,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"8468","last_page":"8491"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10799","display_name":"Data Visualization and Analytics","score":0.21960000693798065,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10799","display_name":"Data Visualization and Analytics","score":0.21960000693798065,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.10530000180006027,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11273","display_name":"Advanced Graph Neural Networks","score":0.04259999841451645,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/embedding","display_name":"Embedding","score":0.6888999938964844},{"id":"https://openalex.org/keywords/topology","display_name":"Topology (electrical circuits)","score":0.6079000234603882},{"id":"https://openalex.org/keywords/ambiguity","display_name":"Ambiguity","score":0.5497999787330627},{"id":"https://openalex.org/keywords/topological-data-analysis","display_name":"Topological data analysis","score":0.4104999899864197},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.3734000027179718},{"id":"https://openalex.org/keywords/visualization","display_name":"Visualization","score":0.3479999899864197}],"concepts":[{"id":"https://openalex.org/C41608201","wikidata":"https://www.wikidata.org/wiki/Q980509","display_name":"Embedding","level":2,"score":0.6888999938964844},{"id":"https://openalex.org/C184720557","wikidata":"https://www.wikidata.org/wiki/Q7825049","display_name":"Topology (electrical circuits)","level":2,"score":0.6079000234603882},{"id":"https://openalex.org/C2780522230","wikidata":"https://www.wikidata.org/wiki/Q1140419","display_name":"Ambiguity","level":2,"score":0.5497999787330627},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5449000000953674},{"id":"https://openalex.org/C2776477805","wikidata":"https://www.wikidata.org/wiki/Q4460773","display_name":"Topological data analysis","level":2,"score":0.4104999899864197},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.3734000027179718},{"id":"https://openalex.org/C36464697","wikidata":"https://www.wikidata.org/wiki/Q451553","display_name":"Visualization","level":2,"score":0.3479999899864197},{"id":"https://openalex.org/C156135169","wikidata":"https://www.wikidata.org/wiki/Q5535522","display_name":"Geometry and topology","level":2,"score":0.3228999972343445},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.321399986743927},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.31060001254081726},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.30329999327659607},{"id":"https://openalex.org/C529865628","wikidata":"https://www.wikidata.org/wiki/Q1790740","display_name":"Manifold (fluid mechanics)","level":2,"score":0.29600000381469727},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.2777999937534332},{"id":"https://openalex.org/C2524010","wikidata":"https://www.wikidata.org/wiki/Q8087","display_name":"Geometry","level":1,"score":0.2734000086784363},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.25209999084472656},{"id":"https://openalex.org/C29123130","wikidata":"https://www.wikidata.org/wiki/Q874709","display_name":"Computational geometry","level":2,"score":0.25}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.18653/v1/2025.emnlp-main.426","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.emnlp-main.426","pdf_url":"https://aclanthology.org/2025.emnlp-main.426.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.18653/v1/2025.emnlp-main.426","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.emnlp-main.426","pdf_url":"https://aclanthology.org/2025.emnlp-main.426.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4416036657.pdf","grobid_xml":"https://content.openalex.org/works/W4416036657.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Language":[0],"models":[1,17,34,131],"are":[2,156],"often":[3],"evaluated":[4],"with":[5,65,83],"scalar":[6],"metrics":[7,139],"like":[8],"accuracy,":[9],"but":[10],"such":[11,102],"measures":[12],"fail":[13],"to":[14,30,41],"capture":[15],"how":[16,32,130],"internally":[18],"represent":[19],"ambiguity,":[20],"especially":[21],"when":[22],"human":[23],"annotators":[24],"disagree.We":[25],"propose":[26],"a":[27,48,91,124],"topological":[28,51,138],"perspective":[29],"analyze":[31],"fine-tuned":[33],"encode":[35],"ambiguity":[36],"and":[37,97,117,153],"more":[38],"generally":[39],"instances.Applied":[40],"RoBERTa-Large":[42],"on":[43],"the":[44],"MD-Offense":[45],"dataset,":[46],"Mapper,":[47],"tool":[49,127],"from":[50],"data":[52],"analysis,":[53],"reveals":[54],"that":[55,140],"fine-tuning":[56],"restructures":[57],"embedding":[58],"space":[59],"into":[60],"modular,":[61],"non-convex":[62],"regions":[63],"aligned":[64],"model":[66],"predictions,":[67],"even":[68],"for":[69,128],"highly":[70],"ambiguous":[71,88],"cases.Over":[72],"98%":[73],"of":[74],"connected":[75],"components":[76],"exhibit":[77],"90%":[78],"prediction":[79],"purity,":[80],"yet":[81],"alignment":[82],"ground-truth":[84],"labels":[85],"drops":[86],"in":[87,146],"data,":[89],"surfacing":[90],"hidden":[92],"tension":[93],"between":[94],"structural":[95],"confidence":[96],"label":[98],"uncertainty.Unlike":[99],"traditional":[100],"tools":[101],"as":[103,123],"PCA":[104],"or":[105],"UMAP,":[106],"Mapper":[107,122],"captures":[108],"this":[109],"geometry":[110],"directly":[111],"uncovering":[112],"decision":[113],"regions,":[114],"boundary":[115],"collapses,":[116],"overconfident":[118],"clusters.Our":[119],"findings":[120],"position":[121],"powerful":[125],"diagnostic":[126],"understanding":[129],"resolve":[132],"ambiguity.Beyond":[133],"visualization,":[134],"it":[135],"also":[136],"enables":[137],"may":[141],"inform":[142],"proactive":[143],"modeling":[144],"strategies":[145],"subjective":[147],"NLP":[148],"tasks.For":[149],"reproducibility,":[150],"all":[151],"code":[152],"experiment":[154],"configurations":[155],"released":[157],"1":[158],".":[159]},"counts_by_year":[],"updated_date":"2026-03-11T06:11:40.159057","created_date":"2025-11-08T00:00:00"}
