{"id":"https://openalex.org/W7128787487","doi":"https://doi.org/10.48550/arxiv.2602.11871","title":"DMAP: A Distribution Map for Text","display_name":"DMAP: A Distribution Map for Text","publication_year":2026,"publication_date":"2026-02-12","ids":{"openalex":"https://openalex.org/W7128787487","doi":"https://doi.org/10.48550/arxiv.2602.11871"},"language":null,"primary_location":{"id":"pmh:doi:10.48550/arxiv.2602.11871","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":null,"any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5073551407","display_name":"Tom Kempton","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Kempton, Tom","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5033486362","display_name":"Julia Rozanova","orcid":"https://orcid.org/0000-0002-9971-6767"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Rozanova, Julia","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125951067","display_name":"Parameswaran Kamalaruban","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kamalaruban, Parameswaran","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5047161303","display_name":"Maeve Madigan","orcid":"https://orcid.org/0000-0003-2485-8960"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Madigan, Maeve","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125915344","display_name":"Karolina Wresilo","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wresilo, Karolina","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125946914","display_name":"Yoann L. Launay","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Launay, Yoann L.","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5125953612","display_name":"David Sutton","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sutton, David","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5125889107","display_name":"Stuart Burrell","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Burrell, Stuart","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5073551407"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12380","display_name":"Authorship Attribution and Profiling","score":0.26589998602867126,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12380","display_name":"Authorship Attribution and Profiling","score":0.26589998602867126,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.08749999850988388,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.06419999897480011,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/probability-distribution","display_name":"Probability distribution","score":0.5618000030517578},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.5299000144004822},{"id":"https://openalex.org/keywords/conditional-probability","display_name":"Conditional probability","score":0.5284000039100647},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.5209000110626221},{"id":"https://openalex.org/keywords/statistical-model","display_name":"Statistical model","score":0.5188999772071838},{"id":"https://openalex.org/keywords/range","display_name":"Range (aeronautics)","score":0.5059000253677368},{"id":"https://openalex.org/keywords/rank","display_name":"Rank (graph theory)","score":0.4690999984741211},{"id":"https://openalex.org/keywords/interval","display_name":"Interval (graph theory)","score":0.3903000056743622},{"id":"https://openalex.org/keywords/conditional-probability-distribution","display_name":"Conditional probability distribution","score":0.3833000063896179},{"id":"https://openalex.org/keywords/joint-probability-distribution","display_name":"Joint probability distribution","score":0.37130001187324524}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.6208999752998352},{"id":"https://openalex.org/C149441793","wikidata":"https://www.wikidata.org/wiki/Q200726","display_name":"Probability distribution","level":2,"score":0.5618000030517578},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.5299000144004822},{"id":"https://openalex.org/C44492722","wikidata":"https://www.wikidata.org/wiki/Q327069","display_name":"Conditional probability","level":2,"score":0.5284000039100647},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.5209000110626221},{"id":"https://openalex.org/C114289077","wikidata":"https://www.wikidata.org/wiki/Q3284399","display_name":"Statistical model","level":2,"score":0.5188999772071838},{"id":"https://openalex.org/C204323151","wikidata":"https://www.wikidata.org/wiki/Q905424","display_name":"Range (aeronautics)","level":2,"score":0.5059000253677368},{"id":"https://openalex.org/C164226766","wikidata":"https://www.wikidata.org/wiki/Q7293202","display_name":"Rank (graph theory)","level":2,"score":0.4690999984741211},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.41749998927116394},{"id":"https://openalex.org/C2778067643","wikidata":"https://www.wikidata.org/wiki/Q166507","display_name":"Interval (graph theory)","level":2,"score":0.3903000056743622},{"id":"https://openalex.org/C43555835","wikidata":"https://www.wikidata.org/wiki/Q2300258","display_name":"Conditional probability distribution","level":2,"score":0.3833000063896179},{"id":"https://openalex.org/C18653775","wikidata":"https://www.wikidata.org/wiki/Q1333358","display_name":"Joint probability distribution","level":2,"score":0.37130001187324524},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.34869998693466187},{"id":"https://openalex.org/C58489278","wikidata":"https://www.wikidata.org/wiki/Q1172284","display_name":"Data set","level":2,"score":0.3427000045776367},{"id":"https://openalex.org/C2780586882","wikidata":"https://www.wikidata.org/wiki/Q7520643","display_name":"Simple (philosophy)","level":2,"score":0.3407999873161316},{"id":"https://openalex.org/C66746571","wikidata":"https://www.wikidata.org/wiki/Q1134833","display_name":"ENCODE","level":3,"score":0.34060001373291016},{"id":"https://openalex.org/C128394911","wikidata":"https://www.wikidata.org/wiki/Q1987578","display_name":"Unit interval","level":2,"score":0.33739998936653137},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3346000015735626},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.3109999895095825},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.29919999837875366},{"id":"https://openalex.org/C195324797","wikidata":"https://www.wikidata.org/wiki/Q33742","display_name":"Natural language","level":2,"score":0.29030001163482666},{"id":"https://openalex.org/C2777855551","wikidata":"https://www.wikidata.org/wiki/Q12310021","display_name":"Subject (documents)","level":2,"score":0.28459998965263367},{"id":"https://openalex.org/C2777299769","wikidata":"https://www.wikidata.org/wiki/Q3707858","display_name":"Type (biology)","level":2,"score":0.27950000762939453},{"id":"https://openalex.org/C197096303","wikidata":"https://www.wikidata.org/wiki/Q869887","display_name":"Probability mass function","level":3,"score":0.2791000008583069},{"id":"https://openalex.org/C177769412","wikidata":"https://www.wikidata.org/wiki/Q278090","display_name":"Prior probability","level":3,"score":0.2777000069618225},{"id":"https://openalex.org/C21080849","wikidata":"https://www.wikidata.org/wiki/Q13611879","display_name":"Data point","level":2,"score":0.2768999934196472},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.27129998803138733},{"id":"https://openalex.org/C87007009","wikidata":"https://www.wikidata.org/wiki/Q210832","display_name":"Statistical hypothesis testing","level":2,"score":0.2662999927997589},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.26600000262260437},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.26429998874664307},{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.2621999979019165},{"id":"https://openalex.org/C195065555","wikidata":"https://www.wikidata.org/wiki/Q214881","display_name":"Curvature","level":2,"score":0.25769999623298645},{"id":"https://openalex.org/C110121322","wikidata":"https://www.wikidata.org/wiki/Q865811","display_name":"Distribution (mathematics)","level":2,"score":0.2531999945640564},{"id":"https://openalex.org/C97933134","wikidata":"https://www.wikidata.org/wiki/Q5374249","display_name":"Empirical probability","level":4,"score":0.25290000438690186},{"id":"https://openalex.org/C166921843","wikidata":"https://www.wikidata.org/wiki/Q3776487","display_name":"Statistical parameter","level":2,"score":0.25270000100135803},{"id":"https://openalex.org/C2986587452","wikidata":"https://www.wikidata.org/wiki/Q938438","display_name":"Statistical analysis","level":2,"score":0.2513999938964844}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:doi:10.48550/arxiv.2602.11871","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},{"id":"doi:10.48550/arxiv.2602.11871","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2602.11871","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:doi:10.48550/arxiv.2602.11871","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https://openalex.org/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Large":[0],"Language":[1],"Models":[2],"(LLMs)":[3],"are":[4],"a":[5,20,45,71,77,80,84,107,141,166,184],"powerful":[6],"tool":[7],"for":[8,39,186],"statistical":[9,145,168],"text":[10,137,171,190],"analysis,":[11],"with":[12,192],"derived":[13],"sequences":[14],"of":[15,22,54,61,86,109,121,132,170],"next-token":[16,47],"probability":[17,48,97,133],"distributions":[18],"offering":[19],"wealth":[21],"information.":[23,98],"Extracting":[24],"this":[25,66],"signal":[26],"typically":[27],"relies":[28],"on":[29,51,157,177],"metrics":[30],"such":[31],"as":[32],"perplexity,":[33],"which":[34],"do":[35],"not":[36],"adequately":[37],"account":[38],"context;":[40],"how":[41],"one":[42],"should":[43],"interpret":[44],"given":[46],"is":[49,173],"dependent":[50],"the":[52,59,62,89,130],"number":[53],"reasonable":[55],"choices":[56],"encoded":[57],"by":[58],"shape":[60],"conditional":[63],"distribution.":[64],"In":[65],"work,":[67],"we":[68],"present":[69],"DMAP,":[70],"mathematically":[72],"grounded":[73],"method":[74],"that":[75,92,151,163,172],"maps":[76],"text,":[78],"via":[79],"language":[81],"model,":[82],"to":[83,124,155,175],"set":[85],"samples":[87],"in":[88,135,148],"unit":[90],"interval":[91],"jointly":[93],"encode":[94],"rank":[95],"and":[96,105,139,182],"This":[99],"representation":[100],"enables":[101],"efficient,":[102],"model-agnostic":[103],"analysis":[104,143,191],"supports":[106],"range":[108],"applications.":[110],"We":[111],"illustrate":[112],"its":[113],"utility":[114],"through":[115],"three":[116],"case":[117],"studies:":[118],"(i)":[119],"validation":[120],"generation":[122],"parameters":[123],"ensure":[125],"data":[126],"integrity,":[127],"(ii)":[128],"examining":[129],"role":[131],"curvature":[134],"machine-generated":[136],"detection,":[138],"(iii)":[140],"forensic":[142],"revealing":[144],"fingerprints":[146],"left":[147],"downstream":[149],"models":[150],"have":[152],"been":[153],"subject":[154],"post-training":[156],"synthetic":[158],"data.":[159],"Our":[160],"results":[161],"demonstrate":[162],"DMAP":[164],"offers":[165],"unified":[167],"view":[169],"simple":[174],"compute":[176],"consumer":[178],"hardware,":[179],"widely":[180],"applicable,":[181],"provides":[183],"foundation":[185],"further":[187],"research":[188],"into":[189],"LLMs.":[193]},"counts_by_year":[],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2026-02-14T00:00:00"}
