{"id":"https://openalex.org/W7151437202","doi":"https://doi.org/10.48550/arxiv.2604.03853","title":"Understanding When Poisson Log-Normal Models Outperform Penalized Poisson Regression for Microbiome Count Data","display_name":"Understanding When Poisson Log-Normal Models Outperform Penalized Poisson Regression for Microbiome Count Data","publication_year":2026,"publication_date":"2026-04-04","ids":{"openalex":"https://openalex.org/W7151437202","doi":"https://doi.org/10.48550/arxiv.2604.03853"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2604.03853","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.03853","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2604.03853","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5108306203","display_name":"Daniel Agyapong","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Agyapong, Daniel","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5012516981","display_name":"Julien Chiquet","orcid":"https://orcid.org/0000-0002-3629-3429"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Chiquet, Julien","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5110261947","display_name":"Jane C. Marks","orcid":"https://orcid.org/0000-0002-6205-1976"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Marks, Jane","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5035679840","display_name":"Toby Dylan Hocking","orcid":"https://orcid.org/0000-0002-3146-0865"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hocking, Toby Dylan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5108306203"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10066","display_name":"Gut microbiota and health","score":0.9785000085830688,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T10066","display_name":"Gut microbiota and health","score":0.9785000085830688,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10887","display_name":"Bioinformatics and Genomic Networks","score":0.0017999999690800905,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T11289","display_name":"Single-cell and spatial transcriptomics","score":0.0010999999940395355,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/count-data","display_name":"Count data","score":0.8797000050544739},{"id":"https://openalex.org/keywords/overdispersion","display_name":"Overdispersion","score":0.8618000149726868},{"id":"https://openalex.org/keywords/poisson-distribution","display_name":"Poisson distribution","score":0.6848000288009644},{"id":"https://openalex.org/keywords/poisson-regression","display_name":"Poisson regression","score":0.6718000173568726},{"id":"https://openalex.org/keywords/multivariate-statistics","display_name":"Multivariate statistics","score":0.5098000168800354},{"id":"https://openalex.org/keywords/zero-inflated-model","display_name":"Zero-inflated model","score":0.4921000003814697},{"id":"https://openalex.org/keywords/regression","display_name":"Regression","score":0.428600013256073},{"id":"https://openalex.org/keywords/latent-variable","display_name":"Latent variable","score":0.3953000009059906},{"id":"https://openalex.org/keywords/generalized-linear-model","display_name":"Generalized linear model","score":0.3917999863624573}],"concepts":[{"id":"https://openalex.org/C33643355","wikidata":"https://www.wikidata.org/wiki/Q5176731","display_name":"Count data","level":3,"score":0.8797000050544739},{"id":"https://openalex.org/C117236510","wikidata":"https://www.wikidata.org/wiki/Q7113620","display_name":"Overdispersion","level":4,"score":0.8618000149726868},{"id":"https://openalex.org/C100906024","wikidata":"https://www.wikidata.org/wiki/Q205692","display_name":"Poisson distribution","level":2,"score":0.6848000288009644},{"id":"https://openalex.org/C73269764","wikidata":"https://www.wikidata.org/wiki/Q954529","display_name":"Poisson regression","level":3,"score":0.6718000173568726},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.5899999737739563},{"id":"https://openalex.org/C161584116","wikidata":"https://www.wikidata.org/wiki/Q1952580","display_name":"Multivariate statistics","level":2,"score":0.5098000168800354},{"id":"https://openalex.org/C88721176","wikidata":"https://www.wikidata.org/wiki/Q966010","display_name":"Zero-inflated model","level":4,"score":0.4921000003814697},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.4778999984264374},{"id":"https://openalex.org/C83546350","wikidata":"https://www.wikidata.org/wiki/Q1139051","display_name":"Regression","level":2,"score":0.428600013256073},{"id":"https://openalex.org/C51167844","wikidata":"https://www.wikidata.org/wiki/Q4422623","display_name":"Latent variable","level":2,"score":0.3953000009059906},{"id":"https://openalex.org/C41587187","wikidata":"https://www.wikidata.org/wiki/Q1501882","display_name":"Generalized linear model","level":2,"score":0.3917999863624573},{"id":"https://openalex.org/C177599991","wikidata":"https://www.wikidata.org/wiki/Q3706279","display_name":"Deviance (statistics)","level":2,"score":0.38960000872612},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.388700008392334},{"id":"https://openalex.org/C91025261","wikidata":"https://www.wikidata.org/wiki/Q7269460","display_name":"Quasi-likelihood","level":4,"score":0.37959998846054077},{"id":"https://openalex.org/C93959086","wikidata":"https://www.wikidata.org/wiki/Q6888345","display_name":"Model selection","level":2,"score":0.3646000027656555},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3497999906539917},{"id":"https://openalex.org/C152877465","wikidata":"https://www.wikidata.org/wiki/Q208042","display_name":"Regression analysis","level":2,"score":0.33399999141693115},{"id":"https://openalex.org/C129848803","wikidata":"https://www.wikidata.org/wiki/Q2564360","display_name":"Sample size determination","level":2,"score":0.32989999651908875},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.32409998774528503},{"id":"https://openalex.org/C2776036281","wikidata":"https://www.wikidata.org/wiki/Q48769818","display_name":"Constraint (computer-aided design)","level":2,"score":0.3122999966144562},{"id":"https://openalex.org/C143121216","wikidata":"https://www.wikidata.org/wiki/Q1330402","display_name":"Microbiome","level":2,"score":0.30570000410079956},{"id":"https://openalex.org/C81917197","wikidata":"https://www.wikidata.org/wiki/Q628760","display_name":"Selection (genetic algorithm)","level":2,"score":0.3050999939441681},{"id":"https://openalex.org/C148483581","wikidata":"https://www.wikidata.org/wiki/Q446488","display_name":"Feature selection","level":2,"score":0.30140000581741333},{"id":"https://openalex.org/C149782125","wikidata":"https://www.wikidata.org/wiki/Q160039","display_name":"Econometrics","level":1,"score":0.29829999804496765},{"id":"https://openalex.org/C117220453","wikidata":"https://www.wikidata.org/wiki/Q5172842","display_name":"Correlation","level":2,"score":0.2896000146865845},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.28130000829696655},{"id":"https://openalex.org/C198531522","wikidata":"https://www.wikidata.org/wiki/Q485146","display_name":"Sample (material)","level":2,"score":0.27570000290870667},{"id":"https://openalex.org/C154606282","wikidata":"https://www.wikidata.org/wiki/Q7048579","display_name":"Nominal level","level":3,"score":0.25609999895095825},{"id":"https://openalex.org/C64708745","wikidata":"https://www.wikidata.org/wiki/Q2998010","display_name":"Partial correlation","level":3,"score":0.2542000114917755},{"id":"https://openalex.org/C9357733","wikidata":"https://www.wikidata.org/wiki/Q6878417","display_name":"Missing data","level":2,"score":0.2538999915122986}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2604.03853","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.03853","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2604.03853","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2604.03853","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Multivariate":[0],"count":[1,45,170,178],"models":[2,171],"are":[3],"often":[4],"justified":[5],"by":[6],"their":[7],"ability":[8],"to":[9,57,62,113],"capture":[10],"latent":[11,168],"dependence,":[12],"but":[13],"researchers":[14],"receive":[15],"little":[16],"guidance":[17,164],"on":[18,24,52,92,106,144],"when":[19],"this":[20,32],"added":[21],"structure":[22],"improves":[23],"simpler":[25],"penalized":[26,173],"marginal":[27],"Poisson":[28,67,174],"regression.":[29],"We":[30],"study":[31],"question":[33],"using":[34,65],"real":[35],"microbiome":[36],"data":[37],"under":[38,69],"a":[39],"unified":[40],"held-out":[41,66],"evaluation":[42],"framework.":[43],"For":[44,82],"prediction,":[46],"we":[47,85],"compare":[48,86],"PLN":[49,103],"and":[50,60,88,135,172,180],"GLMNet(Poisson)":[51,89,105,150],"20":[53],"datasets":[54,96],"spanning":[55],"32":[56],"18,270":[58],"samples":[59],"24":[61],"257":[63],"taxa,":[64],"deviance":[68],"leave-one-taxon-out":[70],"prediction":[71,179],"with":[72,97,110,126,154],"3-fold":[73],"sample":[74],"cross-validation":[75],"rather":[76],"than":[77],"synthetic":[78],"or":[79,156],"in-sample":[80],"criteria.":[81],"network":[83],"inference,":[84],"PLNNetwork":[87,141],"neighborhood":[90],"selection":[91],"five":[93],"publicly":[94],"available":[95],"experimentally":[98],"validated":[99],"microbial":[100],"interaction":[101,147,181],"truth.":[102],"outperforms":[104],"most":[107],"count-prediction":[108],"datasets,":[109],"gains":[111],"up":[112],"38":[114],"percent.":[115],"The":[116],"primary":[117],"predictor":[118],"of":[119],"the":[120,123,131],"winner":[121],"is":[122,151],"sample-to-taxon":[124],"ratio,":[125],"mean":[127],"absolute":[128],"correlation":[129],"as":[130,137],"strongest":[132],"secondary":[133],"signal":[134],"overdispersion":[136],"an":[138],"additional":[139],"predictor.":[140],"performs":[142],"best":[143],"broad":[145],"undirected":[146],"benchmarks,":[148],"whereas":[149],"better":[152],"aligned":[153],"local":[155],"directional":[157],"effects.":[158],"Taken":[159],"together,":[160],"these":[161],"results":[162],"provide":[163],"for":[165],"choosing":[166],"between":[167],"multivariate":[169],"regression":[175],"in":[176],"biological":[177],"recovery.":[182]},"counts_by_year":[],"updated_date":"2026-05-05T08:41:31.759640","created_date":"2026-04-08T00:00:00"}
