{"id":"https://openalex.org/W7160839805","doi":"https://doi.org/10.48550/arxiv.2605.06720","title":"Conditional generation of antibody sequences with classifier-guided germline-absorbing discrete diffusion","display_name":"Conditional generation of antibody sequences with classifier-guided germline-absorbing discrete diffusion","publication_year":2026,"publication_date":"2026-05-07","ids":{"openalex":"https://openalex.org/W7160839805","doi":"https://doi.org/10.48550/arxiv.2605.06720"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2605.06720","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.06720","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2605.06720","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5109770319","display_name":"Justin Sanders","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sanders, Justin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5026804774","display_name":"Luca Giancardo","orcid":"https://orcid.org/0000-0002-4862-2277"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Giancardo, Luca","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135887862","display_name":"Lan Guo","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Guo, Lan","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135872964","display_name":"Yue Zhao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhao, Yue","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5135872822","display_name":"Kemal Sonmez","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sonmez, Kemal","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5122375224","display_name":"Nina Cheng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Cheng, Nina","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5135836717","display_name":"Melih Yilmaz","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yilmaz, Melih","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":7,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11016","display_name":"Monoclonal and Polyclonal Antibodies Research","score":0.949400007724762,"subfield":{"id":"https://openalex.org/subfields/2741","display_name":"Radiology, Nuclear Medicine and Imaging"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},"topics":[{"id":"https://openalex.org/T11016","display_name":"Monoclonal and Polyclonal Antibodies Research","score":0.949400007724762,"subfield":{"id":"https://openalex.org/subfields/2741","display_name":"Radiology, Nuclear Medicine and Imaging"},"field":{"id":"https://openalex.org/fields/27","display_name":"Medicine"},"domain":{"id":"https://openalex.org/domains/4","display_name":"Health Sciences"}},{"id":"https://openalex.org/T12576","display_name":"vaccines and immunoinformatics approaches","score":0.029200000688433647,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T11124","display_name":"Protein purification and stability","score":0.003599999938160181,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/germline","display_name":"Germline","score":0.6184999942779541},{"id":"https://openalex.org/keywords/sequence","display_name":"Sequence (biology)","score":0.5738999843597412},{"id":"https://openalex.org/keywords/markov-chain","display_name":"Markov chain","score":0.5444999933242798},{"id":"https://openalex.org/keywords/discrete-time-and-continuous-time","display_name":"Discrete time and continuous time","score":0.4507000148296356},{"id":"https://openalex.org/keywords/class","display_name":"Class (philosophy)","score":0.3571000099182129},{"id":"https://openalex.org/keywords/conditional-probability-distribution","display_name":"Conditional probability distribution","score":0.3560999929904938},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.3449000120162964},{"id":"https://openalex.org/keywords/stability","display_name":"Stability (learning theory)","score":0.3440000116825104},{"id":"https://openalex.org/keywords/hidden-markov-model","display_name":"Hidden Markov model","score":0.3418000042438507}],"concepts":[{"id":"https://openalex.org/C109825262","wikidata":"https://www.wikidata.org/wiki/Q1139164","display_name":"Germline","level":3,"score":0.6184999942779541},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5763999819755554},{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.5738999843597412},{"id":"https://openalex.org/C98763669","wikidata":"https://www.wikidata.org/wiki/Q176645","display_name":"Markov chain","level":2,"score":0.5444999933242798},{"id":"https://openalex.org/C55689738","wikidata":"https://www.wikidata.org/wiki/Q15963867","display_name":"Discrete time and continuous time","level":2,"score":0.4507000148296356},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.40209999680519104},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.36000001430511475},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.3596999943256378},{"id":"https://openalex.org/C2777212361","wikidata":"https://www.wikidata.org/wiki/Q5127848","display_name":"Class (philosophy)","level":2,"score":0.3571000099182129},{"id":"https://openalex.org/C43555835","wikidata":"https://www.wikidata.org/wiki/Q2300258","display_name":"Conditional probability distribution","level":2,"score":0.3560999929904938},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.3449000120162964},{"id":"https://openalex.org/C112972136","wikidata":"https://www.wikidata.org/wiki/Q7595718","display_name":"Stability (learning theory)","level":2,"score":0.3440000116825104},{"id":"https://openalex.org/C23224414","wikidata":"https://www.wikidata.org/wiki/Q176769","display_name":"Hidden Markov model","level":2,"score":0.3418000042438507},{"id":"https://openalex.org/C149441793","wikidata":"https://www.wikidata.org/wiki/Q200726","display_name":"Probability distribution","level":2,"score":0.32429999113082886},{"id":"https://openalex.org/C158424031","wikidata":"https://www.wikidata.org/wiki/Q1191905","display_name":"Gibbs sampling","level":3,"score":0.32409998774528503},{"id":"https://openalex.org/C140779682","wikidata":"https://www.wikidata.org/wiki/Q210868","display_name":"Sampling (signal processing)","level":3,"score":0.3197999894618988},{"id":"https://openalex.org/C77553402","wikidata":"https://www.wikidata.org/wiki/Q13222579","display_name":"Upper and lower bounds","level":2,"score":0.3091000020503998},{"id":"https://openalex.org/C79772020","wikidata":"https://www.wikidata.org/wiki/Q5159264","display_name":"Conditional independence","level":2,"score":0.2962000072002411},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.2879999876022339},{"id":"https://openalex.org/C111350023","wikidata":"https://www.wikidata.org/wiki/Q1191869","display_name":"Markov chain Monte Carlo","level":3,"score":0.28630000352859497},{"id":"https://openalex.org/C159886148","wikidata":"https://www.wikidata.org/wiki/Q176645","display_name":"Markov process","level":2,"score":0.28600001335144043},{"id":"https://openalex.org/C80444323","wikidata":"https://www.wikidata.org/wiki/Q2878974","display_name":"Theoretical computer science","level":1,"score":0.2854999899864197},{"id":"https://openalex.org/C35639132","wikidata":"https://www.wikidata.org/wiki/Q7452468","display_name":"Sequence labeling","level":3,"score":0.27149999141693115},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.26910001039505005},{"id":"https://openalex.org/C13514818","wikidata":"https://www.wikidata.org/wiki/Q5552256","display_name":"Germline mutation","level":4,"score":0.2639999985694885},{"id":"https://openalex.org/C198531522","wikidata":"https://www.wikidata.org/wiki/Q485146","display_name":"Sample (material)","level":2,"score":0.2624000012874603},{"id":"https://openalex.org/C52740198","wikidata":"https://www.wikidata.org/wiki/Q1539564","display_name":"Importance sampling","level":3,"score":0.2590000033378601},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.2526000142097473}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2605.06720","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.06720","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2605.06720","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2605.06720","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Antibody":[0],"therapeutics":[1],"are":[2],"among":[3],"the":[4,107,114,126,135,139,155,180,192,200],"most":[5],"successful":[6],"modern":[7],"medicines,":[8],"yet":[9],"computationally":[10],"designing":[11],"antibodies":[12,206],"with":[13,207,239],"desirable":[14],"binding":[15,212],"and":[16,55,150,158,210,226],"developability":[17],"properties":[18],"remains":[19],"challenging.":[20],"While":[21],"protein":[22],"language":[23,82],"models":[24],"(pLMs)":[25],"have":[26],"emerged":[27],"as":[28,125],"powerful":[29],"tools":[30],"for":[31,60,90],"antibody":[32,86],"sequence":[33,116,122],"design,":[34],"existing":[35],"approaches":[36],"largely":[37],"suffer":[38],"from":[39,141,154,173,237],"two":[40,70],"key":[41],"limitations:":[42],"they":[43,56],"predominantly":[44],"memorize":[45],"germline":[46,100,115,142,161,166,196],"sequences":[47,87],"rather":[48,118],"than":[49,119],"modeling":[50,83],"biologically":[51,130],"meaningful":[52],"somatic":[53],"variation,":[54],"offer":[57],"limited":[58],"support":[59],"flexible":[61],"classifier-guided":[62],"conditional":[63,201],"generation.":[64],"We":[65,163,189],"address":[66],"these":[67],"challenges":[68],"through":[69],"primary":[71],"contributions.":[72],"First,":[73],"we":[74,98],"demonstrate":[75,191],"that":[76,165],"discrete":[77,108,241],"diffusion":[78,109,167,197],"fine-tuning":[79],"achieves":[80],"strong":[81],"performance":[84],"on":[85,93,199],"while":[88],"allowing":[89],"generation":[91,202],"conditioned":[92],"any":[94],"off-the-shelf":[95],"classifier.":[96],"Second,":[97],"introduce":[99],"absorbing":[101,127],"diffusion,":[102],"a":[103,120,232],"novel":[104],"modification":[105],"of":[106,194,204],"noise":[110],"process":[111],"in":[112],"which":[113],"-":[117,123],"masked":[121],"serves":[124],"state.":[128],"This":[129],"motivated":[131],"inductive":[132],"bias":[133],"restricts":[134],"model":[136,198,218],"to":[137,143,176,235],"learning":[138],"trajectory":[140],"observed":[144],"sequence,":[145],"effectively":[146],"excluding":[147],"genetic":[148],"variation":[149],"V(D)J":[151],"recombination":[152],"statistics":[153],"learned":[156],"distribution":[157],"dramatically":[159],"mitigating":[160],"bias.":[162],"show":[164],"improves":[168],"non-germline":[169],"residue":[170],"prediction":[171],"accuracy":[172],"26":[174],"percent":[175],"46":[177],"percent,":[178],"approaching":[179],"theoretical":[181],"upper":[182],"bound":[183],"set":[184],"by":[185],"true":[186],"biological":[187],"variability.":[188],"then":[190],"utility":[193],"our":[195,217],"tasks":[203,216],"sampling":[205],"improved":[208,221],"hydrophobicity":[209],"predicted":[211],"affinity.":[213],"On":[214],"both":[215],"shows":[219],"an":[220],"tradeoff":[222],"between":[223],"class":[224],"adherence":[225],"sample":[227,236],"quality,":[228],"significantly":[229],"outperforming":[230],"EvoProtGrad,":[231],"popular":[233],"strategy":[234],"pLMs":[238],"gradient-based":[240],"Markov":[242],"Chain":[243],"Monte":[244],"Carlo.":[245]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2026-05-12T00:00:00"}
