{"id":"https://openalex.org/W6949575209","doi":"https://doi.org/10.5281/zenodo.15165883","title":"T-REx Bite 1.0","display_name":"T-REx Bite 1.0","publication_year":2025,"publication_date":"2025-04-07","ids":{"openalex":"https://openalex.org/W6949575209","doi":"https://doi.org/10.5281/zenodo.15165883"},"language":"en","primary_location":{"id":"doi:10.5281/zenodo.15165883","is_oa":true,"landing_page_url":"https://doi.org/10.5281/zenodo.15165883","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"dataset"},"type":"dataset","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.5281/zenodo.15165883","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Barmettler, Joel","orcid":"https://orcid.org/0009-0006-5118-7129"},"institutions":[{"id":"https://openalex.org/I202697423","display_name":"University of Zurich","ror":"https://ror.org/02crff812","country_code":"CH","type":"education","lineage":["https://openalex.org/I202697423"]}],"countries":["CH"],"is_corresponding":true,"raw_author_name":"Barmettler, Joel","raw_affiliation_strings":["University of Zurich"],"affiliations":[{"raw_affiliation_string":"University of Zurich","institution_ids":["https://openalex.org/I202697423"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Bernstein, Abraham","orcid":"https://orcid.org/0000-0002-0128-4602"},"institutions":[{"id":"https://openalex.org/I202697423","display_name":"University of Zurich","ror":"https://ror.org/02crff812","country_code":"CH","type":"education","lineage":["https://openalex.org/I202697423"]}],"countries":["CH"],"is_corresponding":false,"raw_author_name":"Bernstein, Abraham","raw_affiliation_strings":["University of Zurich"],"affiliations":[{"raw_affiliation_string":"University of Zurich","institution_ids":["https://openalex.org/I202697423"]}]},{"author_position":"last","author":{"id":null,"display_name":"Rossetto, Luca","orcid":"https://orcid.org/0000-0002-5389-9465"},"institutions":[{"id":"https://openalex.org/I202697423","display_name":"University of Zurich","ror":"https://ror.org/02crff812","country_code":"CH","type":"education","lineage":["https://openalex.org/I202697423"]},{"id":"https://openalex.org/I42934936","display_name":"Dublin City University","ror":"https://ror.org/04a1a1e81","country_code":"IE","type":"education","lineage":["https://openalex.org/I42934936"]}],"countries":["CH","IE"],"is_corresponding":false,"raw_author_name":"Rossetto, Luca","raw_affiliation_strings":["University of Zurich","Dublin City University"],"affiliations":[{"raw_affiliation_string":"University of Zurich","institution_ids":["https://openalex.org/I202697423"]},{"raw_affiliation_string":"Dublin City University","institution_ids":["https://openalex.org/I42934936"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":["https://openalex.org/I202697423"],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":true,"primary_topic":{"id":"https://openalex.org/T13568","display_name":"Wood and Agarwood Research","score":0.13369999825954437,"subfield":{"id":"https://openalex.org/subfields/1605","display_name":"Organic Chemistry"},"field":{"id":"https://openalex.org/fields/16","display_name":"Chemistry"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T13568","display_name":"Wood and Agarwood Research","score":0.13369999825954437,"subfield":{"id":"https://openalex.org/subfields/1605","display_name":"Organic Chemistry"},"field":{"id":"https://openalex.org/fields/16","display_name":"Chemistry"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10895","display_name":"Species Distribution and Climate Change","score":0.08209999650716782,"subfield":{"id":"https://openalex.org/subfields/2302","display_name":"Ecological Modeling"},"field":{"id":"https://openalex.org/fields/23","display_name":"Environmental Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T14297","display_name":"Plant-Derived Bioactive Compounds","score":0.05009999871253967,"subfield":{"id":"https://openalex.org/subfields/1605","display_name":"Organic Chemistry"},"field":{"id":"https://openalex.org/fields/16","display_name":"Chemistry"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/snippet","display_name":"Snippet","score":0.9187999963760376},{"id":"https://openalex.org/keywords/subject","display_name":"Subject (documents)","score":0.7681000232696533},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.6265000104904175},{"id":"https://openalex.org/keywords/object","display_name":"Object (grammar)","score":0.6173999905586243},{"id":"https://openalex.org/keywords/sentence","display_name":"Sentence","score":0.5834000110626221},{"id":"https://openalex.org/keywords/clarity","display_name":"CLARITY","score":0.4999000132083893}],"concepts":[{"id":"https://openalex.org/C2777822670","wikidata":"https://www.wikidata.org/wiki/Q1120538","display_name":"Snippet","level":2,"score":0.9187999963760376},{"id":"https://openalex.org/C2777855551","wikidata":"https://www.wikidata.org/wiki/Q12310021","display_name":"Subject (documents)","level":2,"score":0.7681000232696533},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7257999777793884},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.6265000104904175},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.6173999905586243},{"id":"https://openalex.org/C2777530160","wikidata":"https://www.wikidata.org/wiki/Q41796","display_name":"Sentence","level":2,"score":0.5834000110626221},{"id":"https://openalex.org/C2777146004","wikidata":"https://www.wikidata.org/wiki/Q14949826","display_name":"CLARITY","level":2,"score":0.4999000132083893},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4625000059604645},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4207000136375427},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.3763999938964844},{"id":"https://openalex.org/C2781316041","wikidata":"https://www.wikidata.org/wiki/Q1230584","display_name":"Diversity (politics)","level":2,"score":0.3467999994754791},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.33959999680519104},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.29660001397132874},{"id":"https://openalex.org/C183322885","wikidata":"https://www.wikidata.org/wiki/Q17007702","display_name":"Context model","level":3,"score":0.29440000653266907},{"id":"https://openalex.org/C2779304628","wikidata":"https://www.wikidata.org/wiki/Q3503480","display_name":"Face (sociological concept)","level":2,"score":0.289900004863739},{"id":"https://openalex.org/C2775936607","wikidata":"https://www.wikidata.org/wiki/Q466845","display_name":"Tracking (education)","level":2,"score":0.2890999913215637},{"id":"https://openalex.org/C2778137410","wikidata":"https://www.wikidata.org/wiki/Q2732820","display_name":"Government (linguistics)","level":2,"score":0.26010000705718994}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.5281/zenodo.15165883","is_oa":true,"landing_page_url":"https://doi.org/10.5281/zenodo.15165883","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"dataset"}],"best_oa_location":{"id":"doi:10.5281/zenodo.15165883","is_oa":true,"landing_page_url":"https://doi.org/10.5281/zenodo.15165883","pdf_url":null,"source":{"id":"https://openalex.org/S4306400562","display_name":"Zenodo (CERN European Organization for Nuclear Research)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I67311998","host_organization_name":"European Organization for Nuclear Research","host_organization_lineage":["https://openalex.org/I67311998"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"dataset"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"T-REx":[0,3,99,140],"Bite":[1],"adapts":[2],"to":[4,94,144],"the":[5,15,19,44,52,74,89,95,134,161,165,179],"next-token-prediction":[6],"paradigm":[7],"by":[8],"ensuring":[9],"that,":[10],"in":[11,26,98],"each":[12,63],"text":[13,131],"snippet,":[14],"subject":[16,34,79],"appears":[17],"before":[18],"object.":[20,46],"This":[21,150],"alignment":[22],"mimics":[23],"real-world":[24],"scenarios":[25],"which":[27],"a":[28,85,126,146],"model":[29],"sees":[30],"partial":[31],"information":[32],"(the":[33],"and":[35,38,80,91,118,137],"some":[36],"context)":[37],"must":[39],"then":[40],"predict":[41],"or":[42],"\"retrieve\"":[43],"missing":[45],"To":[47],"keep":[48],"snippets":[49],"manageable":[50],"within":[51],"limited":[53],"context":[54],"windows":[55],"of":[56,129,139,182],"smaller":[57],"language":[58],"models":[59],"(such":[60],"as":[61],"GPT-2),":[62],"snippet":[64,75],"is":[65,92,125,142],"capped":[66],"at":[67,88],"512":[68],"characters.":[69],"We":[70],"further":[71],"require":[72],"that":[73,132],"explicitly":[76],"mentions":[77],"both":[78],"object,":[81,90],"does":[82],"not":[83],"start":[84],"new":[86],"sentence":[87],"linked":[93],"corresponding":[96],"subgraph":[97],"Star.":[100],"By":[101],"applying":[102],"these":[103],"constraints,":[104],"we":[105],"obtain":[106],"about":[107],"6.4":[108],"million":[109,115,120],"short":[110],"\"bites\"":[111],"for":[112,116,121],"training,":[113],"0.92":[114],"testing,":[117],"0.75":[119],"validation.":[122],"Each":[123],"bite":[124],"compact":[127],"piece":[128],"Wikipedia":[130],"retains":[133],"original":[135],"clarity":[136],"diversity":[138],"but":[141],"tailored":[143],"ensure":[145],"direct":[147],"subject--object":[148],"alignment.":[149],"structure":[151],"lets":[152],"researchers":[153],"readily":[154],"evaluate":[155],"how":[156],"well":[157],"an":[158],"LLM":[159],"completes":[160],"object":[162],"token(s)":[163],"given":[164],"preceding":[166],"subject.":[167],"The":[168],"dataset":[169],"naturally":[170],"accommodates":[171],"multi-token":[172],"objects":[173],"under":[174],"modern":[175],"sub-word":[176],"tokenization,":[177],"removing":[178],"single-token":[180],"assumptions":[181],"LAMA-like":[183],"methods.":[184]},"counts_by_year":[],"updated_date":"2025-11-06T06:51:31.235846","created_date":"2025-10-10T00:00:00"}
