{"id":"https://openalex.org/W7120021928","doi":"https://doi.org/10.48550/arxiv.2601.04524","title":"BioPIE: A Biomedical Protocol Information Extraction Dataset for High-Reasoning-Complexity Experiment Question Answer","display_name":"BioPIE: A Biomedical Protocol Information Extraction Dataset for High-Reasoning-Complexity Experiment Question Answer","publication_year":2026,"publication_date":"2026-01-08","ids":{"openalex":"https://openalex.org/W7120021928","doi":"https://doi.org/10.48550/arxiv.2601.04524"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2601.04524","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2601.04524","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2601.04524","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102691699","display_name":"Haofei Hou","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Hou, Haofei","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5122565391","display_name":"Shunyi Zhao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhao, Shunyi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5122459970","display_name":"Fanxu Meng","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Meng, Fanxu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5122590141","display_name":"Kairui Yang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Kairui","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5122562591","display_name":"Lecheng Ruan","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ruan, Lecheng","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5122559462","display_name":"Qining Wang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Qining","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5102691699"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.7555000185966492,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.7555000185966492,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11710","display_name":"Biomedical Text Mining and Ontologies","score":0.030300000682473183,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.02879999950528145,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/protocol","display_name":"Protocol (science)","score":0.6553000211715698},{"id":"https://openalex.org/keywords/focus","display_name":"Focus (optics)","score":0.5565000176429749},{"id":"https://openalex.org/keywords/information-extraction","display_name":"Information extraction","score":0.5254999995231628},{"id":"https://openalex.org/keywords/questions-and-answers","display_name":"Questions and answers","score":0.46399998664855957},{"id":"https://openalex.org/keywords/question-answering","display_name":"Question answering","score":0.37560001015663147},{"id":"https://openalex.org/keywords/scale","display_name":"Scale (ratio)","score":0.3646000027656555},{"id":"https://openalex.org/keywords/data-extraction","display_name":"Data extraction","score":0.35359999537467957}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7418000102043152},{"id":"https://openalex.org/C2780385302","wikidata":"https://www.wikidata.org/wiki/Q367158","display_name":"Protocol (science)","level":3,"score":0.6553000211715698},{"id":"https://openalex.org/C192209626","wikidata":"https://www.wikidata.org/wiki/Q190909","display_name":"Focus (optics)","level":2,"score":0.5565000176429749},{"id":"https://openalex.org/C195807954","wikidata":"https://www.wikidata.org/wiki/Q1662562","display_name":"Information extraction","level":2,"score":0.5254999995231628},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.48190000653266907},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.47510001063346863},{"id":"https://openalex.org/C3019144022","wikidata":"https://www.wikidata.org/wiki/Q4124998","display_name":"Questions and answers","level":2,"score":0.46399998664855957},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.46140000224113464},{"id":"https://openalex.org/C44291984","wikidata":"https://www.wikidata.org/wiki/Q1074173","display_name":"Question answering","level":2,"score":0.37560001015663147},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.3646000027656555},{"id":"https://openalex.org/C2777466982","wikidata":"https://www.wikidata.org/wiki/Q5227287","display_name":"Data extraction","level":3,"score":0.35359999537467957},{"id":"https://openalex.org/C120567893","wikidata":"https://www.wikidata.org/wiki/Q1582085","display_name":"Knowledge extraction","level":2,"score":0.35199999809265137},{"id":"https://openalex.org/C2780966255","wikidata":"https://www.wikidata.org/wiki/Q5474306","display_name":"Foundation (evidence)","level":2,"score":0.3188000023365021},{"id":"https://openalex.org/C180198813","wikidata":"https://www.wikidata.org/wiki/Q121182","display_name":"Information system","level":2,"score":0.31869998574256897},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3156000077724457},{"id":"https://openalex.org/C55037315","wikidata":"https://www.wikidata.org/wiki/Q5421151","display_name":"Experimental data","level":2,"score":0.3156000077724457},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.2928999960422516},{"id":"https://openalex.org/C2987255567","wikidata":"https://www.wikidata.org/wiki/Q33002955","display_name":"Knowledge graph","level":2,"score":0.28780001401901245},{"id":"https://openalex.org/C161301231","wikidata":"https://www.wikidata.org/wiki/Q3478658","display_name":"Knowledge representation and reasoning","level":2,"score":0.26019999384880066}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2601.04524","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2601.04524","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2601.04524","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2601.04524","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Question":[0],"Answer":[1],"(QA)":[2],"systems":[3],"for":[4,15,32],"biomedical":[5,33,47,51,104,147],"experiments":[6,105],"facilitate":[7],"cross-disciplinary":[8],"communication,":[9],"and":[10,25,59,71,95,115,129,144],"serve":[11],"as":[12],"a":[13,85,98,117],"foundation":[14],"downstream":[16],"tasks,":[17],"e.g.,":[18,40],"laboratory":[19],"automation.":[20],"High":[21],"Information":[22,81],"Density":[23],"(HID)":[24],"Multi-Step":[26],"Reasoning":[27],"(MSR)":[28],"pose":[29],"unique":[30],"challenges":[31],"experimental":[34,48,66,92,137],"QA.":[35,49],"While":[36],"extracting":[37],"structured":[38,136],"knowledge,":[39],"Knowledge":[41],"Graphs":[42],"(KGs),":[43],"can":[44],"substantially":[45],"benefit":[46],"Existing":[50],"datasets":[52],"focus":[53],"on":[54,113,126],"general":[55],"or":[56],"coarsegrained":[57],"knowledge":[58,138],"thus":[60],"fail":[61],"to":[62],"support":[63],"the":[64,135],"fine-grained":[65],"reasoning":[67,102],"demanded":[68],"by":[69],"HID":[70],"MSR.":[72],"To":[73],"address":[74],"this":[75],"gap,":[76],"we":[77],"introduce":[78],"Biomedical":[79],"Protocol":[80],"Extraction":[82],"Dataset":[83],"(BioPIE),":[84],"dataset":[86],"that":[87,100,120,134],"provides":[88],"procedure-centric":[89],"KGs":[90],"of":[91],"entities,":[93],"actions,":[94],"relations":[96],"at":[97],"scale":[99],"supports":[101],"over":[103],"across":[106],"protocols.":[107],"We":[108],"evaluate":[109],"information":[110],"extraction":[111],"methods":[112],"BioPIE,":[114,122],"implement":[116],"QA":[118],"system":[119],"leverages":[121],"showcasing":[123],"performance":[124],"gains":[125],"test,":[127],"HID,":[128],"MSR":[130],"question":[131],"sets,":[132],"showing":[133],"in":[139],"BioPIE":[140],"underpins":[141],"both":[142],"AI-assisted":[143],"more":[145],"autonomous":[146],"experimentation.":[148]},"counts_by_year":[],"updated_date":"2026-01-10T23:44:22.266649","created_date":"2026-01-10T00:00:00"}
