{"id":"https://openalex.org/W4413046406","doi":"https://doi.org/10.1021/acs.jcim.5c00840","title":"Autogenerating a Domain-Specific Question-Answering Data Set from a Thermoelectric Materials Database to Enable High-Performing BERT Models","display_name":"Autogenerating a Domain-Specific Question-Answering Data Set from a Thermoelectric Materials Database to Enable High-Performing BERT Models","publication_year":2025,"publication_date":"2025-08-07","ids":{"openalex":"https://openalex.org/W4413046406","doi":"https://doi.org/10.1021/acs.jcim.5c00840","pmid":"https://pubmed.ncbi.nlm.nih.gov/40773661"},"language":"en","primary_location":{"id":"doi:10.1021/acs.jcim.5c00840","is_oa":true,"landing_page_url":"https://doi.org/10.1021/acs.jcim.5c00840","pdf_url":"https://pubs.acs.org/doi/pdf/10.1021/acs.jcim.5c00840?ref=article_openPDF","source":{"id":"https://openalex.org/S167262187","display_name":"Journal of Chemical Information and Modeling","issn_l":"1549-9596","issn":["1549-9596","1549-960X"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320006","host_organization_name":"American Chemical Society","host_organization_lineage":["https://openalex.org/P4310320006"],"host_organization_lineage_names":["American Chemical Society"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Chemical Information and Modeling","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","pubmed"],"open_access":{"is_oa":true,"oa_status":"hybrid","oa_url":"https://pubs.acs.org/doi/pdf/10.1021/acs.jcim.5c00840?ref=article_openPDF","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5034880949","display_name":"Odysseas Sierepeklis","orcid":"https://orcid.org/0000-0002-5102-1018"},"institutions":[{"id":"https://openalex.org/I241749","display_name":"University of Cambridge","ror":"https://ror.org/013meh722","country_code":"GB","type":"education","lineage":["https://openalex.org/I241749"]},{"id":"https://openalex.org/I4210096386","display_name":"Bridge University","ror":"https://ror.org/00cbm0437","country_code":"SS","type":"education","lineage":["https://openalex.org/I4210096386"]}],"countries":["GB","SS"],"is_corresponding":false,"raw_author_name":"Odysseas Sierepeklis","raw_affiliation_strings":["Cavendish Laboratory, University of Cambridge, J. J. Thomson Avenue, Cambridge CB3 0HE, U.K"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Cavendish Laboratory, University of Cambridge, J. J. Thomson Avenue, Cambridge CB3 0HE, U.K","institution_ids":["https://openalex.org/I4210096386","https://openalex.org/I241749"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5068607578","display_name":"Jacqueline M. Cole","orcid":"https://orcid.org/0000-0002-1552-8743"},"institutions":[{"id":"https://openalex.org/I1286704778","display_name":"Rutherford Appleton Laboratory","ror":"https://ror.org/03gq8fr08","country_code":"GB","type":"facility","lineage":["https://openalex.org/I1286704778","https://openalex.org/I162524378","https://openalex.org/I4210087105"]},{"id":"https://openalex.org/I162524378","display_name":"Science and Technology Facilities Council","ror":"https://ror.org/057g20z61","country_code":"GB","type":"government","lineage":["https://openalex.org/I162524378","https://openalex.org/I4210087105"]},{"id":"https://openalex.org/I241749","display_name":"University of Cambridge","ror":"https://ror.org/013meh722","country_code":"GB","type":"education","lineage":["https://openalex.org/I241749"]},{"id":"https://openalex.org/I4210096386","display_name":"Bridge University","ror":"https://ror.org/00cbm0437","country_code":"SS","type":"education","lineage":["https://openalex.org/I4210096386"]}],"countries":["GB","SS"],"is_corresponding":true,"raw_author_name":"Jacqueline M. Cole","raw_affiliation_strings":["Cavendish Laboratory, University of Cambridge, J. J. Thomson Avenue, Cambridge CB3 0HE, U.K","Harwell Science and Innovation Campus","Science and Technology Facilities Council, Rutherford Appleton Laboratory","Science and Technology Facilities Council, Rutherford Appleton Laboratory, Harwell Science and Innovation Campus, Didcot, Oxfordshire OX11 0QX, U.K"],"raw_orcid":"https://orcid.org/0000-0002-1552-8743","affiliations":[{"raw_affiliation_string":"Cavendish Laboratory, University of Cambridge, J. J. Thomson Avenue, Cambridge CB3 0HE, U.K","institution_ids":["https://openalex.org/I4210096386","https://openalex.org/I241749"]},{"raw_affiliation_string":"Harwell Science and Innovation Campus","institution_ids":[]},{"raw_affiliation_string":"Science and Technology Facilities Council, Rutherford Appleton Laboratory","institution_ids":["https://openalex.org/I162524378","https://openalex.org/I1286704778"]},{"raw_affiliation_string":"Science and Technology Facilities Council, Rutherford Appleton Laboratory, Harwell Science and Innovation Campus, Didcot, Oxfordshire OX11 0QX, U.K","institution_ids":["https://openalex.org/I162524378"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5068607578"],"corresponding_institution_ids":["https://openalex.org/I1286704778","https://openalex.org/I162524378","https://openalex.org/I241749","https://openalex.org/I4210096386"],"apc_list":null,"apc_paid":null,"fwci":5.8667,"has_fulltext":true,"cited_by_count":3,"citation_normalized_percentile":{"value":0.95941191,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":91,"max":99},"biblio":{"volume":"65","issue":"16","first_page":"8579","last_page":"8592"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9980999827384949,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11948","display_name":"Machine Learning in Materials Science","score":0.9625999927520752,"subfield":{"id":"https://openalex.org/subfields/2505","display_name":"Materials Chemistry"},"field":{"id":"https://openalex.org/fields/25","display_name":"Materials Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8039958477020264},{"id":"https://openalex.org/keywords/set","display_name":"Set (abstract data type)","score":0.7148087024688721},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.6510531306266785},{"id":"https://openalex.org/keywords/data-set","display_name":"Data set","score":0.6101272702217102},{"id":"https://openalex.org/keywords/field","display_name":"Field (mathematics)","score":0.5984909534454346},{"id":"https://openalex.org/keywords/question-answering","display_name":"Question answering","score":0.5604358911514282},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.4998586177825928},{"id":"https://openalex.org/keywords/small-data","display_name":"Small data","score":0.4864315986633301},{"id":"https://openalex.org/keywords/test-set","display_name":"Test set","score":0.466916561126709},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.43080461025238037},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.4287322461605072},{"id":"https://openalex.org/keywords/test-data","display_name":"Test data","score":0.42071324586868286},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.3654536008834839},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.35323166847229004},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.3432806730270386},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.14844483137130737}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8039958477020264},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.7148087024688721},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.6510531306266785},{"id":"https://openalex.org/C58489278","wikidata":"https://www.wikidata.org/wiki/Q1172284","display_name":"Data set","level":2,"score":0.6101272702217102},{"id":"https://openalex.org/C9652623","wikidata":"https://www.wikidata.org/wiki/Q190109","display_name":"Field (mathematics)","level":2,"score":0.5984909534454346},{"id":"https://openalex.org/C44291984","wikidata":"https://www.wikidata.org/wiki/Q1074173","display_name":"Question answering","level":2,"score":0.5604358911514282},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.4998586177825928},{"id":"https://openalex.org/C2779280203","wikidata":"https://www.wikidata.org/wiki/Q17121211","display_name":"Small data","level":2,"score":0.4864315986633301},{"id":"https://openalex.org/C169903167","wikidata":"https://www.wikidata.org/wiki/Q3985153","display_name":"Test set","level":2,"score":0.466916561126709},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.43080461025238037},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.4287322461605072},{"id":"https://openalex.org/C16910744","wikidata":"https://www.wikidata.org/wiki/Q7705759","display_name":"Test data","level":2,"score":0.42071324586868286},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.3654536008834839},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.35323166847229004},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3432806730270386},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.14844483137130737},{"id":"https://openalex.org/C202444582","wikidata":"https://www.wikidata.org/wiki/Q837863","display_name":"Pure mathematics","level":1,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0}],"mesh":[{"descriptor_ui":"D016208","descriptor_name":"Databases, Factual","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D016208","descriptor_name":"Databases, Factual","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D016208","descriptor_name":"Databases, Factual","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D016208","descriptor_name":"Databases, Factual","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true},{"descriptor_ui":"D016208","descriptor_name":"Databases, Factual","qualifier_ui":null,"qualifier_name":null,"is_major_topic":true}],"locations_count":5,"locations":[{"id":"doi:10.1021/acs.jcim.5c00840","is_oa":true,"landing_page_url":"https://doi.org/10.1021/acs.jcim.5c00840","pdf_url":"https://pubs.acs.org/doi/pdf/10.1021/acs.jcim.5c00840?ref=article_openPDF","source":{"id":"https://openalex.org/S167262187","display_name":"Journal of Chemical Information and Modeling","issn_l":"1549-9596","issn":["1549-9596","1549-960X"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320006","host_organization_name":"American Chemical Society","host_organization_lineage":["https://openalex.org/P4310320006"],"host_organization_lineage_names":["American Chemical Society"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Chemical Information and Modeling","raw_type":"journal-article"},{"id":"pmid:40773661","is_oa":false,"landing_page_url":"https://pubmed.ncbi.nlm.nih.gov/40773661","pdf_url":null,"source":{"id":"https://openalex.org/S4306525036","display_name":"PubMed","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1299303238","host_organization_name":"National Institutes of Health","host_organization_lineage":["https://openalex.org/I1299303238"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of chemical information and modeling","raw_type":null},{"id":"pmh:oai:www.repository.cam.ac.uk:1810/388677","is_oa":true,"landing_page_url":"https://www.repository.cam.ac.uk/handle/1810/388677","pdf_url":"https://www.repository.cam.ac.uk/bitstreams/ac7a01fe-ae97-420b-a63b-54b964133162/download","source":{"id":"https://openalex.org/S4306401777","display_name":"Apollo (University of Cambridge)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I241749","host_organization_name":"University of Cambridge","host_organization_lineage":["https://openalex.org/I241749"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},{"id":"pmh:oai:europepmc.org:11190315","is_oa":true,"landing_page_url":"https://www.ncbi.nlm.nih.gov/pmc/articles/12381847","pdf_url":null,"source":{"id":"https://openalex.org/S4306400806","display_name":"Europe PMC (PubMed Central)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I1303153112","host_organization_name":"European Bioinformatics Institute","host_organization_lineage":["https://openalex.org/I1303153112"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Text"},{"id":"pmh:oai:purl.org/net/epubs:work/62531513","is_oa":true,"landing_page_url":"https://epubs.stfc.ac.uk/work/62531513","pdf_url":null,"source":{"id":"https://openalex.org/S4306400334","display_name":"Science and Technology Facilities Council","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Journal Article"}],"best_oa_location":{"id":"doi:10.1021/acs.jcim.5c00840","is_oa":true,"landing_page_url":"https://doi.org/10.1021/acs.jcim.5c00840","pdf_url":"https://pubs.acs.org/doi/pdf/10.1021/acs.jcim.5c00840?ref=article_openPDF","source":{"id":"https://openalex.org/S167262187","display_name":"Journal of Chemical Information and Modeling","issn_l":"1549-9596","issn":["1549-9596","1549-960X"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https://openalex.org/P4310320006","host_organization_name":"American Chemical Society","host_organization_lineage":["https://openalex.org/P4310320006"],"host_organization_lineage_names":["American Chemical Society"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Journal of Chemical Information and Modeling","raw_type":"journal-article"},"sustainable_development_goals":[{"display_name":"Climate action","score":0.5400000214576721,"id":"https://metadata.un.org/sdg/13"}],"awards":[{"id":"https://openalex.org/G1531024995","display_name":null,"funder_award_id":"RCSRF1819\\7\\10","funder_id":"https://openalex.org/F4320320005","funder_display_name":"Royal Academy of Engineering"},{"id":"https://openalex.org/G3688903959","display_name":"DTP 2018-19 University of Cambridge","funder_award_id":"EP/R513180/1","funder_id":"https://openalex.org/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council"},{"id":"https://openalex.org/G7927981180","display_name":"DTP 2020-2021 University of Cambridge","funder_award_id":"EP/T517847/1","funder_id":"https://openalex.org/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council"}],"funders":[{"id":"https://openalex.org/F4320320005","display_name":"Royal Academy of Engineering","ror":"https://ror.org/0526snb40"},{"id":"https://openalex.org/F4320334627","display_name":"Engineering and Physical Sciences Research Council","ror":"https://ror.org/0439y7842"},{"id":"https://openalex.org/F4320334632","display_name":"Science and Technology Facilities Council","ror":"https://ror.org/057g20z61"},{"id":"https://openalex.org/F4320337480","display_name":"Basic Energy Sciences","ror":"https://ror.org/05mg91w61"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4413046406.pdf","grobid_xml":"https://content.openalex.org/works/W4413046406.grobid-xml"},"referenced_works_count":45,"referenced_works":["https://openalex.org/W2016589492","https://openalex.org/W2019249776","https://openalex.org/W2427527485","https://openalex.org/W2523785361","https://openalex.org/W2529518929","https://openalex.org/W2763421725","https://openalex.org/W2796108585","https://openalex.org/W2798836595","https://openalex.org/W2808304511","https://openalex.org/W2809621129","https://openalex.org/W2896457183","https://openalex.org/W2911489562","https://openalex.org/W2946628371","https://openalex.org/W2950768109","https://openalex.org/W2951025380","https://openalex.org/W2980282514","https://openalex.org/W2987154291","https://openalex.org/W2996500226","https://openalex.org/W3047398431","https://openalex.org/W3104636952","https://openalex.org/W3200122731","https://openalex.org/W3201869313","https://openalex.org/W4214774784","https://openalex.org/W4224442790","https://openalex.org/W4225378608","https://openalex.org/W4225409008","https://openalex.org/W4229443452","https://openalex.org/W4241618768","https://openalex.org/W4281617541","https://openalex.org/W4288407534","https://openalex.org/W4292358656","https://openalex.org/W4307139584","https://openalex.org/W4327913228","https://openalex.org/W4385671288","https://openalex.org/W4386955948","https://openalex.org/W4388890883","https://openalex.org/W4390011017","https://openalex.org/W4390511840","https://openalex.org/W4390544547","https://openalex.org/W4390951820","https://openalex.org/W4391836235","https://openalex.org/W4392002118","https://openalex.org/W4393277170","https://openalex.org/W4396524322","https://openalex.org/W4407892736"],"related_works":["https://openalex.org/W4288267738","https://openalex.org/W2964413124","https://openalex.org/W4388937922","https://openalex.org/W3113264705","https://openalex.org/W2028462208","https://openalex.org/W2982831492","https://openalex.org/W4285337533","https://openalex.org/W2187490799","https://openalex.org/W1574942924","https://openalex.org/W3094735304"],"abstract_inverted_index":{"We":[0,17,62],"present":[1],"a":[2,6,13,20,50],"method":[3],"for":[4],"autogenerating":[5],"large":[7],"domain-specific":[8,152],"question-answering":[9],"(QA)":[10],"dataset":[11,31],"from":[12],"thermoelectric":[14,37,46],"materials":[15,47,153],"database.":[16],"show":[18,64],"that":[19,65],"small":[21,143],"language":[22,144],"model,":[23],"BERT,":[24],"once":[25],"fine-tuned":[26,53,95,104],"on":[27,54,96,105,126],"this":[28],"automatically":[29],"generated":[30,159],"of":[32,45,116,122],"99,757":[33],"QA":[34,58],"pairs":[35],"about":[36],"materials,":[38],"affords":[39],"better":[40,89],"performance":[41],"in":[42],"the":[43,55,67,83,97,102,106,138],"field":[44],"compared":[48],"to":[49,86,140,161],"BERT":[51,84,93],"model":[52,85,94],"generic":[56],"English-language":[57],"data":[59,69,99,109,129,154],"set,":[60],"SQuAD-v2.":[61],"further":[63],"mixing":[66],"two":[68,108],"sets":[70,110,155],"(ours":[71],"and":[72,79,118],"SQuAD-v2),":[73],"which":[74,156],"have":[75],"significantly":[76],"different":[77],"syntactic":[78],"semantic":[80],"scopes,":[81],"allows":[82],"achieve":[87],"even":[88],"performance.":[90],"The":[91],"best-performing":[92],"mixed":[98],"set":[100],"outperforms":[101],"models":[103],"other":[107],"by":[111,151],"scoring":[112],"an":[113,119],"exact":[114],"match":[115],"67.93%":[117],"F1":[120],"score":[121],"72.29%":[123],"when":[124],"evaluated":[125],"our":[127,162],"test":[128],"set.":[130],"This":[131],"has":[132],"important":[133],"implications":[134],"as":[135],"it":[136],"demonstrates":[137],"ability":[139],"realize":[141],"high-performing":[142],"models,":[145],"with":[146],"modest":[147],"computational":[148],"resources,":[149],"empowered":[150],"can":[157],"be":[158],"according":[160],"method.":[163]},"counts_by_year":[{"year":2026,"cited_by_count":2},{"year":2025,"cited_by_count":1}],"updated_date":"2026-05-19T08:33:51.333923","created_date":"2025-10-10T00:00:00"}
