{"id":"https://openalex.org/W2083187798","doi":"https://doi.org/10.1109/bibm.2014.6999306","title":"Adopting the MapReduce framework to pre-train 1-D and 2-D protein structure predictors with large protein datasets","display_name":"Adopting the MapReduce framework to pre-train 1-D and 2-D protein structure predictors with large protein datasets","publication_year":2014,"publication_date":"2014-11-01","ids":{"openalex":"https://openalex.org/W2083187798","doi":"https://doi.org/10.1109/bibm.2014.6999306","mag":"2083187798"},"language":"en","primary_location":{"id":"doi:10.1109/bibm.2014.6999306","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bibm.2014.6999306","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2014 IEEE International Conference on Bioinformatics and Biomedicine (BIBM)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5033383597","display_name":"Jesse Eickholt","orcid":"https://orcid.org/0000-0002-1764-1838"},"institutions":[{"id":"https://openalex.org/I1629065","display_name":"Central Michigan University","ror":"https://ror.org/02xawj266","country_code":"US","type":"education","lineage":["https://openalex.org/I1629065"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Jesse Eickholt","raw_affiliation_strings":["Department of Computer Science, Central Michigan University, Mount Pleasant, MI, USA","Department of Computer Science Central Michigan University Mount Pleasant MI USA"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, Central Michigan University, Mount Pleasant, MI, USA","institution_ids":["https://openalex.org/I1629065"]},{"raw_affiliation_string":"Department of Computer Science Central Michigan University Mount Pleasant MI USA","institution_ids":["https://openalex.org/I1629065"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5102909198","display_name":"Suman Karki","orcid":null},"institutions":[{"id":"https://openalex.org/I1629065","display_name":"Central Michigan University","ror":"https://ror.org/02xawj266","country_code":"US","type":"education","lineage":["https://openalex.org/I1629065"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Suman Karki","raw_affiliation_strings":["Department of Computer Science, Central Michigan University, Mount Pleasant, MI, USA","Department of Computer Science Central Michigan University Mount Pleasant MI USA"],"affiliations":[{"raw_affiliation_string":"Department of Computer Science, Central Michigan University, Mount Pleasant, MI, USA","institution_ids":["https://openalex.org/I1629065"]},{"raw_affiliation_string":"Department of Computer Science Central Michigan University Mount Pleasant MI USA","institution_ids":["https://openalex.org/I1629065"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5033383597"],"corresponding_institution_ids":["https://openalex.org/I1629065"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.09827979,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":"3","issue":null,"first_page":"23","last_page":"29"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12254","display_name":"Machine Learning in Bioinformatics","score":0.9972000122070312,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},"topics":[{"id":"https://openalex.org/T12254","display_name":"Machine Learning in Bioinformatics","score":0.9972000122070312,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10044","display_name":"Protein Structure and Dynamics","score":0.9968000054359436,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}},{"id":"https://openalex.org/T10015","display_name":"Genomics and Phylogenetic Studies","score":0.9835000038146973,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/leverage","display_name":"Leverage (statistics)","score":0.7850878238677979},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.774128794670105},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.7640557289123535},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.5853986144065857},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.5665279626846313},{"id":"https://openalex.org/keywords/sequence","display_name":"Sequence (biology)","score":0.548753559589386},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.4962623715400696},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.4704909324645996},{"id":"https://openalex.org/keywords/construct","display_name":"Construct (python library)","score":0.4578183889389038},{"id":"https://openalex.org/keywords/protein-structure-prediction","display_name":"Protein structure prediction","score":0.4140966534614563},{"id":"https://openalex.org/keywords/protein-structure","display_name":"Protein structure","score":0.3004149794578552}],"concepts":[{"id":"https://openalex.org/C153083717","wikidata":"https://www.wikidata.org/wiki/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.7850878238677979},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.774128794670105},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.7640557289123535},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.5853986144065857},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5665279626846313},{"id":"https://openalex.org/C2778112365","wikidata":"https://www.wikidata.org/wiki/Q3511065","display_name":"Sequence (biology)","level":2,"score":0.548753559589386},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.4962623715400696},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.4704909324645996},{"id":"https://openalex.org/C2780801425","wikidata":"https://www.wikidata.org/wiki/Q5164392","display_name":"Construct (python library)","level":2,"score":0.4578183889389038},{"id":"https://openalex.org/C18051474","wikidata":"https://www.wikidata.org/wiki/Q899656","display_name":"Protein structure prediction","level":3,"score":0.4140966534614563},{"id":"https://openalex.org/C47701112","wikidata":"https://www.wikidata.org/wiki/Q735188","display_name":"Protein structure","level":2,"score":0.3004149794578552},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C54355233","wikidata":"https://www.wikidata.org/wiki/Q7162","display_name":"Genetics","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C46141821","wikidata":"https://www.wikidata.org/wiki/Q209402","display_name":"Nuclear magnetic resonance","level":1,"score":0.0},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/bibm.2014.6999306","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bibm.2014.6999306","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2014 IEEE International Conference on Bioinformatics and Biomedicine (BIBM)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":35,"referenced_works":["https://openalex.org/W44815768","https://openalex.org/W1813659000","https://openalex.org/W1821638548","https://openalex.org/W1828921065","https://openalex.org/W1993882792","https://openalex.org/W2031767704","https://openalex.org/W2033636932","https://openalex.org/W2035018017","https://openalex.org/W2055644676","https://openalex.org/W2058715873","https://openalex.org/W2060107523","https://openalex.org/W2060422862","https://openalex.org/W2062227835","https://openalex.org/W2062920004","https://openalex.org/W2075849238","https://openalex.org/W2076048958","https://openalex.org/W2076978869","https://openalex.org/W2100495367","https://openalex.org/W2108101947","https://openalex.org/W2116064496","https://openalex.org/W2126715624","https://openalex.org/W2136922672","https://openalex.org/W2138755951","https://openalex.org/W2141125852","https://openalex.org/W2156125289","https://openalex.org/W2156465034","https://openalex.org/W2157355837","https://openalex.org/W2158714788","https://openalex.org/W2160784118","https://openalex.org/W2173213060","https://openalex.org/W2955074472","https://openalex.org/W2995564009","https://openalex.org/W3214074647","https://openalex.org/W6601785968","https://openalex.org/W6803975782"],"related_works":["https://openalex.org/W2946599741","https://openalex.org/W2593264178","https://openalex.org/W3171039768","https://openalex.org/W2136856901","https://openalex.org/W2368468053","https://openalex.org/W2058542300","https://openalex.org/W2043066834","https://openalex.org/W1564749278","https://openalex.org/W2294851134","https://openalex.org/W3215498386"],"abstract_inverted_index":{"Sequence":[0],"based":[1],"machine":[2,30],"learning":[3,31],"approaches":[4],"for":[5,81],"1-D":[6,82,131],"and":[7,76,83,132],"2-D":[8,84,133],"protein":[9,57,70,85,127,134,146],"structure":[10,58,86,135],"prediction":[11,87],"tasks":[12],"have":[13,151],"long":[14],"been":[15],"limited":[16],"by":[17],"relatively":[18],"small":[19],"datasets,":[20],"namely":[21],"proteins":[22],"with":[23,154],"experimentally":[24],"determined":[25],"structure.":[26],"Recent":[27],"advances":[28],"in":[29,53],"provide":[32],"a":[33,41,48,63,68,144],"means":[34],"of":[35,56,93,125],"using":[36,102],"unlabeled":[37],"data":[38,75,129],"and,":[39],"as":[40,114],"result,":[42],"this":[43],"opens":[44],"up":[45],"access":[46],"to":[47,66,109,121,130,142],"much":[49],"larger":[50],"sequence":[51,71,128,147],"space":[52],"the":[54,91,95,103,119],"context":[55],"prediction.":[59,136],"Here":[60],"we":[61,98,150],"present":[62],"3-stage":[64],"pipeline":[65,101,141],"construct":[67],"representative":[69],"dataset,":[72,97],"generate":[73],"training":[74],"pre-train":[77],"deep":[78],"network":[79],"models":[80],"tasks.":[88],"To":[89],"handle":[90],"complexities":[92],"managing":[94],"large":[96,123],"implemented":[99],"our":[100,140],"MapReduce":[104],"framework.":[105],"This":[106],"allowed":[107],"us":[108],"leverage":[110],"existing":[111],"tools":[112],"such":[113],"Hadoop.":[115],"The":[116],"result":[117],"is":[118],"ability":[120],"apply":[122],"amounts":[124],"novel,":[126],"We":[137],"also":[138],"used":[139],"curate":[143],"non-redundant":[145],"dataset":[148],"that":[149],"made":[152],"available":[153],"accompanying":[155],"data.":[156]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
