{"id":"https://openalex.org/W3008048863","doi":"https://doi.org/10.1109/bigdata47090.2019.9005601","title":"Human-Machine Information Extraction Simulator for Biological Collections","display_name":"Human-Machine Information Extraction Simulator for Biological Collections","publication_year":2019,"publication_date":"2019-12-01","ids":{"openalex":"https://openalex.org/W3008048863","doi":"https://doi.org/10.1109/bigdata47090.2019.9005601","mag":"3008048863"},"language":"en","primary_location":{"id":"doi:10.1109/bigdata47090.2019.9005601","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata47090.2019.9005601","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2019 IEEE International Conference on Big Data (Big Data)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5027673766","display_name":"Icaro Alzuru","orcid":null},"institutions":[{"id":"https://openalex.org/I33213144","display_name":"University of Florida","ror":"https://ror.org/02y3ad647","country_code":"US","type":"education","lineage":["https://openalex.org/I33213144"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Icaro Alzuru","raw_affiliation_strings":["CISE Department, University of Florida, Gainesville, USA"],"affiliations":[{"raw_affiliation_string":"CISE Department, University of Florida, Gainesville, USA","institution_ids":["https://openalex.org/I33213144"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5027043779","display_name":"Aditi Malladi","orcid":null},"institutions":[{"id":"https://openalex.org/I33213144","display_name":"University of Florida","ror":"https://ror.org/02y3ad647","country_code":"US","type":"education","lineage":["https://openalex.org/I33213144"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Aditi Malladi","raw_affiliation_strings":["CISE Department, University of Florida, Gainesville, USA"],"affiliations":[{"raw_affiliation_string":"CISE Department, University of Florida, Gainesville, USA","institution_ids":["https://openalex.org/I33213144"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5003262238","display_name":"Andr\u00e9a Matsunaga","orcid":"https://orcid.org/0000-0001-9036-5895"},"institutions":[{"id":"https://openalex.org/I33213144","display_name":"University of Florida","ror":"https://ror.org/02y3ad647","country_code":"US","type":"education","lineage":["https://openalex.org/I33213144"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Andrea Matsunaga","raw_affiliation_strings":["ACIS Lab., University of Florida, Gainesville, USA"],"affiliations":[{"raw_affiliation_string":"ACIS Lab., University of Florida, Gainesville, USA","institution_ids":["https://openalex.org/I33213144"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5086302230","display_name":"Maur\u00edcio Tsugawa","orcid":null},"institutions":[{"id":"https://openalex.org/I33213144","display_name":"University of Florida","ror":"https://ror.org/02y3ad647","country_code":"US","type":"education","lineage":["https://openalex.org/I33213144"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Mauricio Tsugawa","raw_affiliation_strings":["ACIS Lab., University of Florida, Gainesville, USA"],"affiliations":[{"raw_affiliation_string":"ACIS Lab., University of Florida, Gainesville, USA","institution_ids":["https://openalex.org/I33213144"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5096823762","display_name":"Fortes Jose A.B.","orcid":null},"institutions":[{"id":"https://openalex.org/I33213144","display_name":"University of Florida","ror":"https://ror.org/02y3ad647","country_code":"US","type":"education","lineage":["https://openalex.org/I33213144"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Fortes Jose A.B.","raw_affiliation_strings":["ACIS Lab., University of Florida, Gainesville, USA"],"affiliations":[{"raw_affiliation_string":"ACIS Lab., University of Florida, Gainesville, USA","institution_ids":["https://openalex.org/I33213144"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5027673766"],"corresponding_institution_ids":["https://openalex.org/I33213144"],"apc_list":null,"apc_paid":null,"fwci":0.8107,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":{"value":0.88156981,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":95},"biblio":{"volume":"68","issue":null,"first_page":"4565","last_page":"4572"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11986","display_name":"Scientific Computing and Data Management","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1802","display_name":"Information Systems and Management"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},"topics":[{"id":"https://openalex.org/T11986","display_name":"Scientific Computing and Data Management","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1802","display_name":"Information Systems and Management"},"field":{"id":"https://openalex.org/fields/18","display_name":"Decision Sciences"},"domain":{"id":"https://openalex.org/domains/2","display_name":"Social Sciences"}},{"id":"https://openalex.org/T11937","display_name":"Research Data Management Practices","score":0.9980999827384949,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10015","display_name":"Genomics and Phylogenetic Studies","score":0.9803000092506409,"subfield":{"id":"https://openalex.org/subfields/1312","display_name":"Molecular Biology"},"field":{"id":"https://openalex.org/fields/13","display_name":"Biochemistry, Genetics and Molecular Biology"},"domain":{"id":"https://openalex.org/domains/1","display_name":"Life Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.841974139213562},{"id":"https://openalex.org/keywords/workflow","display_name":"Workflow","score":0.8048938512802124},{"id":"https://openalex.org/keywords/crowdsourcing","display_name":"Crowdsourcing","score":0.7027860879898071},{"id":"https://openalex.org/keywords/ground-truth","display_name":"Ground truth","score":0.6827753782272339},{"id":"https://openalex.org/keywords/metadata","display_name":"Metadata","score":0.641654372215271},{"id":"https://openalex.org/keywords/process","display_name":"Process (computing)","score":0.5430682301521301},{"id":"https://openalex.org/keywords/human-in-the-loop","display_name":"Human-in-the-loop","score":0.5171117782592773},{"id":"https://openalex.org/keywords/personalization","display_name":"Personalization","score":0.5021765232086182},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.40898773074150085},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.38486993312835693},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.36720094084739685},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.36638161540031433},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.27981120347976685},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.20991680026054382},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.13876184821128845}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.841974139213562},{"id":"https://openalex.org/C177212765","wikidata":"https://www.wikidata.org/wiki/Q627335","display_name":"Workflow","level":2,"score":0.8048938512802124},{"id":"https://openalex.org/C62230096","wikidata":"https://www.wikidata.org/wiki/Q275969","display_name":"Crowdsourcing","level":2,"score":0.7027860879898071},{"id":"https://openalex.org/C146849305","wikidata":"https://www.wikidata.org/wiki/Q370766","display_name":"Ground truth","level":2,"score":0.6827753782272339},{"id":"https://openalex.org/C93518851","wikidata":"https://www.wikidata.org/wiki/Q180160","display_name":"Metadata","level":2,"score":0.641654372215271},{"id":"https://openalex.org/C98045186","wikidata":"https://www.wikidata.org/wiki/Q205663","display_name":"Process (computing)","level":2,"score":0.5430682301521301},{"id":"https://openalex.org/C2780626000","wikidata":"https://www.wikidata.org/wiki/Q5936775","display_name":"Human-in-the-loop","level":2,"score":0.5171117782592773},{"id":"https://openalex.org/C183003079","wikidata":"https://www.wikidata.org/wiki/Q1000371","display_name":"Personalization","level":2,"score":0.5021765232086182},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.40898773074150085},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.38486993312835693},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.36720094084739685},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.36638161540031433},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.27981120347976685},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.20991680026054382},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.13876184821128845}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/bigdata47090.2019.9005601","is_oa":false,"landing_page_url":"https://doi.org/10.1109/bigdata47090.2019.9005601","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2019 IEEE International Conference on Big Data (Big Data)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":30,"referenced_works":["https://openalex.org/W1030645745","https://openalex.org/W1867170536","https://openalex.org/W1978714842","https://openalex.org/W2008459216","https://openalex.org/W2058153205","https://openalex.org/W2078962046","https://openalex.org/W2135749564","https://openalex.org/W2165352994","https://openalex.org/W2171313960","https://openalex.org/W2505217892","https://openalex.org/W2769563307","https://openalex.org/W2776891148","https://openalex.org/W2779326814","https://openalex.org/W2793108664","https://openalex.org/W2794248503","https://openalex.org/W2807747798","https://openalex.org/W2809631449","https://openalex.org/W2895456681","https://openalex.org/W2901476362","https://openalex.org/W2911663376","https://openalex.org/W2923014074","https://openalex.org/W2963310665","https://openalex.org/W2967402721","https://openalex.org/W2972630884","https://openalex.org/W3003915459","https://openalex.org/W3008048863","https://openalex.org/W4242710869","https://openalex.org/W4289422041","https://openalex.org/W6755284406","https://openalex.org/W6761851811"],"related_works":["https://openalex.org/W3032998312","https://openalex.org/W135177976","https://openalex.org/W4384486036","https://openalex.org/W1503094549","https://openalex.org/W2337920774","https://openalex.org/W4286908577","https://openalex.org/W2886410948","https://openalex.org/W2025875869","https://openalex.org/W4318823662","https://openalex.org/W3207526114"],"abstract_inverted_index":{"In":[0,127],"the":[1,7,25,65,68,99,137,158,160,168,171,181,212],"last":[2],"decade,":[3],"institutions":[4],"from":[5,27,67,149],"around":[6],"world":[8],"have":[9],"implemented":[10],"initiatives":[11],"for":[12,143,164,200],"digitizing":[13],"biological":[14],"collections":[15],"(biocollections)":[16],"and":[17,57,82,112,116,121,176,187,203,216],"sharing":[18],"their":[19],"information":[20],"online.":[21],"The":[22,53,153,195],"transcription":[23],"of":[24,29,51,55,70,92,151,170,180,184,218],"metadata":[26],"photographs":[28],"specimens'":[30],"labels":[31],"is":[32],"performed":[33],"through":[34],"human-centered":[35],"approaches":[36],"(e.g.,":[37],"crowdsourcing)":[38],"because":[39],"fully":[40],"automated":[41,106,172],"Information":[42],"Extraction":[43],"(IE)":[44],"methods":[45],"still":[46],"generate":[47,101],"a":[48,124,132],"significant":[49],"number":[50],"errors.":[52],"integration":[54],"human":[56],"machine":[58],"tasks":[59],"has":[60],"been":[61],"proposed":[62],"to":[63,73,79,89,118,135,139],"accelerate":[64,136],"IE":[66,86,162,173,202,220],"billions":[69],"specimens":[71],"waiting":[72],"be":[74,119],"digitized.":[75],"Nevertheless,":[76],"in":[77],"order":[78],"conduct":[80],"research":[81,110,204],"trying":[83],"new":[84],"techniques,":[85],"practitioners":[87],"need":[88],"prepare":[90],"sets":[91],"images,":[93],"crowdsourcing":[94],"experiments,":[95],"recruit":[96],"volunteers,":[97],"process":[98],"transcriptions,":[100],"ground":[102,177],"truth":[103,178],"values,":[104],"program":[105],"methods,":[107,174],"etc.":[108],"These":[109],"resources":[111],"processes":[113],"require":[114],"time":[115],"effort":[117],"developed":[120],"architected":[122],"into":[123],"functional":[125],"system.":[126],"this":[128],"paper,":[129],"we":[130],"present":[131],"simulator":[133,196],"intended":[134],"ability":[138],"experiment":[140],"with":[141],"workflows":[142,163],"extracting":[144],"Darwin":[145],"Core":[146],"(DC)":[147],"terms":[148,183],"images":[150],"specimens.":[152],"so-called":[154],"HuMaIN":[155],"Simulator":[156],"includes":[157],"engine,":[159],"human-machine":[161],"three":[165,185],"DC":[166,182],"terms,":[167],"code":[169],"crowdsourced":[175],"transcriptions":[179],"biocollections,":[186],"several":[188],"experiments":[189],"that":[190],"exemplify":[191],"its":[192],"potential":[193],"use.":[194],"adds":[197],"Human-in-the-loop":[198],"capabilities,":[199],"iterative":[201],"on":[205],"optimal":[206],"methods.":[207],"Its":[208],"practical":[209],"design":[210],"permits":[211],"quick":[213],"definition,":[214],"customization,":[215],"implementation":[217],"experimental":[219],"scenarios.":[221]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
