{"id":"https://openalex.org/W3185461526","doi":"https://doi.org/10.3390/data6070078","title":"Multi-Layout Invoice Document Dataset (MIDD): A Dataset for Named Entity Recognition","display_name":"Multi-Layout Invoice Document Dataset (MIDD): A Dataset for Named Entity Recognition","publication_year":2021,"publication_date":"2021-07-20","ids":{"openalex":"https://openalex.org/W3185461526","doi":"https://doi.org/10.3390/data6070078","mag":"3185461526"},"language":"en","primary_location":{"id":"doi:10.3390/data6070078","is_oa":true,"landing_page_url":"https://doi.org/10.3390/data6070078","pdf_url":"https://www.mdpi.com/2306-5729/6/7/78/pdf?version=1627298386","source":{"id":"https://openalex.org/S4210226510","display_name":"Data","issn_l":"2306-5729","issn":["2306-5729"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Data","raw_type":"journal-article"},"type":"article","indexed_in":["crossref","doaj"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.mdpi.com/2306-5729/6/7/78/pdf?version=1627298386","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5034807231","display_name":"Dipali Baviskar","orcid":"https://orcid.org/0000-0001-6351-8737"},"institutions":[{"id":"https://openalex.org/I244572783","display_name":"Symbiosis International University","ror":"https://ror.org/005r2ww51","country_code":"IN","type":"education","lineage":["https://openalex.org/I244572783"]}],"countries":["IN"],"is_corresponding":false,"raw_author_name":"Dipali Baviskar","raw_affiliation_strings":["Symbiosis Institute of Technology, Symbiosis International (Deemed University), Pune 412115, India"],"affiliations":[{"raw_affiliation_string":"Symbiosis Institute of Technology, Symbiosis International (Deemed University), Pune 412115, India","institution_ids":["https://openalex.org/I244572783"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5010638862","display_name":"Swati Ahirrao","orcid":"https://orcid.org/0000-0002-8554-2129"},"institutions":[{"id":"https://openalex.org/I244572783","display_name":"Symbiosis International University","ror":"https://ror.org/005r2ww51","country_code":"IN","type":"education","lineage":["https://openalex.org/I244572783"]}],"countries":["IN"],"is_corresponding":true,"raw_author_name":"Swati Ahirrao","raw_affiliation_strings":["Symbiosis Institute of Technology, Symbiosis International (Deemed University), Pune 412115, India"],"affiliations":[{"raw_affiliation_string":"Symbiosis Institute of Technology, Symbiosis International (Deemed University), Pune 412115, India","institution_ids":["https://openalex.org/I244572783"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5077770842","display_name":"Ketan Kotecha","orcid":"https://orcid.org/0000-0003-2653-3780"},"institutions":[{"id":"https://openalex.org/I244572783","display_name":"Symbiosis International University","ror":"https://ror.org/005r2ww51","country_code":"IN","type":"education","lineage":["https://openalex.org/I244572783"]}],"countries":["IN"],"is_corresponding":true,"raw_author_name":"Ketan Kotecha","raw_affiliation_strings":["Symbiosis Centre for Applied Artificial Intelligence, Symbiosis International (Deemed University), Pune 412115, India"],"affiliations":[{"raw_affiliation_string":"Symbiosis Centre for Applied Artificial Intelligence, Symbiosis International (Deemed University), Pune 412115, India","institution_ids":["https://openalex.org/I244572783"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5010638862","https://openalex.org/A5077770842"],"corresponding_institution_ids":["https://openalex.org/I244572783"],"apc_list":{"value":1600,"currency":"CHF","value_usd":1732},"apc_paid":{"value":1600,"currency":"CHF","value_usd":1732},"fwci":1.2596,"has_fulltext":true,"cited_by_count":11,"citation_normalized_percentile":{"value":0.83764976,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":94,"max":98},"biblio":{"volume":"6","issue":"7","first_page":"78","last_page":"78"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9991999864578247,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9886999726295471,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9884999990463257,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8296750783920288},{"id":"https://openalex.org/keywords/invoice","display_name":"Invoice","score":0.7295071482658386},{"id":"https://openalex.org/keywords/unstructured-data","display_name":"Unstructured data","score":0.6868350505828857},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.6738388538360596},{"id":"https://openalex.org/keywords/key","display_name":"Key (lock)","score":0.6269395351409912},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.5816667675971985},{"id":"https://openalex.org/keywords/information-extraction","display_name":"Information extraction","score":0.5620614886283875},{"id":"https://openalex.org/keywords/quality","display_name":"Quality (philosophy)","score":0.5519586801528931},{"id":"https://openalex.org/keywords/named-entity-recognition","display_name":"Named-entity recognition","score":0.517333447933197},{"id":"https://openalex.org/keywords/legal-document","display_name":"Legal document","score":0.4676729142665863},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.38788050413131714},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.3833869695663452},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.3653629422187805},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.332115113735199},{"id":"https://openalex.org/keywords/big-data","display_name":"Big data","score":0.16217640042304993}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8296750783920288},{"id":"https://openalex.org/C2781456945","wikidata":"https://www.wikidata.org/wiki/Q190581","display_name":"Invoice","level":2,"score":0.7295071482658386},{"id":"https://openalex.org/C2781252014","wikidata":"https://www.wikidata.org/wiki/Q1141900","display_name":"Unstructured data","level":3,"score":0.6868350505828857},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.6738388538360596},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.6269395351409912},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.5816667675971985},{"id":"https://openalex.org/C195807954","wikidata":"https://www.wikidata.org/wiki/Q1662562","display_name":"Information extraction","level":2,"score":0.5620614886283875},{"id":"https://openalex.org/C2779530757","wikidata":"https://www.wikidata.org/wiki/Q1207505","display_name":"Quality (philosophy)","level":2,"score":0.5519586801528931},{"id":"https://openalex.org/C2779135771","wikidata":"https://www.wikidata.org/wiki/Q403574","display_name":"Named-entity recognition","level":3,"score":0.517333447933197},{"id":"https://openalex.org/C2993995455","wikidata":"https://www.wikidata.org/wiki/Q3150005","display_name":"Legal document","level":2,"score":0.4676729142665863},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.38788050413131714},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3833869695663452},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.3653629422187805},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.332115113735199},{"id":"https://openalex.org/C75684735","wikidata":"https://www.wikidata.org/wiki/Q858810","display_name":"Big data","level":2,"score":0.16217640042304993},{"id":"https://openalex.org/C187736073","wikidata":"https://www.wikidata.org/wiki/Q2920921","display_name":"Management","level":1,"score":0.0},{"id":"https://openalex.org/C38652104","wikidata":"https://www.wikidata.org/wiki/Q3510521","display_name":"Computer security","level":1,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C111472728","wikidata":"https://www.wikidata.org/wiki/Q9471","display_name":"Epistemology","level":1,"score":0.0}],"mesh":[],"locations_count":4,"locations":[{"id":"doi:10.3390/data6070078","is_oa":true,"landing_page_url":"https://doi.org/10.3390/data6070078","pdf_url":"https://www.mdpi.com/2306-5729/6/7/78/pdf?version=1627298386","source":{"id":"https://openalex.org/S4210226510","display_name":"Data","issn_l":"2306-5729","issn":["2306-5729"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Data","raw_type":"journal-article"},{"id":"pmh:oai:RePEc:gam:jdataj:v:6:y:2021:i:7:p:78-:d:597539","is_oa":false,"landing_page_url":"https://www.mdpi.com/2306-5729/6/7/78/","pdf_url":null,"source":{"id":"https://openalex.org/S4306401271","display_name":"RePEc: Research Papers in Economics","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I77793887","host_organization_name":"Federal Reserve Bank of St. Louis","host_organization_lineage":["https://openalex.org/I77793887"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},{"id":"pmh:oai:doaj.org/article:4c7b040064a94d70922538197d1217c0","is_oa":true,"landing_page_url":"https://doaj.org/article/4c7b040064a94d70922538197d1217c0","pdf_url":null,"source":{"id":"https://openalex.org/S112646816","display_name":"SHILAP Revista de lepidopterolog\u00eda","issn_l":"0300-5267","issn":["0300-5267","2340-4078"],"is_oa":true,"is_in_doaj":true,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"journal"},"license":"cc-by-sa","license_id":"https://openalex.org/licenses/cc-by-sa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Data, Vol 6, Iss 7, p 78 (2021)","raw_type":"article"},{"id":"pmh:oai:mdpi.com:/2306-5729/6/7/78/","is_oa":true,"landing_page_url":"https://dx.doi.org/10.3390/data6070078","pdf_url":null,"source":{"id":"https://openalex.org/S4306400947","display_name":"MDPI (MDPI AG)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I4210097602","host_organization_name":"Multidisciplinary Digital Publishing Institute (Switzerland)","host_organization_lineage":["https://openalex.org/I4210097602"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Data; Volume 6; Issue 7; Pages: 78","raw_type":"Text"}],"best_oa_location":{"id":"doi:10.3390/data6070078","is_oa":true,"landing_page_url":"https://doi.org/10.3390/data6070078","pdf_url":"https://www.mdpi.com/2306-5729/6/7/78/pdf?version=1627298386","source":{"id":"https://openalex.org/S4210226510","display_name":"Data","issn_l":"2306-5729","issn":["2306-5729"],"is_oa":true,"is_in_doaj":true,"is_core":true,"host_organization":"https://openalex.org/P4310310987","host_organization_name":"Multidisciplinary Digital Publishing Institute","host_organization_lineage":["https://openalex.org/P4310310987"],"host_organization_lineage_names":["Multidisciplinary Digital Publishing Institute"],"type":"journal"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Data","raw_type":"journal-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3185461526.pdf","grobid_xml":"https://content.openalex.org/works/W3185461526.grobid-xml"},"referenced_works_count":23,"referenced_works":["https://openalex.org/W1997754546","https://openalex.org/W2537675719","https://openalex.org/W2935408319","https://openalex.org/W2963687456","https://openalex.org/W2965009206","https://openalex.org/W2965516373","https://openalex.org/W2966776429","https://openalex.org/W2971987430","https://openalex.org/W2981089724","https://openalex.org/W2983390508","https://openalex.org/W2994804941","https://openalex.org/W2995151999","https://openalex.org/W3003261556","https://openalex.org/W3003484198","https://openalex.org/W3004330198","https://openalex.org/W3085008468","https://openalex.org/W3091806362","https://openalex.org/W3109605947","https://openalex.org/W3122950209","https://openalex.org/W3155464240","https://openalex.org/W3180901325","https://openalex.org/W3198984549","https://openalex.org/W6650012010"],"related_works":["https://openalex.org/W2334378031","https://openalex.org/W2916255597","https://openalex.org/W2030910246","https://openalex.org/W3091569222","https://openalex.org/W4241018868","https://openalex.org/W2999302224","https://openalex.org/W1495833002","https://openalex.org/W4389912246","https://openalex.org/W3006227201","https://openalex.org/W2075635421"],"abstract_inverted_index":{"The":[0],"day-to-day":[1],"working":[2],"of":[3,10,16,66,84,100],"an":[4,147],"organization":[5],"produces":[6],"a":[7,53,93,115],"massive":[8],"volume":[9],"unstructured":[11,34,50,79,89,105,130,140],"data":[12],"in":[13,32,59,158],"the":[14,29,57,64,74,82,98,110,135,159,186],"form":[15],"invoices,":[17],"legal":[18],"contracts,":[19],"mortgage":[20],"processing":[21,143],"forms,":[22],"and":[23,42,48,69,87,120,144,154,196],"many":[24],"more.":[25],"Organizations":[26],"can":[27,72,133],"utilize":[28],"insights":[30,44],"concealed":[31],"such":[33,46],"documents":[35,51,123],"for":[36,96,125,138],"their":[37],"operational":[38],"benefit.":[39],"However,":[40,81],"analyzing":[41],"extracting":[43,101,126],"from":[45,78,104,129,174],"numerous":[47],"complex":[49],"is":[52,62,92,185],"tedious":[54],"task.":[55],"Hence,":[56],"research":[58],"this":[60],"area":[61],"encouraging":[63],"development":[65],"novel":[67],"frameworks":[68],"tools":[70],"that":[71],"automate":[73],"key":[75,102,127],"information":[76,103,128],"extraction":[77],"documents.":[80,106,131,161,199],"availability":[83],"standard,":[85],"best-quality,":[86],"annotated":[88,121,197],"document":[90,142,167],"datasets":[91],"serious":[94],"challenge":[95],"accomplishing":[97],"goal":[99],"This":[107],"work":[108],"expedites":[109],"researcher\u2019s":[111],"task":[112],"by":[113],"providing":[114],"high-quality,":[116,192],"highly":[117,193],"diverse,":[118,194],"multi-layout,":[119,195],"invoice":[122,141,160,166,183,198],"dataset":[124,137,163,184,190],"Researchers":[132],"use":[134],"proposed":[136],"layout-independent":[139],"to":[145,152],"develop":[146],"artificial":[148],"intelligence":[149],"(AI)-based":[150],"tool":[151],"identify":[153],"extract":[155],"named":[156],"entities":[157],"Our":[162],"includes":[164],"630":[165],"PDFs":[168],"with":[169],"four":[170],"different":[171],"layouts":[172],"collected":[173],"diverse":[175],"suppliers.":[176],"As":[177],"far":[178],"as":[179],"we":[180],"know,":[181],"our":[182],"only":[187],"openly":[188],"available":[189],"comprising":[191]},"counts_by_year":[{"year":2025,"cited_by_count":2},{"year":2024,"cited_by_count":5},{"year":2023,"cited_by_count":2},{"year":2022,"cited_by_count":2}],"updated_date":"2026-04-05T17:49:38.594831","created_date":"2025-10-10T00:00:00"}
