{"id":"https://openalex.org/W3015133989","doi":"https://doi.org/10.5220/0008925104150424","title":"Supervised Machine Learning and Feature Selection for a Document Analysis Application","display_name":"Supervised Machine Learning and Feature Selection for a Document Analysis Application","publication_year":2020,"publication_date":"2020-01-01","ids":{"openalex":"https://openalex.org/W3015133989","doi":"https://doi.org/10.5220/0008925104150424","mag":"3015133989"},"language":"en","primary_location":{"id":"doi:10.5220/0008925104150424","is_oa":true,"landing_page_url":"https://doi.org/10.5220/0008925104150424","pdf_url":null,"source":null,"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 9th International Conference on Pattern Recognition Applications and Methods","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.5220/0008925104150424","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5002563560","display_name":"James Pope","orcid":"https://orcid.org/0000-0003-2656-363X"},"institutions":[{"id":"https://openalex.org/I3301702","display_name":"Stephens College","ror":"https://ror.org/0305v8773","country_code":"US","type":"education","lineage":["https://openalex.org/I3301702"]},{"id":"https://openalex.org/I6066122","display_name":"University of Montevallo","ror":"https://ror.org/01fd8g905","country_code":"US","type":"education","lineage":["https://openalex.org/I6066122"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"James Pope","raw_affiliation_strings":["Stephens College of Business, University of Montevallo, U.S.A., --- Select a Country ---"],"affiliations":[{"raw_affiliation_string":"Stephens College of Business, University of Montevallo, U.S.A., --- Select a Country ---","institution_ids":["https://openalex.org/I6066122","https://openalex.org/I3301702"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5111815096","display_name":"Daniel Powers","orcid":null},"institutions":[{"id":"https://openalex.org/I3301702","display_name":"Stephens College","ror":"https://ror.org/0305v8773","country_code":"US","type":"education","lineage":["https://openalex.org/I3301702"]},{"id":"https://openalex.org/I6066122","display_name":"University of Montevallo","ror":"https://ror.org/01fd8g905","country_code":"US","type":"education","lineage":["https://openalex.org/I6066122"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Daniel Powers","raw_affiliation_strings":["Stephens College of Business, University of Montevallo, U.S.A., --- Select a Country ---"],"affiliations":[{"raw_affiliation_string":"Stephens College of Business, University of Montevallo, U.S.A., --- Select a Country ---","institution_ids":["https://openalex.org/I6066122","https://openalex.org/I3301702"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5050367082","display_name":"Jewel Connell","orcid":null},"institutions":[{"id":"https://openalex.org/I6066122","display_name":"University of Montevallo","ror":"https://ror.org/01fd8g905","country_code":"US","type":"education","lineage":["https://openalex.org/I6066122"]},{"id":"https://openalex.org/I3301702","display_name":"Stephens College","ror":"https://ror.org/0305v8773","country_code":"US","type":"education","lineage":["https://openalex.org/I3301702"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"J. A. (Jim) Connell","raw_affiliation_strings":["Stephens College of Business, University of Montevallo, U.S.A., --- Select a Country ---"],"affiliations":[{"raw_affiliation_string":"Stephens College of Business, University of Montevallo, U.S.A., --- Select a Country ---","institution_ids":["https://openalex.org/I6066122","https://openalex.org/I3301702"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5008705043","display_name":"Milad Jasemi","orcid":null},"institutions":[{"id":"https://openalex.org/I3301702","display_name":"Stephens College","ror":"https://ror.org/0305v8773","country_code":"US","type":"education","lineage":["https://openalex.org/I3301702"]},{"id":"https://openalex.org/I6066122","display_name":"University of Montevallo","ror":"https://ror.org/01fd8g905","country_code":"US","type":"education","lineage":["https://openalex.org/I6066122"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Milad Jasemi","raw_affiliation_strings":["Stephens College of Business, University of Montevallo, U.S.A., --- Select a Country ---"],"affiliations":[{"raw_affiliation_string":"Stephens College of Business, University of Montevallo, U.S.A., --- Select a Country ---","institution_ids":["https://openalex.org/I6066122","https://openalex.org/I3301702"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101580132","display_name":"David G. Taylor","orcid":"https://orcid.org/0000-0002-3393-0293"},"institutions":[{"id":"https://openalex.org/I94658018","display_name":"University of Memphis","ror":"https://ror.org/01cq23130","country_code":"US","type":"education","lineage":["https://openalex.org/I94658018"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"David Taylor","raw_affiliation_strings":["University of Memphis, U.S.A., --- Select a Country ---"],"affiliations":[{"raw_affiliation_string":"University of Memphis, U.S.A., --- Select a Country ---","institution_ids":["https://openalex.org/I94658018"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5018416804","display_name":"Xenofon Fafoutis","orcid":"https://orcid.org/0000-0002-9871-0013"},"institutions":[{"id":"https://openalex.org/I96673099","display_name":"Technical University of Denmark","ror":"https://ror.org/04qtj9h94","country_code":"DK","type":"education","lineage":["https://openalex.org/I96673099"]}],"countries":["DK"],"is_corresponding":false,"raw_author_name":"Xenofon Fafoutis","raw_affiliation_strings":["DTU Compute, Technical University of Denmark, Denmark, --- Select a Country ---"],"affiliations":[{"raw_affiliation_string":"DTU Compute, Technical University of Denmark, Denmark, --- Select a Country ---","institution_ids":["https://openalex.org/I96673099"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5002563560"],"corresponding_institution_ids":["https://openalex.org/I3301702","https://openalex.org/I6066122"],"apc_list":null,"apc_paid":null,"fwci":0.1962,"has_fulltext":false,"cited_by_count":2,"citation_normalized_percentile":{"value":0.47890243,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"415","last_page":"424"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10601","display_name":"Handwritten Text Recognition Techniques","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12016","display_name":"Web Data Mining and Analysis","score":0.9914000034332275,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11550","display_name":"Text and Document Classification Technologies","score":0.9908999800682068,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7731409072875977},{"id":"https://openalex.org/keywords/feature-selection","display_name":"Feature selection","score":0.6934992671012878},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6694874167442322},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.6161918044090271},{"id":"https://openalex.org/keywords/selection","display_name":"Selection (genetic algorithm)","score":0.5845127701759338},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.44893017411231995},{"id":"https://openalex.org/keywords/supervised-learning","display_name":"Supervised learning","score":0.41741943359375},{"id":"https://openalex.org/keywords/pattern-recognition","display_name":"Pattern recognition (psychology)","score":0.3507053852081299},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.33962953090667725},{"id":"https://openalex.org/keywords/artificial-neural-network","display_name":"Artificial neural network","score":0.1253272294998169}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7731409072875977},{"id":"https://openalex.org/C148483581","wikidata":"https://www.wikidata.org/wiki/Q446488","display_name":"Feature selection","level":2,"score":0.6934992671012878},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6694874167442322},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.6161918044090271},{"id":"https://openalex.org/C81917197","wikidata":"https://www.wikidata.org/wiki/Q628760","display_name":"Selection (genetic algorithm)","level":2,"score":0.5845127701759338},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.44893017411231995},{"id":"https://openalex.org/C136389625","wikidata":"https://www.wikidata.org/wiki/Q334384","display_name":"Supervised learning","level":3,"score":0.41741943359375},{"id":"https://openalex.org/C153180895","wikidata":"https://www.wikidata.org/wiki/Q7148389","display_name":"Pattern recognition (psychology)","level":2,"score":0.3507053852081299},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.33962953090667725},{"id":"https://openalex.org/C50644808","wikidata":"https://www.wikidata.org/wiki/Q192776","display_name":"Artificial neural network","level":2,"score":0.1253272294998169},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.5220/0008925104150424","is_oa":true,"landing_page_url":"https://doi.org/10.5220/0008925104150424","pdf_url":null,"source":null,"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 9th International Conference on Pattern Recognition Applications and Methods","raw_type":"proceedings-article"},{"id":"pmh:oai:pure.atira.dk:publications/dc2ed1e6-b80e-4d7d-9d2d-2e61837fe873","is_oa":true,"landing_page_url":"https://orbit.dtu.dk/en/publications/dc2ed1e6-b80e-4d7d-9d2d-2e61837fe873","pdf_url":null,"source":{"id":"https://openalex.org/S4306400705","display_name":"Technical University of Denmark, DTU Orbit (Technical University of Denmark, DTU)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I96673099","host_organization_name":"Technical University of Denmark","host_organization_lineage":["https://openalex.org/I96673099"],"host_organization_lineage_names":[],"type":"repository"},"license":"other-oa","license_id":"https://openalex.org/licenses/other-oa","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Pope , J , Powers , D , Connell , J A , Taylor , D &amp; Fafoutis , X 2020 , Supervised Machine Learning and Feature Selection for a Document Analysis Application . in Proceedings of the 9th International Conference on Pattern Recognition Applications and Methods . SCITEPRESS Digital Library , pp. 415 - 424 , 9th International Conference on Pattern Recognition Applications and Methods , Valletta , Malta , 22/02/2020 . https://doi.org/10.5220/0008925104150424","raw_type":"contributionToPeriodical"},{"id":"pmh:oai:research-information.bris.ac.uk:openaire_cris_publications/5f3f534c-35e8-45a2-a9f1-5ce80a2fb12c","is_oa":false,"landing_page_url":"https://research-information.bris.ac.uk/en/publications/5f3f534c-35e8-45a2-a9f1-5ce80a2fb12c","pdf_url":null,"source":{"id":"https://openalex.org/S4306400895","display_name":"Bristol Research (University of Bristol)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I36234482","host_organization_name":"University of Bristol","host_organization_lineage":["https://openalex.org/I36234482"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Pope, J, Powers, D, Connell, J A J, Jasemi, M, Taylor, D & Fafoutis, X 2020, Supervised machine learning and feature selection for a document analysis application. in M De Marsico, G S di Baja & A Fred (eds), ICPRAM 2020 - Proceedings of the 9th International Conference on Pattern Recognition Applications and Methods. ICPRAM 2020 - Proceedings of the 9th International Conference on Pattern Recognition Applications and Methods, SciTePress, pp. 415-424.","raw_type":"contributionToPeriodical"}],"best_oa_location":{"id":"doi:10.5220/0008925104150424","is_oa":true,"landing_page_url":"https://doi.org/10.5220/0008925104150424","pdf_url":null,"source":null,"license":"cc-by-nc-nd","license_id":"https://openalex.org/licenses/cc-by-nc-nd","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 9th International Conference on Pattern Recognition Applications and Methods","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":12,"referenced_works":["https://openalex.org/W478671","https://openalex.org/W58456790","https://openalex.org/W2067755917","https://openalex.org/W2122825543","https://openalex.org/W2135046866","https://openalex.org/W2135164809","https://openalex.org/W2135231474","https://openalex.org/W2152731093","https://openalex.org/W2787423362","https://openalex.org/W3014294420","https://openalex.org/W3204438512","https://openalex.org/W4294541781"],"related_works":["https://openalex.org/W4205762803","https://openalex.org/W2535856026","https://openalex.org/W2265065644","https://openalex.org/W3147584709","https://openalex.org/W2134699697","https://openalex.org/W3017188156","https://openalex.org/W2322875716","https://openalex.org/W2977677679","https://openalex.org/W4386564352","https://openalex.org/W2952668426"],"abstract_inverted_index":{"Over":[0],"the":[1,23,43,66,91,144,165,169,185,195,198],"past":[2],"three":[3],"decades":[4],"large":[5],"amounts":[6],"of":[7,129,197],"information":[8,46],"have":[9,112,135],"been":[10,113],"converted":[11],"to":[12,45,155,163,183,193],"image":[13,32,44],"formats":[14],"from":[15,27,42,143],"paper":[16],"documents.":[17],"Though":[18],"in":[19,206],"digital":[20],"form,":[21],"extracting":[22],"information,":[24],"usually":[25],"textual,":[26],"these":[28],"documents":[29,111],"requires":[30],"complex":[31,71],"processing":[33,40],"and":[34,56,76,94,121,125,131,133,147,180],"optical":[35],"character":[36],"recognition":[37],"techniques.":[38],"The":[39,110,151],"pipeline":[41],"typically":[47,127],"includes":[48],"an":[49],"orientation":[50,92],"correction":[51,93],"task,":[52,55],"document":[53,64,87,95,199],"identification":[54,96],"text":[57,130],"analysis":[58,88],"task.":[59],"When":[60],"there":[61],"are":[62,122,141,153,160],"many":[63],"variants":[65,120],"tasks":[67],"become":[68],"difficult":[69],"requiring":[70],"sub-analysis":[72],"for":[73,105,171],"each":[74,172],"variant":[75],"quickly":[77],"exceeds":[78],"human":[79],"capability.":[80],"In":[81],"this":[82],"work,":[83],"we":[84],"demonstrate":[85],"a":[86,106],"application":[89],"with":[90,118,209],"task":[97,173],"carried":[98],"out":[99],"by":[100],"supervised":[101],"machine":[102],"learning":[103],"techniques":[104],"large,":[107],"international":[108],"airline.":[109],"amassed":[114],"over":[115],"forty":[116],"years":[117],"numerous":[119],"mostly":[123],"black":[124],"white,":[126],"consist":[128],"lines,":[132],"some":[134],"extensive":[136],"noise.":[137],"Low":[138],"level":[139],"symbols":[140],"extracted":[142],"raw":[145],"images":[146],"separated":[148],"into":[149],"partitions.":[150],"partitions":[152],"used":[154,162],"generate":[156],"statistical":[157],"features":[158],"which":[159],"then":[161],"train":[164],"classifiers.":[166,201],"We":[167,188],"compare":[168],"classifiers":[170],"(e.g.":[174],"decision":[175],"tree,":[176],"support":[177],"vector":[178],"machine,":[179],"random":[181],"forest)":[182],"choose":[184],"most":[186],"appropriate.":[187],"also":[189],"perform":[190],"feature":[191],"selection":[192],"reduce":[194],"complexity":[196],"type":[200],"These":[202],"parsimonious":[203],"models":[204],"result":[205],"comparable":[207],"accuracy":[208],"80%":[210],"or":[211],"fewer":[212],"features.":[213]},"counts_by_year":[{"year":2022,"cited_by_count":1},{"year":2020,"cited_by_count":1}],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2025-10-10T00:00:00"}
