{"id":"https://openalex.org/W4221114044","doi":"https://doi.org/10.23919/icact53585.2022.9728823","title":"An Extensible Parsing Pipeline for Unstructured Data Processing","display_name":"An Extensible Parsing Pipeline for Unstructured Data Processing","publication_year":2022,"publication_date":"2022-02-13","ids":{"openalex":"https://openalex.org/W4221114044","doi":"https://doi.org/10.23919/icact53585.2022.9728823"},"language":"en","primary_location":{"id":"doi:10.23919/icact53585.2022.9728823","is_oa":false,"landing_page_url":"https://doi.org/10.23919/icact53585.2022.9728823","pdf_url":null,"source":{"id":"https://openalex.org/S4363608017","display_name":"2022 24th International Conference on Advanced Communication Technology (ICACT)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 24th International Conference on Advanced Communication Technology (ICACT)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5005203615","display_name":"Shubham Jain","orcid":"https://orcid.org/0000-0002-0913-3948"},"institutions":[{"id":"https://openalex.org/I151939572","display_name":"Athlone Institute of Technology","ror":"https://ror.org/02dyxwz31","country_code":"IE","type":"education","lineage":["https://openalex.org/I151939572"]}],"countries":["IE"],"is_corresponding":true,"raw_author_name":"Shubham Jain","raw_affiliation_strings":["Software Research Institute, Athlone Institute of Technology,Athlone,Ireland","Software Research Institute, Athlone Institute of Technology, Athlone, Ireland"],"affiliations":[{"raw_affiliation_string":"Software Research Institute, Athlone Institute of Technology,Athlone,Ireland","institution_ids":["https://openalex.org/I151939572"]},{"raw_affiliation_string":"Software Research Institute, Athlone Institute of Technology, Athlone, Ireland","institution_ids":["https://openalex.org/I151939572"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5082979434","display_name":"Amy de Buitl\u00e9ir","orcid":"https://orcid.org/0000-0001-8359-0920"},"institutions":[{"id":"https://openalex.org/I4210150373","display_name":"Ericsson (Ireland)","ror":"https://ror.org/04swdfh12","country_code":"IE","type":"company","lineage":["https://openalex.org/I1306339040","https://openalex.org/I4210150373"]}],"countries":["IE"],"is_corresponding":false,"raw_author_name":"Amy de Buitl\u00e9ir","raw_affiliation_strings":["Network Management Lab, Ericsson,Athlone,Ireland","Network Management Lab, Ericsson, Athlone, Ireland"],"affiliations":[{"raw_affiliation_string":"Network Management Lab, Ericsson,Athlone,Ireland","institution_ids":["https://openalex.org/I4210150373"]},{"raw_affiliation_string":"Network Management Lab, Ericsson, Athlone, Ireland","institution_ids":["https://openalex.org/I4210150373"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5070072452","display_name":"Enda Fallon","orcid":"https://orcid.org/0000-0002-8300-5813"},"institutions":[{"id":"https://openalex.org/I151939572","display_name":"Athlone Institute of Technology","ror":"https://ror.org/02dyxwz31","country_code":"IE","type":"education","lineage":["https://openalex.org/I151939572"]}],"countries":["IE"],"is_corresponding":false,"raw_author_name":"Enda Fallon","raw_affiliation_strings":["Software Research Institute, Athlone Institute of Technology,Athlone,Ireland","Software Research Institute, Athlone Institute of Technology, Athlone, Ireland"],"affiliations":[{"raw_affiliation_string":"Software Research Institute, Athlone Institute of Technology,Athlone,Ireland","institution_ids":["https://openalex.org/I151939572"]},{"raw_affiliation_string":"Software Research Institute, Athlone Institute of Technology, Athlone, Ireland","institution_ids":["https://openalex.org/I151939572"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https://openalex.org/A5005203615"],"corresponding_institution_ids":["https://openalex.org/I151939572"],"apc_list":null,"apc_paid":null,"fwci":0.2149,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.25342371,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"312","last_page":"318"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12127","display_name":"Software System Performance and Reliability","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12127","display_name":"Software System Performance and Reliability","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10743","display_name":"Software Testing and Debugging Techniques","score":0.984499990940094,"subfield":{"id":"https://openalex.org/subfields/1712","display_name":"Software"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.9836999773979187,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.9039576053619385},{"id":"https://openalex.org/keywords/parsing","display_name":"Parsing","score":0.855368435382843},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.7942534685134888},{"id":"https://openalex.org/keywords/unstructured-data","display_name":"Unstructured data","score":0.6690737009048462},{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.6190482974052429},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.5382897853851318},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.45676612854003906},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.4324713349342346},{"id":"https://openalex.org/keywords/plain-text","display_name":"Plain text","score":0.4318324327468872},{"id":"https://openalex.org/keywords/file-format","display_name":"File format","score":0.41542378067970276},{"id":"https://openalex.org/keywords/data-file","display_name":"Data file","score":0.4110684096813202},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.33460092544555664},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.3108653724193573},{"id":"https://openalex.org/keywords/big-data","display_name":"Big data","score":0.2900340259075165},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.17581024765968323},{"id":"https://openalex.org/keywords/encryption","display_name":"Encryption","score":0.10156741738319397}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.9039576053619385},{"id":"https://openalex.org/C186644900","wikidata":"https://www.wikidata.org/wiki/Q194152","display_name":"Parsing","level":2,"score":0.855368435382843},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.7942534685134888},{"id":"https://openalex.org/C2781252014","wikidata":"https://www.wikidata.org/wiki/Q1141900","display_name":"Unstructured data","level":3,"score":0.6690737009048462},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.6190482974052429},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.5382897853851318},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.45676612854003906},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.4324713349342346},{"id":"https://openalex.org/C46503548","wikidata":"https://www.wikidata.org/wiki/Q1145976","display_name":"Plain text","level":3,"score":0.4318324327468872},{"id":"https://openalex.org/C97250363","wikidata":"https://www.wikidata.org/wiki/Q235557","display_name":"File format","level":2,"score":0.41542378067970276},{"id":"https://openalex.org/C171730128","wikidata":"https://www.wikidata.org/wiki/Q5227290","display_name":"Data file","level":2,"score":0.4110684096813202},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.33460092544555664},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.3108653724193573},{"id":"https://openalex.org/C75684735","wikidata":"https://www.wikidata.org/wiki/Q858810","display_name":"Big data","level":2,"score":0.2900340259075165},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.17581024765968323},{"id":"https://openalex.org/C148730421","wikidata":"https://www.wikidata.org/wiki/Q141090","display_name":"Encryption","level":2,"score":0.10156741738319397},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.23919/icact53585.2022.9728823","is_oa":false,"landing_page_url":"https://doi.org/10.23919/icact53585.2022.9728823","pdf_url":null,"source":{"id":"https://openalex.org/S4363608017","display_name":"2022 24th International Conference on Advanced Communication Technology (ICACT)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"conference"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2022 24th International Conference on Advanced Communication Technology (ICACT)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/8","score":0.6600000262260437,"display_name":"Decent work and economic growth"}],"awards":[],"funders":[{"id":"https://openalex.org/F4320321056","display_name":"Irish Research Council","ror":"https://ror.org/051xex213"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":24,"referenced_works":["https://openalex.org/W65738273","https://openalex.org/W168564468","https://openalex.org/W205885184","https://openalex.org/W1647671624","https://openalex.org/W1661413208","https://openalex.org/W1969256433","https://openalex.org/W2001619934","https://openalex.org/W2070771761","https://openalex.org/W2102632804","https://openalex.org/W2103868202","https://openalex.org/W2153470728","https://openalex.org/W2181347089","https://openalex.org/W2481003398","https://openalex.org/W2540904922","https://openalex.org/W2560021099","https://openalex.org/W2951464182","https://openalex.org/W3054225939","https://openalex.org/W3090556797","https://openalex.org/W3108996037","https://openalex.org/W4237335675","https://openalex.org/W4399569220","https://openalex.org/W6636915900","https://openalex.org/W6681226105","https://openalex.org/W6869762939"],"related_works":["https://openalex.org/W3032858184","https://openalex.org/W2793346709","https://openalex.org/W2090798606","https://openalex.org/W3197228483","https://openalex.org/W2566091055","https://openalex.org/W1966326070","https://openalex.org/W212602161","https://openalex.org/W2368195534","https://openalex.org/W2377810582","https://openalex.org/W2560797684"],"abstract_inverted_index":{"Network":[0],"monitoring":[1],"and":[2,10,22,39,59,75,99,123,128,132,149],"diagnostics":[3],"systems":[4],"depict":[5],"the":[6,40,115,137,140],"running":[7],"system\u2019s":[8],"state":[9],"generate":[11],"enormous":[12],"amounts":[13],"of":[14,139],"unstructured":[15,48,65],"data":[16,49,66,116,154],"through":[17],"log":[18],"files,":[19,92],"print":[20],"statements,":[21],"other":[23],"reports.":[24],"It":[25],"is":[26,110,124],"not":[27,119],"feasible":[28],"to":[29,36,42,46,63,112,135,152],"manually":[30],"analyze":[31],"all":[32],"these":[33,71],"files":[34],"due":[35],"limited":[37],"resources":[38],"need":[41,76],"develop":[43],"custom":[44],"parsers":[45],"convert":[47],"into":[50,67,102],"desirable":[51],"file":[52,69,105],"formats.":[53],"Prior":[54],"research":[55],"focuses":[56],"on":[57],"rule-based":[58],"relationship-based":[60],"parsing":[61],"methods":[62,72],"parse":[64],"structured":[68,104],"formats;":[70],"are":[73],"labor-intensive":[74],"large":[77],"annotated":[78],"datasets.":[79],"This":[80],"paper":[81],"presents":[82],"an":[83],"unsupervised":[84],"text":[85,91],"processing":[86],"pipeline":[87],"that":[88,146],"analyses":[89],"such":[90],"removes":[93],"extraneous":[94],"information,":[95],"identifies":[96],"tabular":[97],"components,":[98],"parses":[100],"them":[101],"a":[103],"format.":[106],"The":[107],"proposed":[108,141],"approach":[109],"resilient":[111],"changes":[113],"in":[114],"structure,":[117],"does":[118],"require":[120],"training":[121],"data,":[122],"domain-independent.":[125],"We":[126],"experiment":[127],"compare":[129],"topic":[130,160],"modeling":[131],"clustering":[133,150],"approaches":[134],"verify":[136],"accuracy":[138,158],"technique.":[142],"Our":[143],"findings":[144],"indicate":[145],"combining":[147],"similarity":[148],"algorithms":[151],"identify":[153],"components":[155],"had":[156],"better":[157],"than":[159],"modeling.":[161]},"counts_by_year":[{"year":2024,"cited_by_count":1}],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-10-10T00:00:00"}
