{"id":"https://openalex.org/W3133657745","doi":"https://doi.org/10.23919/icact51234.2021.9370654","title":"An Extensible Parsing Pipeline for Unstructured Data Processing","display_name":"An Extensible Parsing Pipeline for Unstructured Data Processing","publication_year":2021,"publication_date":"2021-02-07","ids":{"openalex":"https://openalex.org/W3133657745","doi":"https://doi.org/10.23919/icact51234.2021.9370654","mag":"3133657745"},"language":"en","primary_location":{"id":"doi:10.23919/icact51234.2021.9370654","is_oa":false,"landing_page_url":"https://doi.org/10.23919/icact51234.2021.9370654","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 23rd International Conference on Advanced Communication Technology (ICACT)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://research.thea.ie/bitstream/20.500.12065/3555/1/An%20extendible%20parsing%20pipeline%20....pdf","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5005203615","display_name":"Shubham Jain","orcid":"https://orcid.org/0000-0002-0913-3948"},"institutions":[{"id":"https://openalex.org/I151939572","display_name":"Athlone Institute of Technology","ror":"https://ror.org/02dyxwz31","country_code":"IE","type":"education","lineage":["https://openalex.org/I151939572"]}],"countries":["IE"],"is_corresponding":false,"raw_author_name":"Shubham Jain","raw_affiliation_strings":["Software Research Institute, Athlone Institute of Technology,Athlone,Ireland","Software Research Institute, Athlone Institute of Technology, Athlone, Ireland"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Software Research Institute, Athlone Institute of Technology,Athlone,Ireland","institution_ids":["https://openalex.org/I151939572"]},{"raw_affiliation_string":"Software Research Institute, Athlone Institute of Technology, Athlone, Ireland","institution_ids":["https://openalex.org/I151939572"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5082979434","display_name":"Amy de Buitl\u00e9ir","orcid":"https://orcid.org/0000-0001-8359-0920"},"institutions":[{"id":"https://openalex.org/I4210150373","display_name":"Ericsson (Ireland)","ror":"https://ror.org/04swdfh12","country_code":"IE","type":"company","lineage":["https://openalex.org/I1306339040","https://openalex.org/I4210150373"]}],"countries":["IE"],"is_corresponding":false,"raw_author_name":"Amy de Buitl\u00e9ir","raw_affiliation_strings":["Network Management Lab,Ericsson,Athlone,Ireland","Network Management Lab, Ericsson, Athlone, Ireland"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Network Management Lab,Ericsson,Athlone,Ireland","institution_ids":["https://openalex.org/I4210150373"]},{"raw_affiliation_string":"Network Management Lab, Ericsson, Athlone, Ireland","institution_ids":["https://openalex.org/I4210150373"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5070072452","display_name":"Enda Fallon","orcid":"https://orcid.org/0000-0002-8300-5813"},"institutions":[{"id":"https://openalex.org/I151939572","display_name":"Athlone Institute of Technology","ror":"https://ror.org/02dyxwz31","country_code":"IE","type":"education","lineage":["https://openalex.org/I151939572"]}],"countries":["IE"],"is_corresponding":false,"raw_author_name":"Enda Fallon","raw_affiliation_strings":["Software Research Institute, Athlone Institute of Technology,Athlone,Ireland","Software Research Institute, Athlone Institute of Technology, Athlone, Ireland"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Software Research Institute, Athlone Institute of Technology,Athlone,Ireland","institution_ids":["https://openalex.org/I151939572"]},{"raw_affiliation_string":"Software Research Institute, Athlone Institute of Technology, Athlone, Ireland","institution_ids":["https://openalex.org/I151939572"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.962,"has_fulltext":false,"cited_by_count":6,"citation_normalized_percentile":{"value":0.76221749,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":97},"biblio":{"volume":null,"issue":null,"first_page":"312","last_page":"318"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12127","display_name":"Software System Performance and Reliability","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12127","display_name":"Software System Performance and Reliability","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10743","display_name":"Software Testing and Debugging Techniques","score":0.9846000075340271,"subfield":{"id":"https://openalex.org/subfields/1712","display_name":"Software"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10260","display_name":"Software Engineering Research","score":0.9835000038146973,"subfield":{"id":"https://openalex.org/subfields/1710","display_name":"Information Systems"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.9082731008529663},{"id":"https://openalex.org/keywords/parsing","display_name":"Parsing","score":0.8472774028778076},{"id":"https://openalex.org/keywords/pipeline","display_name":"Pipeline (software)","score":0.7809125185012817},{"id":"https://openalex.org/keywords/unstructured-data","display_name":"Unstructured data","score":0.6203945875167847},{"id":"https://openalex.org/keywords/cluster-analysis","display_name":"Cluster analysis","score":0.6114399433135986},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.5273669362068176},{"id":"https://openalex.org/keywords/plain-text","display_name":"Plain text","score":0.4480840265750885},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.4465208649635315},{"id":"https://openalex.org/keywords/file-format","display_name":"File format","score":0.43189677596092224},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.4275829792022705},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3326738476753235},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.30771851539611816},{"id":"https://openalex.org/keywords/big-data","display_name":"Big data","score":0.2677720785140991},{"id":"https://openalex.org/keywords/programming-language","display_name":"Programming language","score":0.17299097776412964},{"id":"https://openalex.org/keywords/encryption","display_name":"Encryption","score":0.10076776146888733}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.9082731008529663},{"id":"https://openalex.org/C186644900","wikidata":"https://www.wikidata.org/wiki/Q194152","display_name":"Parsing","level":2,"score":0.8472774028778076},{"id":"https://openalex.org/C43521106","wikidata":"https://www.wikidata.org/wiki/Q2165493","display_name":"Pipeline (software)","level":2,"score":0.7809125185012817},{"id":"https://openalex.org/C2781252014","wikidata":"https://www.wikidata.org/wiki/Q1141900","display_name":"Unstructured data","level":3,"score":0.6203945875167847},{"id":"https://openalex.org/C73555534","wikidata":"https://www.wikidata.org/wiki/Q622825","display_name":"Cluster analysis","level":2,"score":0.6114399433135986},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.5273669362068176},{"id":"https://openalex.org/C46503548","wikidata":"https://www.wikidata.org/wiki/Q1145976","display_name":"Plain text","level":3,"score":0.4480840265750885},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.4465208649635315},{"id":"https://openalex.org/C97250363","wikidata":"https://www.wikidata.org/wiki/Q235557","display_name":"File format","level":2,"score":0.43189677596092224},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.4275829792022705},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3326738476753235},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.30771851539611816},{"id":"https://openalex.org/C75684735","wikidata":"https://www.wikidata.org/wiki/Q858810","display_name":"Big data","level":2,"score":0.2677720785140991},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.17299097776412964},{"id":"https://openalex.org/C148730421","wikidata":"https://www.wikidata.org/wiki/Q141090","display_name":"Encryption","level":2,"score":0.10076776146888733},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C111919701","wikidata":"https://www.wikidata.org/wiki/Q9135","display_name":"Operating system","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"doi:10.23919/icact51234.2021.9370654","is_oa":false,"landing_page_url":"https://doi.org/10.23919/icact51234.2021.9370654","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2021 23rd International Conference on Advanced Communication Technology (ICACT)","raw_type":"proceedings-article"},{"id":"pmh:oai:research.thea.ie:20.500.12065/3555","is_oa":true,"landing_page_url":"http://research.thea.ie/handle/20.500.12065/3555","pdf_url":"https://research.thea.ie/bitstream/20.500.12065/3555/1/An%20extendible%20parsing%20pipeline%20....pdf","source":{"id":"https://openalex.org/S7407055380","display_name":"Research@THEA","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"2021 23rd International Conference on Advanced Communication Technology (ICACT)","raw_type":"info:eu-repo/semantics/acceptedVersion"}],"best_oa_location":{"id":"pmh:oai:research.thea.ie:20.500.12065/3555","is_oa":true,"landing_page_url":"http://research.thea.ie/handle/20.500.12065/3555","pdf_url":"https://research.thea.ie/bitstream/20.500.12065/3555/1/An%20extendible%20parsing%20pipeline%20....pdf","source":{"id":"https://openalex.org/S7407055380","display_name":"Research@THEA","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"2021 23rd International Conference on Advanced Communication Technology (ICACT)","raw_type":"info:eu-repo/semantics/acceptedVersion"},"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/8","score":0.6600000262260437,"display_name":"Decent work and economic growth"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W3133657745.pdf"},"referenced_works_count":26,"referenced_works":["https://openalex.org/W65738273","https://openalex.org/W168564468","https://openalex.org/W205885184","https://openalex.org/W1583082330","https://openalex.org/W1647671624","https://openalex.org/W1661413208","https://openalex.org/W1969256433","https://openalex.org/W1974648336","https://openalex.org/W2001619934","https://openalex.org/W2070771761","https://openalex.org/W2102632804","https://openalex.org/W2103868202","https://openalex.org/W2144211451","https://openalex.org/W2153470728","https://openalex.org/W2181347089","https://openalex.org/W2481003398","https://openalex.org/W2540904922","https://openalex.org/W2560021099","https://openalex.org/W2951464182","https://openalex.org/W3054225939","https://openalex.org/W3090556797","https://openalex.org/W3108996037","https://openalex.org/W4237335675","https://openalex.org/W6636915900","https://openalex.org/W6681226105","https://openalex.org/W6786865631"],"related_works":["https://openalex.org/W3197228483","https://openalex.org/W2560797684","https://openalex.org/W3201560849","https://openalex.org/W2030910246","https://openalex.org/W40729839","https://openalex.org/W2075304357","https://openalex.org/W2277602920","https://openalex.org/W4221114044","https://openalex.org/W3133657745","https://openalex.org/W2805747897"],"abstract_inverted_index":{"Network":[0],"monitoring":[1],"and":[2,10,22,39,59,75,99,123,128,132,149],"diagnostics":[3],"systems":[4],"depict":[5],"the":[6,40,115,137,140],"running":[7],"system's":[8],"state":[9],"generate":[11],"enormous":[12],"amounts":[13],"of":[14,139],"unstructured":[15,48,65],"data":[16,49,66,116,154],"through":[17],"log":[18],"files,":[19,92],"print":[20],"statements,":[21],"other":[23],"reports.":[24],"It":[25],"is":[26,110,124],"not":[27,119],"feasible":[28],"to":[29,36,42,46,63,112,135,152],"manually":[30],"analyze":[31],"all":[32],"these":[33,71],"files":[34],"due":[35],"limited":[37],"resources":[38],"need":[41,76],"develop":[43],"custom":[44],"parsers":[45],"convert":[47],"into":[50,67,102],"desirable":[51],"file":[52,69,105],"formats.":[53],"Prior":[54],"research":[55],"focuses":[56],"on":[57],"rule-based":[58],"relationship-based":[60],"parsing":[61],"methods":[62,72],"parse":[64],"structured":[68,104],"formats;":[70],"are":[73],"labor-intensive":[74],"large":[77],"annotated":[78],"datasets.":[79],"This":[80],"paper":[81],"presents":[82],"an":[83],"unsupervised":[84],"text":[85,91],"processing":[86],"pipeline":[87],"that":[88,146],"analyses":[89],"such":[90],"removes":[93],"extraneous":[94],"information,":[95],"identifies":[96],"tabular":[97],"components,":[98],"parses":[100],"them":[101],"a":[103],"format.":[106],"The":[107],"proposed":[108,141],"approach":[109],"resilient":[111],"changes":[113],"in":[114],"structure,":[117],"does":[118],"require":[120],"training":[121],"data,":[122],"domain-independent.":[125],"We":[126],"experiment":[127],"compare":[129],"topic":[130,160],"modeling":[131],"clustering":[133,150],"approaches":[134],"verify":[136],"accuracy":[138,158],"technique.":[142],"Our":[143],"findings":[144],"indicate":[145],"combining":[147],"similarity":[148],"algorithms":[151],"identify":[153],"components":[155],"had":[156],"better":[157],"than":[159],"modeling.":[161]},"counts_by_year":[{"year":2023,"cited_by_count":3},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":2}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
