{"id":"https://openalex.org/W4402659583","doi":"https://doi.org/10.1145/3685650.3685665","title":"CatalogBank: A Structured and Interoperable Catalog Dataset with a Semi-Automatic Annotation Tool (DocumentLabeler) for Engineering System Design","display_name":"CatalogBank: A Structured and Interoperable Catalog Dataset with a Semi-Automatic Annotation Tool (DocumentLabeler) for Engineering System Design","publication_year":2024,"publication_date":"2024-08-20","ids":{"openalex":"https://openalex.org/W4402659583","doi":"https://doi.org/10.1145/3685650.3685665"},"language":"en","primary_location":{"id":"doi:10.1145/3685650.3685665","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3685650.3685665","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM Symposium on Document Engineering 2024","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5048802212","display_name":"Hasan Sinan Bank","orcid":"https://orcid.org/0000-0002-0626-2664"},"institutions":[{"id":"https://openalex.org/I92446798","display_name":"Colorado State University","ror":"https://ror.org/03k1gpj17","country_code":"US","type":"education","lineage":["https://openalex.org/I92446798"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Hasan Sinan Bank","raw_affiliation_strings":["Colorado State University, Fort Collins, Colorado, USA"],"affiliations":[{"raw_affiliation_string":"Colorado State University, Fort Collins, Colorado, USA","institution_ids":["https://openalex.org/I92446798"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5077474480","display_name":"Daniel R. Herber","orcid":"https://orcid.org/0000-0003-4995-7375"},"institutions":[{"id":"https://openalex.org/I92446798","display_name":"Colorado State University","ror":"https://ror.org/03k1gpj17","country_code":"US","type":"education","lineage":["https://openalex.org/I92446798"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Daniel R. Herber","raw_affiliation_strings":["Colorado State University, Fort Collins, Colorado, USA"],"affiliations":[{"raw_affiliation_string":"Colorado State University, Fort Collins, Colorado, USA","institution_ids":["https://openalex.org/I92446798"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":2,"corresponding_author_ids":["https://openalex.org/A5048802212"],"corresponding_institution_ids":["https://openalex.org/I92446798"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.13699423,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"9"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9988999962806702,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10215","display_name":"Semantic Web and Ontologies","score":0.9976999759674072,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13523","display_name":"Mathematics, Computing, and Information Processing","score":0.9927999973297119,"subfield":{"id":"https://openalex.org/subfields/1703","display_name":"Computational Theory and Mathematics"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/interoperability","display_name":"Interoperability","score":0.7921528816223145},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7455928921699524},{"id":"https://openalex.org/keywords/annotation","display_name":"Annotation","score":0.743229866027832},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.4284575879573822},{"id":"https://openalex.org/keywords/software-engineering","display_name":"Software engineering","score":0.42560455203056335},{"id":"https://openalex.org/keywords/database","display_name":"Database","score":0.3351999521255493},{"id":"https://openalex.org/keywords/world-wide-web","display_name":"World Wide Web","score":0.3327159583568573},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.16845285892486572}],"concepts":[{"id":"https://openalex.org/C20136886","wikidata":"https://www.wikidata.org/wiki/Q749647","display_name":"Interoperability","level":2,"score":0.7921528816223145},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7455928921699524},{"id":"https://openalex.org/C2776321320","wikidata":"https://www.wikidata.org/wiki/Q857525","display_name":"Annotation","level":2,"score":0.743229866027832},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.4284575879573822},{"id":"https://openalex.org/C115903868","wikidata":"https://www.wikidata.org/wiki/Q80993","display_name":"Software engineering","level":1,"score":0.42560455203056335},{"id":"https://openalex.org/C77088390","wikidata":"https://www.wikidata.org/wiki/Q8513","display_name":"Database","level":1,"score":0.3351999521255493},{"id":"https://openalex.org/C136764020","wikidata":"https://www.wikidata.org/wiki/Q466","display_name":"World Wide Web","level":1,"score":0.3327159583568573},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.16845285892486572}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3685650.3685665","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3685650.3685665","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the ACM Symposium on Document Engineering 2024","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"id":"https://metadata.un.org/sdg/9","score":0.47999998927116394,"display_name":"Industry, innovation and infrastructure"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":34,"referenced_works":["https://openalex.org/W607481611","https://openalex.org/W1605209582","https://openalex.org/W1998802865","https://openalex.org/W2011120103","https://openalex.org/W2020870737","https://openalex.org/W2045117606","https://openalex.org/W2073299727","https://openalex.org/W2102381086","https://openalex.org/W2110764733","https://openalex.org/W2559969327","https://openalex.org/W2750402403","https://openalex.org/W2786162033","https://openalex.org/W2972931660","https://openalex.org/W2979023429","https://openalex.org/W2986619406","https://openalex.org/W2997154779","https://openalex.org/W3003206728","https://openalex.org/W3003335533","https://openalex.org/W3003484198","https://openalex.org/W3003711898","https://openalex.org/W3096702548","https://openalex.org/W3104953317","https://openalex.org/W3163650427","https://openalex.org/W3185376398","https://openalex.org/W3193837083","https://openalex.org/W3193950908","https://openalex.org/W3206546168","https://openalex.org/W4213349615","https://openalex.org/W4306722876","https://openalex.org/W4312233877","https://openalex.org/W4385595330","https://openalex.org/W4386827556","https://openalex.org/W4391601955","https://openalex.org/W4402753669"],"related_works":["https://openalex.org/W2361861616","https://openalex.org/W2263699433","https://openalex.org/W2377979023","https://openalex.org/W2093262417","https://openalex.org/W2218034408","https://openalex.org/W2392921965","https://openalex.org/W2123131699","https://openalex.org/W2358755282","https://openalex.org/W650116260","https://openalex.org/W2378329187"],"abstract_inverted_index":{"In":[0],"the":[1,11,40,82,91,105,115,129,178],"realm":[2],"of":[3,13,85,93,108,117,131,166],"document":[4,153,171],"engineering":[5,51,154],"and":[6,29,45,97,110,142,155,168],"Natural":[7],"Language":[8],"Processing":[9],"(NLP),":[10],"integration":[12,107],"digitally":[14],"born":[15],"catalogs":[16,66],"into":[17],"product":[18,62],"design":[19,52,86],"processes":[20],"presents":[21],"a":[22,35,74,159],"novel":[23],"avenue":[24],"for":[25,124,162],"enhancing":[26],"information":[27,57,63],"extraction":[28,58],"interoperability.":[30],"This":[31],"paper":[32],"introduces":[33],"CatalogBank,":[34],"dataset":[36,161],"developed":[37],"to":[38,50,60,67,72,152],"bridge":[39],"gap":[41],"between":[42],"textual":[43,109],"descriptions":[44],"other":[46,111],"data":[47,95,112],"modalities":[48],"related":[49],"catalogs.":[53],"We":[54],"utilized":[55],"existing":[56],"methodologies":[59],"extract":[61],"from":[64],"PDF-based":[65],"use":[68,116],"in":[69,133],"downstream":[70],"tasks":[71,137],"generate":[73],"baseline":[75],"metric.":[76],"Our":[77,145],"approach":[78],"not":[79],"only":[80],"supports":[81],"potential":[83,130],"automation":[84],"workflows":[87],"but":[88],"also":[89],"overcomes":[90],"limitations":[92],"manual":[94],"entry":[96],"non-standard":[98],"metadata":[99],"structures":[100],"that":[101,148],"have":[102],"historically":[103],"impeded":[104],"seamless":[106],"modalities.":[113],"Through":[114],"DocumentLabeler,":[118],"an":[119],"open-source":[120],"annotation":[121,180],"tool":[122,181],"adapted":[123],"our":[125],"dataset,":[126],"we":[127],"demonstrated":[128],"CatalogBank":[132,149],"supporting":[134],"diverse":[135],"document-based":[136],"such":[138],"as":[139],"layout":[140],"analysis":[141],"knowledge":[143],"extraction.":[144],"findings":[146],"suggest":[147],"can":[150],"contribute":[151],"NLP":[156],"by":[157],"providing":[158],"robust":[160],"training":[163],"models":[164],"capable":[165],"understanding":[167],"processing":[169],"complex":[170],"formats":[172],"with":[173],"relatively":[174],"less":[175],"effort":[176],"using":[177],"semi-automated":[179],"DocumentLabeler.":[182]},"counts_by_year":[],"updated_date":"2025-12-21T23:12:01.093139","created_date":"2025-10-10T00:00:00"}
