{"id":"https://openalex.org/W4408354301","doi":"https://doi.org/10.1109/icassp49660.2025.10889977","title":"AceParse: A Comprehensive Dataset with Diverse Structured Texts for Academic Literature Parsing","display_name":"AceParse: A Comprehensive Dataset with Diverse Structured Texts for Academic Literature Parsing","publication_year":2025,"publication_date":"2025-03-12","ids":{"openalex":"https://openalex.org/W4408354301","doi":"https://doi.org/10.1109/icassp49660.2025.10889977"},"language":"en","primary_location":{"id":"doi:10.1109/icassp49660.2025.10889977","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49660.2025.10889977","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102627233","display_name":"Huawei Ji","orcid":null},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Huawei Ji","raw_affiliation_strings":["Shanghai Jiao Tong University,China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University,China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5059632585","display_name":"Cheng Deng","orcid":"https://orcid.org/0000-0002-3171-823X"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Cheng Deng","raw_affiliation_strings":["Shanghai Jiao Tong University,China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University,China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101843145","display_name":"Bo Xue","orcid":"https://orcid.org/0000-0002-7295-4853"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Bo Xue","raw_affiliation_strings":["Shanghai Jiao Tong University,China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University,China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5111233712","display_name":"Zhouyang Jin","orcid":null},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhouyang Jin","raw_affiliation_strings":["Shanghai Jiao Tong University,China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University,China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5080158271","display_name":"Jiaxin Ding","orcid":"https://orcid.org/0000-0002-0009-9237"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jiaxin Ding","raw_affiliation_strings":["Shanghai Jiao Tong University,China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University,China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5085791658","display_name":"Xiaoying Gan","orcid":"https://orcid.org/0000-0001-5200-1409"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xiaoying Gan","raw_affiliation_strings":["Shanghai Jiao Tong University,China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University,China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5048486573","display_name":"Luoyi Fu","orcid":"https://orcid.org/0000-0001-7796-9168"},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Luoyi Fu","raw_affiliation_strings":["Shanghai Jiao Tong University,China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University,China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5110243503","display_name":"Xinbing Wang","orcid":null},"institutions":[{"id":"https://openalex.org/I183067930","display_name":"Shanghai Jiao Tong University","ror":"https://ror.org/0220qvk04","country_code":"CN","type":"education","lineage":["https://openalex.org/I183067930"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Xinbing Wang","raw_affiliation_strings":["Shanghai Jiao Tong University,China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Shanghai Jiao Tong University,China","institution_ids":["https://openalex.org/I183067930"]}]},{"author_position":"last","author":{"id":null,"display_name":"Chenghu Zhou","orcid":null},"institutions":[{"id":"https://openalex.org/I19820366","display_name":"Chinese Academy of Sciences","ror":"https://ror.org/034t30j35","country_code":"CN","type":"government","lineage":["https://openalex.org/I19820366"]},{"id":"https://openalex.org/I4210160793","display_name":"Institute of Geographic Sciences and Natural Resources Research","ror":"https://ror.org/04t1cdb72","country_code":"CN","type":"facility","lineage":["https://openalex.org/I19820366","https://openalex.org/I4210160793"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Chenghu Zhou","raw_affiliation_strings":["Chinese Academy of Sciences,IGSNRR,Beijing,China"],"raw_orcid":null,"affiliations":[{"raw_affiliation_string":"Chinese Academy of Sciences,IGSNRR,Beijing,China","institution_ids":["https://openalex.org/I4210160793","https://openalex.org/I19820366"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":9,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.01702573,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"1","last_page":"5"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9904000163078308,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9904000163078308,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9605000019073486,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8111283183097839},{"id":"https://openalex.org/keywords/parsing","display_name":"Parsing","score":0.805586040019989},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.5485438108444214},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.460550457239151},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.43616700172424316},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.36603212356567383}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8111283183097839},{"id":"https://openalex.org/C186644900","wikidata":"https://www.wikidata.org/wiki/Q194152","display_name":"Parsing","level":2,"score":0.805586040019989},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.5485438108444214},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.460550457239151},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.43616700172424316},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.36603212356567383}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp49660.2025.10889977","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp49660.2025.10889977","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2025 - 2025 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[{"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4","score":0.8100000023841858}],"awards":[],"funders":[{"id":"https://openalex.org/F4320321001","display_name":"National Natural Science Foundation of China","ror":"https://ror.org/01h0zpd94"}],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":17,"referenced_works":["https://openalex.org/W2001642682","https://openalex.org/W2101105183","https://openalex.org/W3034999214","https://openalex.org/W3113753692","https://openalex.org/W3208988339","https://openalex.org/W3216666189","https://openalex.org/W4213009331","https://openalex.org/W4290927927","https://openalex.org/W4312881242","https://openalex.org/W4389519217","https://openalex.org/W4392384758","https://openalex.org/W4402660140","https://openalex.org/W4403792205","https://openalex.org/W4406085118","https://openalex.org/W6636915900","https://openalex.org/W6740017710","https://openalex.org/W6760195326"],"related_works":["https://openalex.org/W579810227","https://openalex.org/W2142145894","https://openalex.org/W2952780262","https://openalex.org/W2979495269","https://openalex.org/W2392917763","https://openalex.org/W4381248170","https://openalex.org/W3189621521","https://openalex.org/W2173794830","https://openalex.org/W1502858101","https://openalex.org/W3204019825"],"abstract_inverted_index":{"With":[0],"the":[1,6,22,53,68,75,117,134],"development":[2],"of":[3,21,55,77,81,124,136],"data-centric":[4],"AI,":[5],"focus":[7],"has":[8],"shifted":[9],"from":[10],"model-driven":[11],"approaches":[12],"to":[13,33,52,73],"improving":[14],"data":[15],"quality.":[16],"Academic":[17],"literature,":[18],"as":[19],"one":[20],"crucial":[23],"types,":[24],"is":[25,145],"predominantly":[26],"stored":[27],"in":[28,46,122,130,139],"PDF":[29],"formats":[30],"and":[31,89,127],"needs":[32],"be":[34],"parsed":[35],"into":[36],"texts":[37,45,110],"before":[38],"further":[39],"processing.":[40],"However,":[41],"parsing":[42,76],"diverse":[43],"structured":[44,82,109],"academic":[47,112,140],"literature":[48,141],"remains":[49],"challenging":[50],"due":[51],"lack":[54],"datasets":[56],"that":[57],"cover":[58],"various":[59,108],"text":[60],"structures.":[61],"In":[62],"this":[63],"paper,":[64],"we":[65,98],"introduce":[66],"AceParse,":[67,97],"first":[69],"comprehensive":[70],"dataset":[71,144],"designed":[72],"support":[74],"a":[78,100],"wide":[79],"range":[80],"texts,":[83],"including":[84],"formulas,":[85],"tables,":[86],"lists,":[87],"algorithms,":[88],"sentences":[90],"with":[91],"embedded":[92],"mathematical":[93],"expressions.":[94],"Based":[95],"on":[96],"fine-tuned":[99],"multimodal":[101,137],"model,":[102],"named":[103],"AceParser,":[104],"which":[105],"accurately":[106],"parses":[107],"within":[111],"literature.":[113],"This":[114],"model":[115],"outperforms":[116],"previous":[118],"state-of-the-art":[119],"by":[120,128],"4.1%":[121],"terms":[123],"F1":[125],"score":[126],"5%":[129],"Jaccard":[131],"Similarity,":[132],"demonstrating":[133],"potential":[135],"models":[138],"parsing.":[142],"Our":[143],"available":[146],"at":[147],"https://github.com/JHW5981/AceParse.":[148]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
