{"id":"https://openalex.org/W4416016641","doi":"https://doi.org/10.1145/3746252.3761030","title":"LLM-Powered Information Extraction for the Dairy Financial Domain: Tackling Data Scarcity and Ambiguity","display_name":"LLM-Powered Information Extraction for the Dairy Financial Domain: Tackling Data Scarcity and Ambiguity","publication_year":2025,"publication_date":"2025-11-07","ids":{"openalex":"https://openalex.org/W4416016641","doi":"https://doi.org/10.1145/3746252.3761030"},"language":null,"primary_location":{"id":"doi:10.1145/3746252.3761030","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3746252.3761030","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 34th ACM International Conference on Information and Knowledge Management","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5102779217","display_name":"Chunyan An","orcid":"https://orcid.org/0000-0002-5622-9985"},"institutions":[{"id":"https://openalex.org/I2722730","display_name":"Inner Mongolia University","ror":"https://ror.org/0106qb496","country_code":"CN","type":"education","lineage":["https://openalex.org/I2722730"]}],"countries":["CN"],"is_corresponding":true,"raw_author_name":"Chunyan An","raw_affiliation_strings":["Inner Mongolia University, Hohhot, Inner Mongolar, China"],"affiliations":[{"raw_affiliation_string":"Inner Mongolia University, Hohhot, Inner Mongolar, China","institution_ids":["https://openalex.org/I2722730"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5023098316","display_name":"Y. P. Huang","orcid":"https://orcid.org/0009-0001-6073-8053"},"institutions":[{"id":"https://openalex.org/I2722730","display_name":"Inner Mongolia University","ror":"https://ror.org/0106qb496","country_code":"CN","type":"education","lineage":["https://openalex.org/I2722730"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Yuying Huang","raw_affiliation_strings":["Inner Mongolia University, Hohhot, Inner Mongolar, China"],"affiliations":[{"raw_affiliation_string":"Inner Mongolia University, Hohhot, Inner Mongolar, China","institution_ids":["https://openalex.org/I2722730"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5115592114","display_name":"Qiang Yang","orcid":"https://orcid.org/0000-0001-8731-3533"},"institutions":[{"id":"https://openalex.org/I33213144","display_name":"University of Florida","ror":"https://ror.org/02y3ad647","country_code":"US","type":"education","lineage":["https://openalex.org/I33213144"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Qiang Yang","raw_affiliation_strings":["University of Florida, Gainesville, FL, USA"],"affiliations":[{"raw_affiliation_string":"University of Florida, Gainesville, FL, USA","institution_ids":["https://openalex.org/I33213144"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5104273480","display_name":"Siyu Yuan","orcid":"https://orcid.org/0009-0002-3547-2676"},"institutions":[{"id":"https://openalex.org/I2722730","display_name":"Inner Mongolia University","ror":"https://ror.org/0106qb496","country_code":"CN","type":"education","lineage":["https://openalex.org/I2722730"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Siyu Yuan","raw_affiliation_strings":["Inner Mongolia University, Hohhot, Inner Mongolar, China"],"affiliations":[{"raw_affiliation_string":"Inner Mongolia University, Hohhot, Inner Mongolar, China","institution_ids":["https://openalex.org/I2722730"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5065529268","display_name":"Zhixu Li","orcid":"https://orcid.org/0000-0003-2355-288X"},"institutions":[{"id":"https://openalex.org/I78988378","display_name":"Renmin University of China","ror":"https://ror.org/041pakw92","country_code":"CN","type":"education","lineage":["https://openalex.org/I78988378"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhixu Li","raw_affiliation_strings":["Renmin University of China, Beijing, China"],"affiliations":[{"raw_affiliation_string":"Renmin University of China, Beijing, China","institution_ids":["https://openalex.org/I78988378"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https://openalex.org/A5102779217"],"corresponding_institution_ids":["https://openalex.org/I2722730"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.17530924,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"55","last_page":"64"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.5157999992370605,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.5157999992370605,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10664","display_name":"Sentiment Analysis and Opinion Mining","score":0.08889999985694885,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13702","display_name":"Machine Learning in Healthcare","score":0.060499999672174454,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/ambiguity","display_name":"Ambiguity","score":0.8022000193595886},{"id":"https://openalex.org/keywords/scarcity","display_name":"Scarcity","score":0.49540001153945923},{"id":"https://openalex.org/keywords/information-extraction","display_name":"Information extraction","score":0.47029998898506165},{"id":"https://openalex.org/keywords/component","display_name":"Component (thermodynamics)","score":0.4212000072002411},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.36959999799728394},{"id":"https://openalex.org/keywords/generalization","display_name":"Generalization","score":0.35409998893737793},{"id":"https://openalex.org/keywords/strengths-and-weaknesses","display_name":"Strengths and weaknesses","score":0.3418000042438507},{"id":"https://openalex.org/keywords/data-extraction","display_name":"Data extraction","score":0.3070000112056732}],"concepts":[{"id":"https://openalex.org/C2780522230","wikidata":"https://www.wikidata.org/wiki/Q1140419","display_name":"Ambiguity","level":2,"score":0.8022000193595886},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7294999957084656},{"id":"https://openalex.org/C109747225","wikidata":"https://www.wikidata.org/wiki/Q815758","display_name":"Scarcity","level":2,"score":0.49540001153945923},{"id":"https://openalex.org/C195807954","wikidata":"https://www.wikidata.org/wiki/Q1662562","display_name":"Information extraction","level":2,"score":0.47029998898506165},{"id":"https://openalex.org/C168167062","wikidata":"https://www.wikidata.org/wiki/Q1117970","display_name":"Component (thermodynamics)","level":2,"score":0.4212000072002411},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.36959999799728394},{"id":"https://openalex.org/C177148314","wikidata":"https://www.wikidata.org/wiki/Q170084","display_name":"Generalization","level":2,"score":0.35409998893737793},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.3449999988079071},{"id":"https://openalex.org/C63882131","wikidata":"https://www.wikidata.org/wiki/Q17122954","display_name":"Strengths and weaknesses","level":2,"score":0.3418000042438507},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3359000086784363},{"id":"https://openalex.org/C10138342","wikidata":"https://www.wikidata.org/wiki/Q43015","display_name":"Finance","level":1,"score":0.32589998841285706},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.321399986743927},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.31220000982284546},{"id":"https://openalex.org/C2777466982","wikidata":"https://www.wikidata.org/wiki/Q5227287","display_name":"Data extraction","level":3,"score":0.3070000112056732},{"id":"https://openalex.org/C25343380","wikidata":"https://www.wikidata.org/wiki/Q277521","display_name":"Relation (database)","level":2,"score":0.3012999892234802},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.296999990940094},{"id":"https://openalex.org/C2780264999","wikidata":"https://www.wikidata.org/wiki/Q7445032","display_name":"Security domain","level":2,"score":0.2939000129699707},{"id":"https://openalex.org/C207685749","wikidata":"https://www.wikidata.org/wiki/Q2088941","display_name":"Domain knowledge","level":2,"score":0.2847000062465668},{"id":"https://openalex.org/C67186912","wikidata":"https://www.wikidata.org/wiki/Q367664","display_name":"Data modeling","level":2,"score":0.2696000039577484},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.2678999900817871},{"id":"https://openalex.org/C112930515","wikidata":"https://www.wikidata.org/wiki/Q4389547","display_name":"Risk analysis (engineering)","level":1,"score":0.2644999921321869},{"id":"https://openalex.org/C146880194","wikidata":"https://www.wikidata.org/wiki/Q61715222","display_name":"Computational finance","level":2,"score":0.262800008058548},{"id":"https://openalex.org/C153604712","wikidata":"https://www.wikidata.org/wiki/Q7310755","display_name":"Relationship extraction","level":3,"score":0.25619998574256897},{"id":"https://openalex.org/C26517878","wikidata":"https://www.wikidata.org/wiki/Q228039","display_name":"Key (lock)","level":2,"score":0.25220000743865967}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3746252.3761030","is_oa":false,"landing_page_url":"https://doi.org/10.1145/3746252.3761030","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 34th ACM International Conference on Information and Knowledge Management","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":17,"referenced_works":["https://openalex.org/W2963216553","https://openalex.org/W2963545917","https://openalex.org/W2981089724","https://openalex.org/W2985981782","https://openalex.org/W2997918300","https://openalex.org/W3034864438","https://openalex.org/W3094861998","https://openalex.org/W3115242847","https://openalex.org/W3166915941","https://openalex.org/W3174828871","https://openalex.org/W4229909500","https://openalex.org/W4287829148","https://openalex.org/W4402667093","https://openalex.org/W4402683908","https://openalex.org/W4407953291","https://openalex.org/W4408941434","https://openalex.org/W6922140027"],"related_works":[],"abstract_inverted_index":{"Information":[0],"extraction":[1],"is":[2,90],"a":[3,64,103],"critical":[4],"technology":[5],"for":[6,116,155,186],"intelligent":[7],"analysis":[8],"and":[9,26,34,51,77,119,184,189,213],"risk":[10],"assessment":[11],"in":[12,36,55,218],"the":[13,24,86,94,108,120,137,176,211],"dairy":[14,95,220],"financial":[15,221],"domain.":[16],"However,":[17],"real-world":[18,219],"applications":[19],"face":[20],"three":[21],"major":[22],"challenges:":[23],"complexity":[25],"diversity":[27],"of":[28,182,215],"entity-relation":[29],"types,":[30,118],"significant":[31],"data":[32,75,105],"imbalance,":[33],"ambiguity":[35],"textual":[37],"expressions.":[38],"Traditional":[39],"methods":[40],"often":[41],"fail":[42],"to":[43,93,113,135,200],"capture":[44],"rare":[45,117],"patterns,":[46],"struggle":[47],"with":[48,73,143,175],"vague":[49,157],"mentions,":[50],"exhibit":[52],"poor":[53],"generalization":[54],"low-resource":[56],"settings.":[57],"To":[58,145],"address":[59],"these":[60],"issues,":[61],"we":[62,149],"propose":[63],"novel":[65],"framework":[66,171],"that":[67,169],"integrates":[68],"large":[69],"language":[70],"models":[71],"(LLMs)":[72],"targeted":[74],"augmentation":[76,106],"agent-based":[78,152],"retrieval-augmented":[79],"generation":[80],"(RAG).":[81],"Our":[82],"approach":[83],"builds":[84],"on":[85,127],"BaiChuan2":[87],"model,":[88],"which":[89],"first":[91,109],"adapted":[92],"finance":[96],"domain":[97],"via":[98],"secondary":[99],"pretraining.":[100],"We":[101],"introduce":[102],"two-stage":[104],"strategy:":[107],"stage":[110,122],"uses":[111],"ChatGPT":[112],"generate":[114],"pseudo-samples":[115],"second":[121],"refines":[123],"model":[124,138],"weaknesses":[125],"based":[126],"prediction-guided":[128],"feedback.":[129],"These":[130],"augmented":[131],"datasets":[132],"are":[133],"used":[134],"fine-tune":[136],"through":[139],"prompt-based":[140],"supervised":[141],"learning":[142],"LoRA.":[144],"further":[146],"enhance":[147],"robustness,":[148],"incorporate":[150],"an":[151],"RAG":[153,194],"module":[154],"completing":[156],"or":[158],"underspecified":[159],"entities":[160],"by":[161,206],"retrieving":[162],"external":[163],"contextual":[164],"knowledge.":[165],"Extensive":[166],"experiments":[167],"demonstrate":[168],"our":[170,216],"achieves":[172],"state-of-the-art":[173],"performance,":[174],"improved":[177],"metric,":[178],"i.e.,":[179],"F1+":[180],"scores,":[181],"0.876":[183],"0.824":[185],"entity":[187,197],"recognition":[188],"relation":[190],"extraction,":[191],"respectively.":[192],"The":[193],"component":[195],"boosts":[196],"completion":[198],"accuracy":[199],"0.802":[201],"while":[202],"reducing":[203],"retrieval":[204],"latency":[205],"over":[207],"6x,":[208],"showcasing":[209],"both":[210],"effectiveness":[212],"practicality":[214],"method":[217],"applications.":[222]},"counts_by_year":[],"updated_date":"2026-03-27T05:58:40.876381","created_date":"2025-11-08T00:00:00"}
