{"id":"https://openalex.org/W4412197270","doi":"https://doi.org/10.1145/3725899.3725915","title":"A Context-Aware Multimodal Fusion DST Model: Advancing Task-Oriented Dialogue Systems","display_name":"A Context-Aware Multimodal Fusion DST Model: Advancing Task-Oriented Dialogue Systems","publication_year":2025,"publication_date":"2025-01-10","ids":{"openalex":"https://openalex.org/W4412197270","doi":"https://doi.org/10.1145/3725899.3725915"},"language":"en","primary_location":{"id":"doi:10.1145/3725899.3725915","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3725899.3725915","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2025 8th International Conference on Software Engineering and Information Management","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://doi.org/10.1145/3725899.3725915","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Dali Zhao","orcid":"https://orcid.org/0009-0000-4563-8022"},"institutions":[{"id":"https://openalex.org/I4210124822","display_name":"State Development & Investment Corporation (China)","ror":"https://ror.org/030mq9680","country_code":"CN","type":"company","lineage":["https://openalex.org/I4210124822"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Dali Zhao","raw_affiliation_strings":["China National Oil and Gas Exploration and Development Company, Beijing, Beijing, China"],"raw_orcid":"https://orcid.org/0009-0000-4563-8022","affiliations":[{"raw_affiliation_string":"China National Oil and Gas Exploration and Development Company, Beijing, Beijing, China","institution_ids":["https://openalex.org/I4210124822"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Zhiyong Shu","orcid":"https://orcid.org/0009-0007-3350-5370"},"institutions":[{"id":"https://openalex.org/I150229711","display_name":"University of Electronic Science and Technology of China","ror":"https://ror.org/04qr3zq92","country_code":"CN","type":"education","lineage":["https://openalex.org/I150229711"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Zhiyong Shu","raw_affiliation_strings":["The Institute of Intelligent Computing, University of Electronic Science and Technology of China, Chengdu, Sichuan, China"],"raw_orcid":"https://orcid.org/0009-0007-3350-5370","affiliations":[{"raw_affiliation_string":"The Institute of Intelligent Computing, University of Electronic Science and Technology of China, Chengdu, Sichuan, China","institution_ids":["https://openalex.org/I150229711"]}]},{"author_position":"middle","author":{"id":null,"display_name":"Lei Feng","orcid":"https://orcid.org/0009-0002-9717-7410"},"institutions":[{"id":"https://openalex.org/I194716290","display_name":"China Academy of Space Technology","ror":"https://ror.org/025397a59","country_code":"CN","type":"government","lineage":["https://openalex.org/I194716290","https://openalex.org/I2802615301"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Lei Feng","raw_affiliation_strings":["Space Star Technology Co., Ltd., Beijing, Beijing, China"],"raw_orcid":"https://orcid.org/0009-0002-9717-7410","affiliations":[{"raw_affiliation_string":"Space Star Technology Co., Ltd., Beijing, Beijing, China","institution_ids":["https://openalex.org/I194716290"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5043163390","display_name":"Jielei Wang","orcid":"https://orcid.org/0000-0003-2882-7053"},"institutions":[{"id":"https://openalex.org/I150229711","display_name":"University of Electronic Science and Technology of China","ror":"https://ror.org/04qr3zq92","country_code":"CN","type":"education","lineage":["https://openalex.org/I150229711"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Jielei Wang","raw_affiliation_strings":["The Institute of Intelligent Computing, University of Electronic Science and Technology of China, Chengdu, Sichuan, China"],"raw_orcid":"https://orcid.org/0000-0003-2882-7053","affiliations":[{"raw_affiliation_string":"The Institute of Intelligent Computing, University of Electronic Science and Technology of China, Chengdu, Sichuan, China","institution_ids":["https://openalex.org/I150229711"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5000444026","display_name":"Guoming Lu","orcid":"https://orcid.org/0000-0001-7477-5800"},"institutions":[{"id":"https://openalex.org/I150229711","display_name":"University of Electronic Science and Technology of China","ror":"https://ror.org/04qr3zq92","country_code":"CN","type":"education","lineage":["https://openalex.org/I150229711"]}],"countries":["CN"],"is_corresponding":false,"raw_author_name":"Guoming Lu","raw_affiliation_strings":["The Institute of Intelligent Computing, University of Electronic Science and Technology of China, Chengdu, Sichuan, China"],"raw_orcid":"https://orcid.org/0000-0001-7477-5800","affiliations":[{"raw_affiliation_string":"The Institute of Intelligent Computing, University of Electronic Science and Technology of China, Chengdu, Sichuan, China","institution_ids":["https://openalex.org/I150229711"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":{"value":0.07367011,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"102","last_page":"109"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9940999746322632,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10456","display_name":"Multi-Agent Systems and Negotiation","score":0.9908000230789185,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.7458406686782837},{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.6259549260139465},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.6049293875694275},{"id":"https://openalex.org/keywords/context-model","display_name":"Context model","score":0.4912824034690857},{"id":"https://openalex.org/keywords/sensor-fusion","display_name":"Sensor fusion","score":0.47420552372932434},{"id":"https://openalex.org/keywords/multimodal-interaction","display_name":"Multimodal interaction","score":0.4719654321670532},{"id":"https://openalex.org/keywords/fusion","display_name":"Fusion","score":0.4499833583831787},{"id":"https://openalex.org/keywords/human\u2013computer-interaction","display_name":"Human\u2013computer interaction","score":0.4447239637374878},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.3827858567237854},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.32520490884780884},{"id":"https://openalex.org/keywords/systems-engineering","display_name":"Systems engineering","score":0.1576852798461914},{"id":"https://openalex.org/keywords/engineering","display_name":"Engineering","score":0.10057953000068665},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.07071354985237122}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.7458406686782837},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.6259549260139465},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.6049293875694275},{"id":"https://openalex.org/C183322885","wikidata":"https://www.wikidata.org/wiki/Q17007702","display_name":"Context model","level":3,"score":0.4912824034690857},{"id":"https://openalex.org/C33954974","wikidata":"https://www.wikidata.org/wiki/Q486494","display_name":"Sensor fusion","level":2,"score":0.47420552372932434},{"id":"https://openalex.org/C135641252","wikidata":"https://www.wikidata.org/wiki/Q738567","display_name":"Multimodal interaction","level":2,"score":0.4719654321670532},{"id":"https://openalex.org/C158525013","wikidata":"https://www.wikidata.org/wiki/Q2593739","display_name":"Fusion","level":2,"score":0.4499833583831787},{"id":"https://openalex.org/C107457646","wikidata":"https://www.wikidata.org/wiki/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.4447239637374878},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3827858567237854},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.32520490884780884},{"id":"https://openalex.org/C201995342","wikidata":"https://www.wikidata.org/wiki/Q682496","display_name":"Systems engineering","level":1,"score":0.1576852798461914},{"id":"https://openalex.org/C127413603","wikidata":"https://www.wikidata.org/wiki/Q11023","display_name":"Engineering","level":0,"score":0.10057953000068665},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.07071354985237122},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0},{"id":"https://openalex.org/C2781238097","wikidata":"https://www.wikidata.org/wiki/Q175026","display_name":"Object (grammar)","level":2,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3725899.3725915","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3725899.3725915","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2025 8th International Conference on Software Engineering and Information Management","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3725899.3725915","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3725899.3725915","pdf_url":null,"source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2025 8th International Conference on Software Engineering and Information Management","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":9,"referenced_works":["https://openalex.org/W2194775991","https://openalex.org/W2972777589","https://openalex.org/W3034999214","https://openalex.org/W3100110884","https://openalex.org/W3156555225","https://openalex.org/W4226283120","https://openalex.org/W4281398986","https://openalex.org/W4298396477","https://openalex.org/W4310031459"],"related_works":["https://openalex.org/W2099421762","https://openalex.org/W2530546662","https://openalex.org/W2967030268","https://openalex.org/W2185253430","https://openalex.org/W4210345652","https://openalex.org/W2132659060","https://openalex.org/W2031992971","https://openalex.org/W3214791684","https://openalex.org/W2152662039","https://openalex.org/W1970108856"],"abstract_inverted_index":{"Task-oriented":[0],"dialogue":[1,93,135],"state":[2,94,136],"tracking":[3],"is":[4],"crucial":[5],"for":[6,142],"intelligent":[7],"human-computer":[8],"interaction.":[9],"Traditional":[10],"methods,":[11],"however,":[12],"falter":[13],"in":[14,66,87,110,133,145],"handling":[15],"the":[16,83,99,146],"escalating":[17],"complexity":[18],"and":[19,31,58,78,92,113,124],"diversity":[20],"of":[21,121],"scenarios.":[22],"They":[23],"struggle":[24],"with":[25,61],"context":[26],"comprehension,":[27],"user":[28],"behavior":[29],"analysis,":[30],"accommodating":[32],"various":[33],"interaction":[34],"modes.":[35],"Multimodal":[36],"approaches":[37],"emerge":[38],"as":[39,76],"a":[40,46,67,139],"promising":[41],"solution.":[42],"This":[43,127],"paper":[44],"presents":[45],"novel":[47],"knowledge-enhanced,":[48],"context-aware":[49],"multimodal":[50,134],"fusion":[51],"DST":[52],"model.":[53],"It":[54],"adeptly":[55],"fuses":[56],"visual":[57,122],"language":[59],"information":[60],"domain":[62],"entity":[63],"prior":[64,125],"knowledge":[65],"graph":[68],"neural":[69],"network.":[70],"To":[71],"address":[72],"dataset":[73,86,102],"limitations":[74],"such":[75],"non-open-source":[77],"imbalanced":[79],"distributions,":[80],"we":[81],"expand":[82],"SIMMC":[84,100],"2.1":[85,101],"corpus":[88],"translation,":[89],"image":[90],"transformation,":[91],"adjustment.":[95],"Experimental":[96],"results":[97],"on":[98],"demonstrate":[103],"that":[104],"our":[105],"model":[106],"surpasses":[107],"baseline":[108],"models":[109],"Act":[111],"F1":[112,115],"Slot":[114],"scores,":[116],"highlighting":[117],"its":[118],"better":[119],"utilization":[120],"modality":[123],"knowledge.":[126],"research":[128],"thus":[129],"makes":[130],"significant":[131],"strides":[132],"tracking,":[137],"laying":[138],"solid":[140],"foundation":[141],"further":[143],"enhancements":[144],"field.":[147]},"counts_by_year":[],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
