{"id":"https://openalex.org/W4312091556","doi":"https://doi.org/10.48550/arxiv.2212.10465","title":"SODA: Million-scale Dialogue Distillation with Social Commonsense Contextualization","display_name":"SODA: Million-scale Dialogue Distillation with Social Commonsense Contextualization","publication_year":2022,"publication_date":"2022-12-20","ids":{"openalex":"https://openalex.org/W4312091556","doi":"https://doi.org/10.48550/arxiv.2212.10465"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2212.10465","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2212.10465","pdf_url":"https://arxiv.org/pdf/2212.10465","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":null},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://arxiv.org/pdf/2212.10465","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5100330223","display_name":"Hyunwoo Kim","orcid":"https://orcid.org/0000-0003-4810-6333"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kim, Hyunwoo","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5043614405","display_name":"Jack Hessel","orcid":"https://orcid.org/0000-0002-4012-8979"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Hessel, Jack","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5049904411","display_name":"Liwei Jiang","orcid":"https://orcid.org/0000-0001-5990-2509"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jiang, Liwei","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":null,"display_name":"West, Peter","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"West, Peter","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5102930940","display_name":"Ximing Lu","orcid":"https://orcid.org/0000-0001-6671-4573"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Lu, Ximing","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101881857","display_name":"Youngjae Yu","orcid":"https://orcid.org/0000-0002-5867-0782"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yu, Youngjae","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5074236242","display_name":"Pei Zhou","orcid":"https://orcid.org/0000-0001-7570-9098"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhou, Pei","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5024879161","display_name":"Ronan Le Bras","orcid":"https://orcid.org/0000-0003-2439-6938"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bras, Ronan Le","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5025559955","display_name":"Malihe Alikhani","orcid":"https://orcid.org/0000-0002-1315-2228"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Alikhani, Malihe","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100664729","display_name":"Gunhee Kim","orcid":"https://orcid.org/0000-0002-9543-7453"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Kim, Gunhee","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5015128745","display_name":"Maarten Sap","orcid":"https://orcid.org/0000-0002-0701-4654"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sap, Maarten","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5102992157","display_name":"Yejin Choi","orcid":"https://orcid.org/0000-0003-3032-5378"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Choi, Yejin","raw_affiliation_strings":[],"raw_orcid":null,"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":12,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":3,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9805999994277954,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11273","display_name":"Advanced Graph Neural Networks","score":0.9699000120162964,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/commonsense-knowledge","display_name":"Commonsense knowledge","score":0.707787275314331},{"id":"https://openalex.org/keywords/conversation","display_name":"Conversation","score":0.6052919626235962},{"id":"https://openalex.org/keywords/contextualization","display_name":"Contextualization","score":0.6002997756004333},{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5963397026062012},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4465119242668152},{"id":"https://openalex.org/keywords/code","display_name":"Code (set theory)","score":0.4388556480407715},{"id":"https://openalex.org/keywords/scale","display_name":"Scale (ratio)","score":0.4369232654571533},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3488925099372864},{"id":"https://openalex.org/keywords/data-science","display_name":"Data science","score":0.3469163179397583},{"id":"https://openalex.org/keywords/domain-knowledge","display_name":"Domain knowledge","score":0.23623013496398926},{"id":"https://openalex.org/keywords/sociology","display_name":"Sociology","score":0.19886276125907898},{"id":"https://openalex.org/keywords/communication","display_name":"Communication","score":0.1193411648273468},{"id":"https://openalex.org/keywords/geography","display_name":"Geography","score":0.10837706923484802}],"concepts":[{"id":"https://openalex.org/C30542707","wikidata":"https://www.wikidata.org/wiki/Q1603203","display_name":"Commonsense knowledge","level":3,"score":0.707787275314331},{"id":"https://openalex.org/C2777200299","wikidata":"https://www.wikidata.org/wiki/Q52943","display_name":"Conversation","level":2,"score":0.6052919626235962},{"id":"https://openalex.org/C2780712339","wikidata":"https://www.wikidata.org/wiki/Q5165204","display_name":"Contextualization","level":3,"score":0.6002997756004333},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5963397026062012},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4465119242668152},{"id":"https://openalex.org/C2776760102","wikidata":"https://www.wikidata.org/wiki/Q5139990","display_name":"Code (set theory)","level":3,"score":0.4388556480407715},{"id":"https://openalex.org/C2778755073","wikidata":"https://www.wikidata.org/wiki/Q10858537","display_name":"Scale (ratio)","level":2,"score":0.4369232654571533},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3488925099372864},{"id":"https://openalex.org/C2522767166","wikidata":"https://www.wikidata.org/wiki/Q2374463","display_name":"Data science","level":1,"score":0.3469163179397583},{"id":"https://openalex.org/C207685749","wikidata":"https://www.wikidata.org/wiki/Q2088941","display_name":"Domain knowledge","level":2,"score":0.23623013496398926},{"id":"https://openalex.org/C144024400","wikidata":"https://www.wikidata.org/wiki/Q21201","display_name":"Sociology","level":0,"score":0.19886276125907898},{"id":"https://openalex.org/C46312422","wikidata":"https://www.wikidata.org/wiki/Q11024","display_name":"Communication","level":1,"score":0.1193411648273468},{"id":"https://openalex.org/C205649164","wikidata":"https://www.wikidata.org/wiki/Q1071","display_name":"Geography","level":0,"score":0.10837706923484802},{"id":"https://openalex.org/C58640448","wikidata":"https://www.wikidata.org/wiki/Q42515","display_name":"Cartography","level":1,"score":0.0},{"id":"https://openalex.org/C177264268","wikidata":"https://www.wikidata.org/wiki/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.0},{"id":"https://openalex.org/C527412718","wikidata":"https://www.wikidata.org/wiki/Q855395","display_name":"Interpretation (philosophy)","level":2,"score":0.0},{"id":"https://openalex.org/C199360897","wikidata":"https://www.wikidata.org/wiki/Q9143","display_name":"Programming language","level":1,"score":0.0}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2212.10465","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2212.10465","pdf_url":"https://arxiv.org/pdf/2212.10465","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":null},{"id":"doi:10.48550/arxiv.2212.10465","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2212.10465","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2212.10465","is_oa":true,"landing_page_url":"http://arxiv.org/abs/2212.10465","pdf_url":"https://arxiv.org/pdf/2212.10465","source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":null},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":["https://openalex.org/W1687432146","https://openalex.org/W1591874556","https://openalex.org/W2185608106","https://openalex.org/W3046258185","https://openalex.org/W1548083239","https://openalex.org/W3216994056","https://openalex.org/W2268232908","https://openalex.org/W2039220913","https://openalex.org/W3034569646","https://openalex.org/W3014308185"],"abstract_inverted_index":{"Data":[0],"scarcity":[1],"has":[2],"been":[3],"a":[4,37,53,82],"long":[5],"standing":[6],"issue":[7],"in":[8,62,73],"the":[9,22,113,124],"field":[10],"of":[11,49],"open-domain":[12],"social":[13,28,33,50,131],"dialogue.":[14],"To":[15],"quench":[16],"this":[17],"thirst,":[18],"we":[19,40,79],"present":[20],"SODA:":[21],"first":[23],"publicly":[24],"available,":[25],"million-scale":[26],"high-quality":[27],"dialogue":[29],"dataset.":[30],"By":[31],"contextualizing":[32],"commonsense":[34],"knowledge":[35,38],"from":[36,52],"graph,":[39],"are":[41,64],"able":[42],"to":[43,112,135],"distill":[44],"an":[45],"exceptionally":[46],"broad":[47],"spectrum":[48],"interactions":[51],"large":[54],"language":[55],"model.":[56],"Human":[57],"evaluation":[58],"shows":[59],"that":[60,86],"conversations":[61,128],"SODA":[63],"more":[65,89],"consistent,":[66],"specific,":[67],"and":[68,91,129,140],"(surprisingly)":[69],"natural":[70,90,130],"than":[71,96],"those":[72],"prior":[74],"human-authored":[75],"datasets.":[76],"Using":[77],"SODA,":[78],"train":[80],"COSMO:":[81],"generalizable":[83],"conversation":[84,98],"model":[85],"is":[87,108],"significantly":[88],"consistent":[92],"on":[93,123],"unseen":[94],"datasets":[95],"best-performing":[97],"models":[99],"(e.g.,":[100],"GODEL,":[101],"BlenderBot-1,":[102],"Koala,":[103],"Vicuna).":[104],"Experiments":[105],"reveal":[106],"COSMO":[107],"sometimes":[109],"even":[110],"preferred":[111],"original":[114],"human-written":[115],"gold":[116],"responses.":[117],"Additionally,":[118],"our":[119,137],"results":[120],"shed":[121],"light":[122],"distinction":[125],"between":[126],"knowledge-enriched":[127],"chitchats.":[132],"We":[133],"plan":[134],"make":[136],"data,":[138],"model,":[139],"code":[141],"public.":[142]},"counts_by_year":[{"year":2024,"cited_by_count":2},{"year":2023,"cited_by_count":1}],"updated_date":"2026-06-11T09:08:48.828518","created_date":"2025-10-10T00:00:00"}
