{"id":"https://openalex.org/W7117261049","doi":"https://doi.org/10.48550/arxiv.2512.20204","title":"Corpus of Cross-lingual Dialogues with Minutes and Detection of Misunderstandings","display_name":"Corpus of Cross-lingual Dialogues with Minutes and Detection of Misunderstandings","publication_year":2025,"publication_date":"2025-12-23","ids":{"openalex":"https://openalex.org/W7117261049","doi":"https://doi.org/10.48550/arxiv.2512.20204"},"language":null,"primary_location":{"id":"doi:10.48550/arxiv.2512.20204","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2512.20204","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"type":"preprint","indexed_in":["datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https://doi.org/10.48550/arxiv.2512.20204","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5119372292","display_name":"Marko \u010cechovi\u010d","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"\u010cechovi\u010d, Marko","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5119372293","display_name":"Nat\u00e1lia Komorn\u00edkov\u00e1","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Komorn\u00edkov\u00e1, Nat\u00e1lia","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5056031235","display_name":"Dominik Mach\u00e1\u010dek","orcid":"https://orcid.org/0000-0002-5530-1615"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mach\u00e1\u010dek, Dominik","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5121298095","display_name":"Ond\u0159ej Bojar","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Bojar, Ond\u0159ej","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5119372292"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.6154999732971191,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.6154999732971191,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.2387000024318695,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T13629","display_name":"Text Readability and Simplification","score":0.026599999517202377,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/task","display_name":"Task (project management)","score":0.6484000086784363},{"id":"https://openalex.org/keywords/recall","display_name":"Recall","score":0.5296000242233276},{"id":"https://openalex.org/keywords/corpus-linguistics","display_name":"Corpus linguistics","score":0.41370001435279846},{"id":"https://openalex.org/keywords/computational-linguistics","display_name":"Computational linguistics","score":0.4018999934196472},{"id":"https://openalex.org/keywords/language-model","display_name":"Language model","score":0.35530000925064087},{"id":"https://openalex.org/keywords/speech-corpus","display_name":"Speech corpus","score":0.34119999408721924},{"id":"https://openalex.org/keywords/text-corpus","display_name":"Text corpus","score":0.33889999985694885}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.775600016117096},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.7476999759674072},{"id":"https://openalex.org/C2780451532","wikidata":"https://www.wikidata.org/wiki/Q759676","display_name":"Task (project management)","level":2,"score":0.6484000086784363},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6118999719619751},{"id":"https://openalex.org/C100660578","wikidata":"https://www.wikidata.org/wiki/Q18733","display_name":"Recall","level":2,"score":0.5296000242233276},{"id":"https://openalex.org/C532629269","wikidata":"https://www.wikidata.org/wiki/Q865083","display_name":"Corpus linguistics","level":2,"score":0.41370001435279846},{"id":"https://openalex.org/C155092808","wikidata":"https://www.wikidata.org/wiki/Q182557","display_name":"Computational linguistics","level":2,"score":0.4018999934196472},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.388700008392334},{"id":"https://openalex.org/C137293760","wikidata":"https://www.wikidata.org/wiki/Q3621696","display_name":"Language model","level":2,"score":0.35530000925064087},{"id":"https://openalex.org/C91863865","wikidata":"https://www.wikidata.org/wiki/Q4349497","display_name":"Speech corpus","level":3,"score":0.34119999408721924},{"id":"https://openalex.org/C2474386","wikidata":"https://www.wikidata.org/wiki/Q461183","display_name":"Text corpus","level":2,"score":0.33889999985694885},{"id":"https://openalex.org/C504749915","wikidata":"https://www.wikidata.org/wiki/Q9010971","display_name":"Speech technology","level":3,"score":0.326200008392334},{"id":"https://openalex.org/C175154964","wikidata":"https://www.wikidata.org/wiki/Q380077","display_name":"Task analysis","level":3,"score":0.3138999938964844},{"id":"https://openalex.org/C203005215","wikidata":"https://www.wikidata.org/wiki/Q79798","display_name":"Machine translation","level":2,"score":0.31380000710487366},{"id":"https://openalex.org/C2777267654","wikidata":"https://www.wikidata.org/wiki/Q3519023","display_name":"Test (biology)","level":2,"score":0.3100000023841858},{"id":"https://openalex.org/C81669768","wikidata":"https://www.wikidata.org/wiki/Q2359161","display_name":"Precision and recall","level":2,"score":0.30309998989105225},{"id":"https://openalex.org/C61328038","wikidata":"https://www.wikidata.org/wiki/Q3358061","display_name":"Speech processing","level":2,"score":0.29750001430511475},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.27959999442100525},{"id":"https://openalex.org/C2983448237","wikidata":"https://www.wikidata.org/wiki/Q1078276","display_name":"Language understanding","level":2,"score":0.2619999945163727}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.48550/arxiv.2512.20204","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2512.20204","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"doi:10.48550/arxiv.2512.20204","is_oa":true,"landing_page_url":"https://doi.org/10.48550/arxiv.2512.20204","pdf_url":null,"source":{"id":"https://openalex.org/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https://openalex.org/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https://openalex.org/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":null,"is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"article"},"sustainable_development_goals":[{"score":0.7387698292732239,"display_name":"Quality Education","id":"https://metadata.un.org/sdg/4"}],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Speech":[0],"processing":[1],"and":[2,30,39,71,78,80,116,131,164],"translation":[3],"technology":[4],"have":[5],"the":[6,86,101,134,149],"potential":[7],"to":[8,121,141,154],"facilitate":[9],"meetings":[10],"of":[11,43,63,66,88,100,108,113,136,162,166],"individuals":[12,47],"who":[13,52],"do":[14],"not":[15],"share":[16],"any":[17],"common":[18,50],"language.":[19],"To":[20],"evaluate":[21],"automatic":[22,56,79,106],"systems":[23],"for":[24],"such":[25],"a":[26,28,41,49],"task,":[27],"versatile":[29],"realistic":[31],"evaluation":[32],"corpus":[33,42,61,94],"is":[34,152],"needed.":[35],"Therefore,":[36],"we":[37,104,119],"create":[38],"present":[40],"cross-lingual":[44,91,125],"dialogues":[45],"between":[46],"without":[48],"language":[51,139],"were":[53],"facilitated":[54],"by":[55],"simultaneous":[57],"speech":[58,67],"translation.":[59],"The":[60,145],"consists":[62],"5":[64],"hours":[65],"recordings":[68],"with":[69,158,160],"ASR":[70],"gold":[72],"transcripts":[73],"in":[74,124],"12":[75],"original":[76],"languages":[77],"corrected":[81],"translations":[82],"into":[83,90],"English.":[84],"For":[85,110],"purposes":[87],"research":[89],"summarization,":[92],"our":[93],"also":[95,132],"includes":[96],"written":[97],"summaries":[98],"(minutes)":[99],"meetings.":[102,126],"Moreover,":[103],"propose":[105],"detection":[107],"misunderstandings.":[109],"an":[111],"overview":[112],"this":[114],"task":[115],"its":[117],"complexity,":[118],"attempt":[120],"quantify":[122],"misunderstandings":[123,129,159],"We":[127],"annotate":[128],"manually":[130],"test":[133],"ability":[135],"current":[137],"large":[138],"models":[140],"detect":[142],"them":[143],"automatically.":[144],"results":[146],"show":[147],"that":[148],"Gemini":[150],"model":[151],"able":[153],"identify":[155],"text":[156],"spans":[157],"recall":[161],"77%":[163],"precision":[165],"47%.":[167]},"counts_by_year":[],"updated_date":"2025-12-25T23:15:44.422516","created_date":"2025-12-25T00:00:00"}
