{"id":"https://openalex.org/W4392903115","doi":"https://doi.org/10.1109/icassp48485.2024.10448210","title":"SpeechDPR: End-To-End Spoken Passage Retrieval For Open-Domain Spoken Question Answering","display_name":"SpeechDPR: End-To-End Spoken Passage Retrieval For Open-Domain Spoken Question Answering","publication_year":2024,"publication_date":"2024-03-18","ids":{"openalex":"https://openalex.org/W4392903115","doi":"https://doi.org/10.1109/icassp48485.2024.10448210"},"language":"en","primary_location":{"id":"doi:10.1109/icassp48485.2024.10448210","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp48485.2024.10448210","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5069664539","display_name":"Chyi-Jiunn Lin","orcid":"https://orcid.org/0000-0002-2447-9330"},"institutions":[{"id":"https://openalex.org/I16733864","display_name":"National Taiwan University","ror":"https://ror.org/05bqach95","country_code":"TW","type":"education","lineage":["https://openalex.org/I16733864"]}],"countries":["TW"],"is_corresponding":true,"raw_author_name":"Chyi-Jiunn Lin","raw_affiliation_strings":["National Taiwan University"],"affiliations":[{"raw_affiliation_string":"National Taiwan University","institution_ids":["https://openalex.org/I16733864"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5108124525","display_name":"Guan-Ting Lin","orcid":null},"institutions":[{"id":"https://openalex.org/I16733864","display_name":"National Taiwan University","ror":"https://ror.org/05bqach95","country_code":"TW","type":"education","lineage":["https://openalex.org/I16733864"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Guan-Ting Lin","raw_affiliation_strings":["National Taiwan University"],"affiliations":[{"raw_affiliation_string":"National Taiwan University","institution_ids":["https://openalex.org/I16733864"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5058729228","display_name":"Yung-Sung Chuang","orcid":"https://orcid.org/0000-0002-1723-5063"},"institutions":[{"id":"https://openalex.org/I63966007","display_name":"Massachusetts Institute of Technology","ror":"https://ror.org/042nb2s44","country_code":"US","type":"education","lineage":["https://openalex.org/I63966007"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yung-Sung Chuang","raw_affiliation_strings":["Massachusetts Institute of Technology"],"affiliations":[{"raw_affiliation_string":"Massachusetts Institute of Technology","institution_ids":["https://openalex.org/I63966007"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5103572986","display_name":"Wei-Lun Wu","orcid":null},"institutions":[{"id":"https://openalex.org/I16733864","display_name":"National Taiwan University","ror":"https://ror.org/05bqach95","country_code":"TW","type":"education","lineage":["https://openalex.org/I16733864"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Wei-Lun Wu","raw_affiliation_strings":["National Taiwan University"],"affiliations":[{"raw_affiliation_string":"National Taiwan University","institution_ids":["https://openalex.org/I16733864"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5029566548","display_name":"Shang-Wen Li","orcid":"https://orcid.org/0000-0003-0656-9874"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shang-Wen Li","raw_affiliation_strings":["Meta AI"],"affiliations":[{"raw_affiliation_string":"Meta AI","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5101170706","display_name":"Abdelrahman Mohamed","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Abdelrahman Mohamed","raw_affiliation_strings":["Rembrand"],"affiliations":[{"raw_affiliation_string":"Rembrand","institution_ids":[]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5040508737","display_name":"Hung-yi Lee","orcid":"https://orcid.org/0000-0002-9654-5747"},"institutions":[{"id":"https://openalex.org/I16733864","display_name":"National Taiwan University","ror":"https://ror.org/05bqach95","country_code":"TW","type":"education","lineage":["https://openalex.org/I16733864"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Hung-Yi Lee","raw_affiliation_strings":["National Taiwan University"],"affiliations":[{"raw_affiliation_string":"National Taiwan University","institution_ids":["https://openalex.org/I16733864"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5044010123","display_name":"Lin-shan Lee","orcid":null},"institutions":[{"id":"https://openalex.org/I16733864","display_name":"National Taiwan University","ror":"https://ror.org/05bqach95","country_code":"TW","type":"education","lineage":["https://openalex.org/I16733864"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Lin-Shan Lee","raw_affiliation_strings":["National Taiwan University"],"affiliations":[{"raw_affiliation_string":"National Taiwan University","institution_ids":["https://openalex.org/I16733864"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":8,"corresponding_author_ids":["https://openalex.org/A5069664539"],"corresponding_institution_ids":["https://openalex.org/I16733864"],"apc_list":null,"apc_paid":null,"fwci":1.3781,"has_fulltext":false,"cited_by_count":4,"citation_normalized_percentile":{"value":0.82901035,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":97,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"12476","last_page":"12480"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":0.9998999834060669,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9998000264167786,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T12031","display_name":"Speech and dialogue systems","score":0.9993000030517578,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8406009674072266},{"id":"https://openalex.org/keywords/question-answering","display_name":"Question answering","score":0.6520296335220337},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.6365007162094116},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.6031356453895569},{"id":"https://openalex.org/keywords/spoken-language","display_name":"Spoken language","score":0.6024088263511658},{"id":"https://openalex.org/keywords/sentence","display_name":"Sentence","score":0.5934491157531738},{"id":"https://openalex.org/keywords/frame","display_name":"Frame (networking)","score":0.5175039768218994},{"id":"https://openalex.org/keywords/vocabulary","display_name":"Vocabulary","score":0.5151087641716003},{"id":"https://openalex.org/keywords/domain","display_name":"Domain (mathematical analysis)","score":0.5106025338172913},{"id":"https://openalex.org/keywords/speech-recognition","display_name":"Speech recognition","score":0.49788475036621094},{"id":"https://openalex.org/keywords/component","display_name":"Component (thermodynamics)","score":0.4975891411304474},{"id":"https://openalex.org/keywords/representation","display_name":"Representation (politics)","score":0.4521775245666504},{"id":"https://openalex.org/keywords/linguistics","display_name":"Linguistics","score":0.11468639969825745}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8406009674072266},{"id":"https://openalex.org/C44291984","wikidata":"https://www.wikidata.org/wiki/Q1074173","display_name":"Question answering","level":2,"score":0.6520296335220337},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.6365007162094116},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.6031356453895569},{"id":"https://openalex.org/C2776230583","wikidata":"https://www.wikidata.org/wiki/Q1322198","display_name":"Spoken language","level":2,"score":0.6024088263511658},{"id":"https://openalex.org/C2777530160","wikidata":"https://www.wikidata.org/wiki/Q41796","display_name":"Sentence","level":2,"score":0.5934491157531738},{"id":"https://openalex.org/C126042441","wikidata":"https://www.wikidata.org/wiki/Q1324888","display_name":"Frame (networking)","level":2,"score":0.5175039768218994},{"id":"https://openalex.org/C2777601683","wikidata":"https://www.wikidata.org/wiki/Q6499736","display_name":"Vocabulary","level":2,"score":0.5151087641716003},{"id":"https://openalex.org/C36503486","wikidata":"https://www.wikidata.org/wiki/Q11235244","display_name":"Domain (mathematical analysis)","level":2,"score":0.5106025338172913},{"id":"https://openalex.org/C28490314","wikidata":"https://www.wikidata.org/wiki/Q189436","display_name":"Speech recognition","level":1,"score":0.49788475036621094},{"id":"https://openalex.org/C168167062","wikidata":"https://www.wikidata.org/wiki/Q1117970","display_name":"Component (thermodynamics)","level":2,"score":0.4975891411304474},{"id":"https://openalex.org/C2776359362","wikidata":"https://www.wikidata.org/wiki/Q2145286","display_name":"Representation (politics)","level":3,"score":0.4521775245666504},{"id":"https://openalex.org/C41895202","wikidata":"https://www.wikidata.org/wiki/Q8162","display_name":"Linguistics","level":1,"score":0.11468639969825745},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C17744445","wikidata":"https://www.wikidata.org/wiki/Q36442","display_name":"Political science","level":0,"score":0.0},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.0},{"id":"https://openalex.org/C199539241","wikidata":"https://www.wikidata.org/wiki/Q7748","display_name":"Law","level":1,"score":0.0},{"id":"https://openalex.org/C121332964","wikidata":"https://www.wikidata.org/wiki/Q413","display_name":"Physics","level":0,"score":0.0},{"id":"https://openalex.org/C97355855","wikidata":"https://www.wikidata.org/wiki/Q11473","display_name":"Thermodynamics","level":1,"score":0.0},{"id":"https://openalex.org/C76155785","wikidata":"https://www.wikidata.org/wiki/Q418","display_name":"Telecommunications","level":1,"score":0.0},{"id":"https://openalex.org/C138885662","wikidata":"https://www.wikidata.org/wiki/Q5891","display_name":"Philosophy","level":0,"score":0.0},{"id":"https://openalex.org/C94625758","wikidata":"https://www.wikidata.org/wiki/Q7163","display_name":"Politics","level":2,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109/icassp48485.2024.10448210","is_oa":false,"landing_page_url":"https://doi.org/10.1109/icassp48485.2024.10448210","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":42,"referenced_works":["https://openalex.org/W210770835","https://openalex.org/W1801866228","https://openalex.org/W1984076147","https://openalex.org/W2060951214","https://openalex.org/W2148986421","https://openalex.org/W2171759819","https://openalex.org/W2747874407","https://openalex.org/W2782717521","https://openalex.org/W2885485938","https://openalex.org/W2912817604","https://openalex.org/W2912924812","https://openalex.org/W2950133079","https://openalex.org/W2962854302","https://openalex.org/W2962985038","https://openalex.org/W2963330681","https://openalex.org/W2963339397","https://openalex.org/W2963748441","https://openalex.org/W2963879374","https://openalex.org/W2965373594","https://openalex.org/W2995181338","https://openalex.org/W3019546258","https://openalex.org/W3096109555","https://openalex.org/W3099700870","https://openalex.org/W3102219307","https://openalex.org/W3121694563","https://openalex.org/W3136270197","https://openalex.org/W3148001440","https://openalex.org/W3180374548","https://openalex.org/W4205807230","https://openalex.org/W4221146627","https://openalex.org/W4246962055","https://openalex.org/W4287173589","https://openalex.org/W4319862670","https://openalex.org/W4385571440","https://openalex.org/W4394671563","https://openalex.org/W6608545824","https://openalex.org/W6691476020","https://openalex.org/W6766673545","https://openalex.org/W6779068807","https://openalex.org/W6779872132","https://openalex.org/W6790356757","https://openalex.org/W6795952400"],"related_works":["https://openalex.org/W2384605597","https://openalex.org/W2387743295","https://openalex.org/W3082787378","https://openalex.org/W2136007095","https://openalex.org/W2366230879","https://openalex.org/W3208425359","https://openalex.org/W2349927912","https://openalex.org/W3159777597","https://openalex.org/W4212839359","https://openalex.org/W2115758952"],"abstract_inverted_index":{"Spoken":[0],"Question":[1],"Answering":[2],"(SQA)":[3],"is":[4,117,142],"essential":[5],"for":[6,82],"machines":[7],"to":[8,10,30,51,124,145],"reply":[9],"user\u2019s":[11],"question":[12],"by":[13,96],"finding":[14],"the":[15,39,48,58,72,83,87,100,125],"answer":[16,59],"span":[17],"within":[18],"a":[19,61,92],"given":[20],"spoken":[21,62],"passage.":[22],"SQA":[23,44],"has":[24],"been":[25],"previously":[26],"achieved":[27],"without":[28],"ASR":[29,105],"avoid":[31],"recognition":[32,147],"errors":[33],"and":[34,107,130,132],"Out-of-Vocabulary":[35],"(OOV)":[36],"problems.":[37],"However,":[38],"real-world":[40],"problem":[41],"of":[42,86,103,128],"Open-domain":[43],"(openSQA),":[45],"in":[46,64],"which":[47],"machine":[49],"needs":[50],"first":[52,73],"retrieve":[53],"passages":[54],"that":[55],"possibly":[56],"contain":[57],"from":[60,99],"archive":[63],"addition,":[65],"was":[66,137],"never":[67],"considered.":[68],"This":[69],"paper":[70],"proposes":[71],"known":[74],"end-to-end":[75],"frame-work,":[76],"Speech":[77],"Dense":[78],"Passage":[79],"Retriever":[80],"(SpeechDPR),":[81],"retrieval":[84],"component":[85],"openSQA":[88],"problem.":[89],"SpeechDPR":[90],"learns":[91],"sentence-level":[93],"semantic":[94],"representation":[95],"distilling":[97],"knowledge":[98],"cascading":[101,126],"model":[102,127],"unsupervised":[104],"(UASR)":[106],"text":[108],"dense":[109],"retriever":[110],"(TDR).":[111],"No":[112],"manually":[113],"transcribed":[114],"speech":[115,146],"data":[116],"needed.":[118],"Initial":[119],"experiments":[120],"showed":[121],"performance":[122],"comparable":[123],"UASR":[129,136],"TDR,":[131],"significantly":[133],"better":[134],"when":[135],"poor,":[138],"verifying":[139],"this":[140],"approach":[141],"more":[143],"robust":[144],"errors.":[148]},"counts_by_year":[{"year":2025,"cited_by_count":4}],"updated_date":"2026-04-03T22:45:19.894376","created_date":"2025-10-10T00:00:00"}
