{"id":"https://openalex.org/W2983309655","doi":"https://doi.org/10.18653/v1/d19-5817","title":"Evaluating Question Answering Evaluation","display_name":"Evaluating Question Answering Evaluation","publication_year":2019,"publication_date":"2019-01-01","ids":{"openalex":"https://openalex.org/W2983309655","doi":"https://doi.org/10.18653/v1/d19-5817","mag":"2983309655"},"language":"en","primary_location":{"id":"doi:10.18653/v1/d19-5817","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/d19-5817","pdf_url":"https://www.aclweb.org/anthology/D19-5817.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2nd Workshop on Machine Reading for Question Answering","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://www.aclweb.org/anthology/D19-5817.pdf","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5087171054","display_name":"Anthony Chen","orcid":null},"institutions":[{"id":"https://openalex.org/I204250578","display_name":"University of California, Irvine","ror":"https://ror.org/04gyf1771","country_code":"US","type":"education","lineage":["https://openalex.org/I204250578"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Anthony Chen","raw_affiliation_strings":["University of California, Irvine, USA"],"affiliations":[{"raw_affiliation_string":"University of California, Irvine, USA","institution_ids":["https://openalex.org/I204250578"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5082136238","display_name":"Gabriel Stanovsky","orcid":"https://orcid.org/0000-0002-2420-8979"},"institutions":[{"id":"https://openalex.org/I4210156221","display_name":"Allen Institute for Artificial Intelligence","ror":"https://ror.org/05w520734","country_code":"US","type":"nonprofit","lineage":["https://openalex.org/I4210156221"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Gabriel Stanovsky","raw_affiliation_strings":["Allen Institute for Artificial Intelligence, Seattle, Washington, USA"],"affiliations":[{"raw_affiliation_string":"Allen Institute for Artificial Intelligence, Seattle, Washington, USA","institution_ids":["https://openalex.org/I4210156221"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5005779128","display_name":"Sameer Singh","orcid":"https://orcid.org/0000-0003-0621-6323"},"institutions":[{"id":"https://openalex.org/I204250578","display_name":"University of California, Irvine","ror":"https://ror.org/04gyf1771","country_code":"US","type":"education","lineage":["https://openalex.org/I204250578"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Sameer Singh","raw_affiliation_strings":["University of California, Irvine, USA"],"affiliations":[{"raw_affiliation_string":"University of California, Irvine, USA","institution_ids":["https://openalex.org/I204250578"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5035088083","display_name":"Matt Gardner","orcid":"https://orcid.org/0000-0001-8458-1727"},"institutions":[{"id":"https://openalex.org/I4210156221","display_name":"Allen Institute for Artificial Intelligence","ror":"https://ror.org/05w520734","country_code":"US","type":"nonprofit","lineage":["https://openalex.org/I4210156221"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Matt Gardner","raw_affiliation_strings":["Allen Institute for Artificial Intelligence, Irvine, California, USA"],"affiliations":[{"raw_affiliation_string":"Allen Institute for Artificial Intelligence, Irvine, California, USA","institution_ids":["https://openalex.org/I4210156221"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5087171054"],"corresponding_institution_ids":["https://openalex.org/I204250578"],"apc_list":null,"apc_paid":null,"fwci":5.7806,"has_fulltext":true,"cited_by_count":90,"citation_normalized_percentile":{"value":0.96822514,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":90,"max":100},"biblio":{"volume":null,"issue":null,"first_page":"119","last_page":"124"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T10028","display_name":"Topic Modeling","score":1.0,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10181","display_name":"Natural Language Processing Techniques","score":0.9997000098228455,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T11714","display_name":"Multimodal Machine Learning Applications","score":0.9937000274658203,"subfield":{"id":"https://openalex.org/subfields/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.8312076330184937},{"id":"https://openalex.org/keywords/metric","display_name":"Metric (unit)","score":0.7690027952194214},{"id":"https://openalex.org/keywords/question-answering","display_name":"Question answering","score":0.6369807124137878},{"id":"https://openalex.org/keywords/context","display_name":"Context (archaeology)","score":0.5909161567687988},{"id":"https://openalex.org/keywords/similarity","display_name":"Similarity (geometry)","score":0.5617334842681885},{"id":"https://openalex.org/keywords/matching","display_name":"Matching (statistics)","score":0.5028263926506042},{"id":"https://openalex.org/keywords/limit","display_name":"Limit (mathematics)","score":0.46041470766067505},{"id":"https://openalex.org/keywords/artificial-intelligence","display_name":"Artificial intelligence","score":0.4596973657608032},{"id":"https://openalex.org/keywords/machine-learning","display_name":"Machine learning","score":0.4439907371997833},{"id":"https://openalex.org/keywords/generative-grammar","display_name":"Generative grammar","score":0.42290470004081726},{"id":"https://openalex.org/keywords/information-retrieval","display_name":"Information retrieval","score":0.39788517355918884},{"id":"https://openalex.org/keywords/data-mining","display_name":"Data mining","score":0.3857901692390442},{"id":"https://openalex.org/keywords/natural-language-processing","display_name":"Natural language processing","score":0.3277657926082611},{"id":"https://openalex.org/keywords/statistics","display_name":"Statistics","score":0.08875894546508789},{"id":"https://openalex.org/keywords/mathematics","display_name":"Mathematics","score":0.08637061715126038}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.8312076330184937},{"id":"https://openalex.org/C176217482","wikidata":"https://www.wikidata.org/wiki/Q860554","display_name":"Metric (unit)","level":2,"score":0.7690027952194214},{"id":"https://openalex.org/C44291984","wikidata":"https://www.wikidata.org/wiki/Q1074173","display_name":"Question answering","level":2,"score":0.6369807124137878},{"id":"https://openalex.org/C2779343474","wikidata":"https://www.wikidata.org/wiki/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.5909161567687988},{"id":"https://openalex.org/C103278499","wikidata":"https://www.wikidata.org/wiki/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.5617334842681885},{"id":"https://openalex.org/C165064840","wikidata":"https://www.wikidata.org/wiki/Q1321061","display_name":"Matching (statistics)","level":2,"score":0.5028263926506042},{"id":"https://openalex.org/C151201525","wikidata":"https://www.wikidata.org/wiki/Q177239","display_name":"Limit (mathematics)","level":2,"score":0.46041470766067505},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.4596973657608032},{"id":"https://openalex.org/C119857082","wikidata":"https://www.wikidata.org/wiki/Q2539","display_name":"Machine learning","level":1,"score":0.4439907371997833},{"id":"https://openalex.org/C39890363","wikidata":"https://www.wikidata.org/wiki/Q36108","display_name":"Generative grammar","level":2,"score":0.42290470004081726},{"id":"https://openalex.org/C23123220","wikidata":"https://www.wikidata.org/wiki/Q816826","display_name":"Information retrieval","level":1,"score":0.39788517355918884},{"id":"https://openalex.org/C124101348","wikidata":"https://www.wikidata.org/wiki/Q172491","display_name":"Data mining","level":1,"score":0.3857901692390442},{"id":"https://openalex.org/C204321447","wikidata":"https://www.wikidata.org/wiki/Q30642","display_name":"Natural language processing","level":1,"score":0.3277657926082611},{"id":"https://openalex.org/C105795698","wikidata":"https://www.wikidata.org/wiki/Q12483","display_name":"Statistics","level":1,"score":0.08875894546508789},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.08637061715126038},{"id":"https://openalex.org/C134306372","wikidata":"https://www.wikidata.org/wiki/Q7754","display_name":"Mathematical analysis","level":1,"score":0.0},{"id":"https://openalex.org/C151730666","wikidata":"https://www.wikidata.org/wiki/Q7205","display_name":"Paleontology","level":1,"score":0.0},{"id":"https://openalex.org/C115961682","wikidata":"https://www.wikidata.org/wiki/Q860623","display_name":"Image (mathematics)","level":2,"score":0.0},{"id":"https://openalex.org/C162324750","wikidata":"https://www.wikidata.org/wiki/Q8134","display_name":"Economics","level":0,"score":0.0},{"id":"https://openalex.org/C86803240","wikidata":"https://www.wikidata.org/wiki/Q420","display_name":"Biology","level":0,"score":0.0},{"id":"https://openalex.org/C21547014","wikidata":"https://www.wikidata.org/wiki/Q1423657","display_name":"Operations management","level":1,"score":0.0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.18653/v1/d19-5817","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/d19-5817","pdf_url":"https://www.aclweb.org/anthology/D19-5817.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2nd Workshop on Machine Reading for Question Answering","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.18653/v1/d19-5817","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/d19-5817","pdf_url":"https://www.aclweb.org/anthology/D19-5817.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2nd Workshop on Machine Reading for Question Answering","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https://content.openalex.org/works/W2983309655.pdf","grobid_xml":"https://content.openalex.org/works/W2983309655.grobid-xml"},"referenced_works_count":25,"referenced_works":["https://openalex.org/W2101105183","https://openalex.org/W2123301721","https://openalex.org/W2144746247","https://openalex.org/W2154652894","https://openalex.org/W2551396370","https://openalex.org/W2806055002","https://openalex.org/W2888812214","https://openalex.org/W2892280852","https://openalex.org/W2896457183","https://openalex.org/W2898695519","https://openalex.org/W2919420119","https://openalex.org/W2936695845","https://openalex.org/W2947771965","https://openalex.org/W2962739339","https://openalex.org/W2963341956","https://openalex.org/W2963748441","https://openalex.org/W2963866616","https://openalex.org/W2963919867","https://openalex.org/W2963957489","https://openalex.org/W2963963993","https://openalex.org/W2963995027","https://openalex.org/W2964042428","https://openalex.org/W2987553933","https://openalex.org/W2996403597","https://openalex.org/W4288548690"],"related_works":["https://openalex.org/W2384605597","https://openalex.org/W2387743295","https://openalex.org/W2115758952","https://openalex.org/W3082787378","https://openalex.org/W2136007095","https://openalex.org/W2366230879","https://openalex.org/W3208425359","https://openalex.org/W2349927912","https://openalex.org/W4212839359","https://openalex.org/W4288102755"],"abstract_inverted_index":{"As":[0,191],"the":[1,66,127,146,160],"complexity":[2,147],"of":[3,58,68,148,162],"question":[4],"answering":[5],"(QA)":[6],"datasets":[7,89,92,150],"evolve,":[8],"moving":[9],"away":[10],"from":[11],"restricted":[12],"formats":[13],"like":[14,168],"span":[15],"extraction":[16],"and":[17,47,54,178],"multiplechoice":[18],"(MC)":[19],"to":[20,27,171,174,218,233],"free-form":[21,163],"answer":[22,128],"generation,":[23],"it":[24,216],"is":[25,38,93,107,156],"imperative":[26],"understand":[28],"how":[29],"well":[30,83],"current":[31,80,96,135],"metrics":[32,43,70,81,136,184],"perform":[33],"in":[34,71,159],"evaluating":[35,208],"QA.":[36,72,211],"This":[37,155],"especially":[39,157],"important":[40],"as":[41],"existing":[42,69,85,141],"(BLEU,":[44],"ROUGE,":[45],"METEOR,":[46],"F1)":[48],"are":[49],"computed":[50],"using":[51,201],"n-gram":[52,188],"similarity":[53],"have":[55],"a":[56,108,192,195,203,230],"number":[57],"well-known":[59],"drawbacks.":[60],"In":[61],"this":[62],"work,":[63],"we":[64,76,166,199],"study":[65,131],"suitability":[67],"For":[73],"generative":[74],"QA,":[75,103,164],"show":[77,112],"that":[78,113,133,151,185,214],"while":[79,134],"do":[82],"on":[84,126,228],"datasets,":[86,143],"converting":[87],"multiple-choice":[88],"into":[90],"free-response":[91],"challenging":[94],"for":[95,120,140,207,210],"metrics.":[97],"We":[98,111,212],"also":[99],"look":[100],"at":[101],"span-based":[102],"where":[104,165],"F":[105,114],"1":[106,115],"reasonable":[109],"metric.":[110],"may":[116,137,236],"not":[117],"be":[118,138,153,172],"suitable":[119,139],"all":[121],"extractive":[122],"QA":[123,142,149,197,234],"tasks":[124],"depending":[125],"types.":[129],"Our":[130],"suggests":[132],"they":[144],"limit":[145],"can":[152],"created.":[154],"true":[158],"context":[161],"would":[167],"our":[169],"models":[170],"able":[173],"generate":[175],"more":[176],"complex":[177],"abstractive":[179],"answers,":[180],"thus":[181],"necessitating":[182],"new":[183],"go":[186],"beyond":[187],"based":[189],"matching.":[190],"step":[193],"towards":[194],"better":[196],"metric,":[198],"explore":[200],"BERTScore,":[202],"recently":[204],"proposed":[205],"metric":[206,232],"translation,":[209],"find":[213],"although":[215],"fails":[217],"provide":[219],"stronger":[220],"correlation":[221],"with":[222],"human":[223],"judgements,":[224],"future":[225],"work":[226],"focused":[227],"tailoring":[229],"BERT-based":[231],"evaluation":[235],"prove":[237],"fruitful.":[238]},"counts_by_year":[{"year":2026,"cited_by_count":5},{"year":2025,"cited_by_count":18},{"year":2024,"cited_by_count":21},{"year":2023,"cited_by_count":6},{"year":2022,"cited_by_count":15},{"year":2021,"cited_by_count":16},{"year":2020,"cited_by_count":8},{"year":2019,"cited_by_count":1}],"updated_date":"2026-04-21T08:09:41.155169","created_date":"2025-10-10T00:00:00"}
