{"id":"https://openalex.org/W4412376914","doi":"https://doi.org/10.1145/3726302.3730190","title":"Evaluating LLMs' (In)ability to Follow Prompts in QA Tasks","display_name":"Evaluating LLMs' (In)ability to Follow Prompts in QA Tasks","publication_year":2025,"publication_date":"2025-07-13","ids":{"openalex":"https://openalex.org/W4412376914","doi":"https://doi.org/10.1145/3726302.3730190"},"language":"en","primary_location":{"id":"doi:10.1145/3726302.3730190","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3726302.3730190","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3726302.3730190","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 48th International ACM SIGIR Conference on Research and Development in Information Retrieval","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://dl.acm.org/doi/pdf/10.1145/3726302.3730190","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5016242562","display_name":"Aparup Khatua","orcid":"https://orcid.org/0000-0001-8235-1637"},"institutions":[{"id":"https://openalex.org/I27837315","display_name":"University of Michigan","ror":"https://ror.org/00jmfr291","country_code":"US","type":"education","lineage":["https://openalex.org/I27837315"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Aparup Khatua","raw_affiliation_strings":["University of Michigan, Ann Arbor, MI, USA"],"raw_orcid":"https://orcid.org/0000-0001-8235-1637","affiliations":[{"raw_affiliation_string":"University of Michigan, Ann Arbor, MI, USA","institution_ids":["https://openalex.org/I27837315"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5118969044","display_name":"Tobias Kalmbach","orcid":null},"institutions":[{"id":"https://openalex.org/I114112103","display_name":"Leibniz University Hannover","ror":"https://ror.org/0304hq317","country_code":"DE","type":"education","lineage":["https://openalex.org/I114112103"]},{"id":"https://openalex.org/I4210136150","display_name":"L3S Research Center","ror":"https://ror.org/039t4wk02","country_code":"DE","type":"facility","lineage":["https://openalex.org/I114112103","https://openalex.org/I4210136150","https://openalex.org/I94509681"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Tobias Kalmbach","raw_affiliation_strings":["L3S Research Center, Leibniz University, Hannover, Germany"],"raw_orcid":"https://orcid.org/0009-0005-7447-2195","affiliations":[{"raw_affiliation_string":"L3S Research Center, Leibniz University, Hannover, Germany","institution_ids":["https://openalex.org/I4210136150","https://openalex.org/I114112103"]}]},{"author_position":"middle","author":{"id":"https://openalex.org/A5009542542","display_name":"Prasenjit Mitra","orcid":"https://orcid.org/0000-0002-7530-9497"},"institutions":[{"id":"https://openalex.org/I4210130200","display_name":"Carnegie Mellon University Africa","ror":"https://ror.org/02f33m021","country_code":"RW","type":"education","lineage":["https://openalex.org/I4210130200","https://openalex.org/I74973139"]}],"countries":["RW"],"is_corresponding":false,"raw_author_name":"Prasenjit Mitra","raw_affiliation_strings":["Carnegie Mellon, Africa, Kigali, Rwanda"],"raw_orcid":"https://orcid.org/0000-0002-7530-9497","affiliations":[{"raw_affiliation_string":"Carnegie Mellon, Africa, Kigali, Rwanda","institution_ids":["https://openalex.org/I4210130200"]}]},{"author_position":"last","author":{"id":"https://openalex.org/A5102805562","display_name":"Sandipan Sikdar","orcid":"https://orcid.org/0000-0001-8957-7364"},"institutions":[{"id":"https://openalex.org/I114112103","display_name":"Leibniz University Hannover","ror":"https://ror.org/0304hq317","country_code":"DE","type":"education","lineage":["https://openalex.org/I114112103"]},{"id":"https://openalex.org/I4210136150","display_name":"L3S Research Center","ror":"https://ror.org/039t4wk02","country_code":"DE","type":"facility","lineage":["https://openalex.org/I114112103","https://openalex.org/I4210136150","https://openalex.org/I94509681"]}],"countries":["DE"],"is_corresponding":false,"raw_author_name":"Sandipan Sikdar","raw_affiliation_strings":["L3S Research Center, Leibniz University, Hannover, Germany"],"raw_orcid":"https://orcid.org/0000-0001-8957-7364","affiliations":[{"raw_affiliation_string":"L3S Research Center, Leibniz University, Hannover, Germany","institution_ids":["https://openalex.org/I4210136150","https://openalex.org/I114112103"]}]}],"institutions":[],"countries_distinct_count":3,"institutions_distinct_count":4,"corresponding_author_ids":["https://openalex.org/A5016242562"],"corresponding_institution_ids":["https://openalex.org/I27837315"],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.07989154,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"2941","last_page":"2945"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11902","display_name":"Intelligent Tutoring Systems and Adaptive Learning","score":0.9018999934196472,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11902","display_name":"Intelligent Tutoring Systems and Adaptive Learning","score":0.9018999934196472,"subfield":{"id":"https://openalex.org/subfields/1702","display_name":"Artificial Intelligence"},"field":{"id":"https://openalex.org/fields/17","display_name":"Computer Science"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/computer-science","display_name":"Computer science","score":0.5888955593109131}],"concepts":[{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.5888955593109131}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1145/3726302.3730190","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3726302.3730190","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3726302.3730190","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 48th International ACM SIGIR Conference on Research and Development in Information Retrieval","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.1145/3726302.3730190","is_oa":true,"landing_page_url":"https://doi.org/10.1145/3726302.3730190","pdf_url":"https://dl.acm.org/doi/pdf/10.1145/3726302.3730190","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 48th International ACM SIGIR Conference on Research and Development in Information Retrieval","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G367552324","display_name":null,"funder_award_id":"01DD20003","funder_id":"https://openalex.org/F4320321114","funder_display_name":"Bundesministerium f\u00fcr Bildung und Forschung"}],"funders":[{"id":"https://openalex.org/F4320313139","display_name":"Nieders\u00e4chsische Ministerium f\u00fcr Wissenschaft und Kultur","ror":"https://ror.org/0116z8r77"},{"id":"https://openalex.org/F4320320882","display_name":"Volkswagen Foundation","ror":"https://ror.org/03bsmfz84"},{"id":"https://openalex.org/F4320321114","display_name":"Bundesministerium f\u00fcr Bildung und Forschung","ror":"https://ror.org/04pz7b180"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4412376914.pdf","grobid_xml":"https://content.openalex.org/works/W4412376914.grobid-xml"},"referenced_works_count":4,"referenced_works":["https://openalex.org/W4205460703","https://openalex.org/W4309674289","https://openalex.org/W4384918448","https://openalex.org/W6604065363"],"related_works":["https://openalex.org/W4391375266","https://openalex.org/W2899084033","https://openalex.org/W2748952813","https://openalex.org/W2390279801","https://openalex.org/W4391913857","https://openalex.org/W2358668433","https://openalex.org/W4396701345","https://openalex.org/W2376932109","https://openalex.org/W2001405890","https://openalex.org/W4396696052"],"abstract_inverted_index":{"While":[0],"LLMs":[1,39,76,81,156,177,186],"have":[2,160],"achieved":[3],"impressive":[4],"performance":[5,68],"across":[6],"various":[7],"tasks,":[8,32],"one":[9],"under-explored":[10],"area":[11],"is":[12,37,87],"evaluating":[13],"their":[14,41,85],"ability":[15,109],"to":[16,62,72,106,110,127,159,189],"follow":[17,63,111,190],"instructions":[18,191],"provided":[19,48],"in":[20,49,92,162],"the":[21,27,46,50],"prompt":[22,51],"when":[23,52,59,84],"generating":[24,53],"responses.":[25],"In":[26],"context":[28,47],"of":[29,144],"question-answering":[30],"(QA)":[31],"a":[33,137],"crucial":[34],"research":[35,98],"gap":[36],"whether":[38],"prioritize":[40],"own":[42],"parametric":[43,132,172],"knowledge":[44,86],"or":[45,165],"an":[54,103],"answer.":[55],"Ignoring":[56],"prompts,":[57],"even":[58],"explicitly":[60],"instructed":[61],"them,":[64],"may":[65],"adversely":[66],"affect":[67],"and":[69,89,131,149,167,183,193],"potentially":[70],"lead":[71],"unintended":[73],"consequences.":[74],"Additionally,":[75],"should":[77,82],"be":[78,121,169],"self-reflective":[79],"(i.e.,":[80,125],"recognize":[83],"inadequate)":[88],"avoid":[90],"hallucinations":[91],"such":[93,117],"scenarios.":[94],"To":[95],"address":[96],"our":[97,179],"question,":[99],"we":[100,135],"propose":[101],"Oedipus,":[102],"evaluation":[104,181],"framework":[105,182],"evaluate":[107,175],"LLMs'":[108],"prompts.":[112],"We":[113,174],"further":[114],"note":[115],"that":[116,155,185],"abilities":[118],"could":[119],"also":[120],"influenced":[122],"by":[123],"contamination":[124],"exposure":[126],"datasets":[128],"during":[129],"training)":[130],"knowledge.":[133,173],"Consequently,":[134],"develop":[136],"novel":[138],"QA":[139],"dataset":[140],"with":[141,152],"four":[142],"types":[143],"contexts-":[145],"correct,":[146],"masked,":[147],"noisy,":[148],"absurd":[150],"contexts":[151],"recent":[153],"questions":[154],"are":[157,194],"unlikely":[158],"encountered":[161],"pre-training":[163],"data":[164],"corpus":[166],"cannot":[168],"answered":[170],"from":[171],"eight":[176],"through":[178],"proposed":[180],"observe":[184],"often":[187],"fail":[188],"correctly":[192],"not":[195],"self-reflective.":[196]},"counts_by_year":[],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}
