{"id":"https://openalex.org/W4416036301","doi":"https://doi.org/10.18653/v1/2025.emnlp-main.706","title":"Transparent and Coherent Procedural Mistake Detection","display_name":"Transparent and Coherent Procedural Mistake Detection","publication_year":2025,"publication_date":"2025-01-01","ids":{"openalex":"https://openalex.org/W4416036301","doi":"https://doi.org/10.18653/v1/2025.emnlp-main.706"},"language":null,"primary_location":{"id":"doi:10.18653/v1/2025.emnlp-main.706","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.emnlp-main.706","pdf_url":"https://aclanthology.org/2025.emnlp-main.706.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":true,"oa_status":"gold","oa_url":"https://aclanthology.org/2025.emnlp-main.706.pdf","any_repository_has_fulltext":null},"authorships":[{"author_position":"first","author":{"id":"https://openalex.org/A5001093904","display_name":"Shane Storks","orcid":"https://orcid.org/0000-0002-5826-4426"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Shane Storks","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5093192188","display_name":"Itamar Bar-Yossef","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Itamar Bar-Yossef","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5115024033","display_name":"Yayuan Li","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yayuan Li","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5100691427","display_name":"Zheyuan Zhang","orcid":"https://orcid.org/0000-0001-9516-4603"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zheyuan Zhang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https://openalex.org/A5115539406","display_name":"Jason J. Corso","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jason J Corso","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https://openalex.org/A5026638047","display_name":"Joyce Chai","orcid":"https://orcid.org/0000-0002-9658-2230"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Joyce Chai","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":6,"corresponding_author_ids":["https://openalex.org/A5001093904"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":0.0,"has_fulltext":true,"cited_by_count":0,"citation_normalized_percentile":{"value":0.37271084,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":"13979","last_page":"14013"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https://openalex.org/T11856","display_name":"Thermography and Photoacoustic Techniques","score":0.07769999653100967,"subfield":{"id":"https://openalex.org/subfields/2211","display_name":"Mechanics of Materials"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},"topics":[{"id":"https://openalex.org/T11856","display_name":"Thermography and Photoacoustic Techniques","score":0.07769999653100967,"subfield":{"id":"https://openalex.org/subfields/2211","display_name":"Mechanics of Materials"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T10876","display_name":"Fault Detection and Control Systems","score":0.03840000182390213,"subfield":{"id":"https://openalex.org/subfields/2207","display_name":"Control and Systems Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}},{"id":"https://openalex.org/T14117","display_name":"Integrated Circuits and Semiconductor Failure Analysis","score":0.02290000021457672,"subfield":{"id":"https://openalex.org/subfields/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https://openalex.org/fields/22","display_name":"Engineering"},"domain":{"id":"https://openalex.org/domains/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https://openalex.org/keywords/mistake","display_name":"Mistake","score":0.5307999849319458},{"id":"https://openalex.org/keywords/noise","display_name":"Noise (video)","score":0.32519999146461487},{"id":"https://openalex.org/keywords/signal-processing","display_name":"Signal processing","score":0.25999999046325684},{"id":"https://openalex.org/keywords/feature","display_name":"Feature (linguistics)","score":0.25870001316070557}],"concepts":[{"id":"https://openalex.org/C2777179996","wikidata":"https://www.wikidata.org/wiki/Q911222","display_name":"Mistake","level":2,"score":0.5307999849319458},{"id":"https://openalex.org/C41008148","wikidata":"https://www.wikidata.org/wiki/Q21198","display_name":"Computer science","level":0,"score":0.4706999957561493},{"id":"https://openalex.org/C99498987","wikidata":"https://www.wikidata.org/wiki/Q2210247","display_name":"Noise (video)","level":3,"score":0.32519999146461487},{"id":"https://openalex.org/C11413529","wikidata":"https://www.wikidata.org/wiki/Q8366","display_name":"Algorithm","level":1,"score":0.29739999771118164},{"id":"https://openalex.org/C154945302","wikidata":"https://www.wikidata.org/wiki/Q11660","display_name":"Artificial intelligence","level":1,"score":0.2732999920845032},{"id":"https://openalex.org/C104267543","wikidata":"https://www.wikidata.org/wiki/Q208163","display_name":"Signal processing","level":3,"score":0.25999999046325684},{"id":"https://openalex.org/C2776401178","wikidata":"https://www.wikidata.org/wiki/Q12050496","display_name":"Feature (linguistics)","level":2,"score":0.25870001316070557},{"id":"https://openalex.org/C31972630","wikidata":"https://www.wikidata.org/wiki/Q844240","display_name":"Computer vision","level":1,"score":0.250900000333786},{"id":"https://openalex.org/C33923547","wikidata":"https://www.wikidata.org/wiki/Q395","display_name":"Mathematics","level":0,"score":0.24899999797344208},{"id":"https://openalex.org/C137270730","wikidata":"https://www.wikidata.org/wiki/Q120811","display_name":"Detection theory","level":3,"score":0.22349999845027924}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.18653/v1/2025.emnlp-main.706","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.emnlp-main.706","pdf_url":"https://aclanthology.org/2025.emnlp-main.706.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing","raw_type":"proceedings-article"}],"best_oa_location":{"id":"doi:10.18653/v1/2025.emnlp-main.706","is_oa":true,"landing_page_url":"https://doi.org/10.18653/v1/2025.emnlp-main.706","pdf_url":"https://aclanthology.org/2025.emnlp-main.706.pdf","source":null,"license":"cc-by","license_id":"https://openalex.org/licenses/cc-by","version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Proceedings of the 2025 Conference on Empirical Methods in Natural Language Processing","raw_type":"proceedings-article"},"sustainable_development_goals":[],"awards":[{"id":"https://openalex.org/G1922422573","display_name":"NRI: INT: COLLAB: Collaborative Task Planning and Learning through Language Communication in a Human-Robot Team","funder_award_id":"1949634","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https://openalex.org/G7760877873","display_name":null,"funder_award_id":"HR00112220003","funder_id":"https://openalex.org/F4320332180","funder_display_name":"Defense Advanced Research Projects Agency"},{"id":"https://openalex.org/G848032724","display_name":null,"funder_award_id":"Science","funder_id":"https://openalex.org/F4320306076","funder_display_name":"National Science Foundation"}],"funders":[{"id":"https://openalex.org/F4320306076","display_name":"National Science Foundation","ror":"https://ror.org/021nxhr62"},{"id":"https://openalex.org/F4320309652","display_name":"University of Michigan","ror":"https://ror.org/00jmfr291"},{"id":"https://openalex.org/F4320332180","display_name":"Defense Advanced Research Projects Agency","ror":"https://ror.org/02caytj08"},{"id":"https://openalex.org/F4320332815","display_name":"Advanced Research Projects Agency","ror":"https://ror.org/02caytj08"}],"has_content":{"pdf":true,"grobid_xml":true},"content_urls":{"pdf":"https://content.openalex.org/works/W4416036301.pdf","grobid_xml":"https://content.openalex.org/works/W4416036301.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Procedural":[0],"mistake":[1],"detection":[2],"(PMD)":[3],"is":[4],"a":[5,11,21,25,74,91,167,182],"challenging":[6],"problem":[7],"of":[8,105,169],"classifying":[9],"whether":[10],"human":[12],"user":[13],"(observed":[14],"through":[15],"egocentric":[16],"video)":[17],"has":[18],"successfully":[19,157],"executed":[20],"task":[22],"(specified":[23],"by":[24,131],"procedural":[26],"text).Despite":[27],"significant":[28],"recent":[29,68],"efforts,":[30],"machine":[31],"performance":[32,44],"in":[33,67,184],"the":[34,39,60,103,154,175,178,185,191],"wild":[35],"remains":[36],"nonviable,":[37],"and":[38,126,138],"reasoning":[40],"processes":[41],"underlying":[42],"this":[43,111],"are":[45],"opaque.As":[46],"such,":[47],"we":[48,72,89],"extend":[49],"PMD":[50,79],"to":[51,57,97,171],"require":[52],"generating":[53],"visual":[54],"self-dialog":[55],"rationales":[56],"inform":[58],"decisions.Given":[59],"impressive,":[61],"mature":[62],"image":[63],"understanding":[64],"capabilities":[65],"observed":[66],"visionand-language":[69],"models":[70],"(VLMs),":[71],"curate":[73],"suitable":[75],"benchmark":[76],"dataset":[77],"for":[78,102,110,149],"based":[80],"on":[81,177,190],"individual":[82],"frames.As":[83],"our":[84,141],"reformulation":[85],"enables":[86],"unprecedented":[87],"transparency,":[88],"leverage":[90],"natural":[92],"language":[93],"inference":[94,137],"(NLI)":[95],"model":[96],"formulate":[98],"two":[99],"automated":[100],"metrics":[101,134,143],"coherence":[104],"generated":[106],"rationales.We":[107],"establish":[108],"baselines":[109],"reframed":[112],"task,":[113],"showing":[114],"that":[115],"VLMs":[116],"struggle":[117],"off-the-shelf,":[118],"but":[119],"with":[120],"some":[121],"trade-offs,":[122],"their":[123],"accuracy,":[124],"coherence,":[125],"efficiency":[127],"can":[128],"be":[129],"improved":[130],"incorporating":[132],"these":[133],"into":[135],"common":[136,145],"finetuning":[139],"methods.Lastly,":[140],"multi-faceted":[142],"visualize":[144],"outcomes,":[146],"highlighting":[147],"areas":[148],"further":[150],"improvement.":[151],"Success/Mistake":[152],"ClassificationHas":[153],"procedure":[155],"been":[156],"completed?":[158],"Visual":[159,163],"Question":[160,164],"Answering":[161],"(VQA)":[162],"Generation":[165],"(VQG)Ask":[166],"series":[168],"questions":[170],"gather":[172],"information":[173],"Procedure:Unclip":[174],"pegs":[176,189],"cloth.1.":[179],"Is":[180,193],"there":[181,188,194],"cloth":[183],"image?2.":[186],"Are":[187],"cloth?3.":[192],"someone":[195],"holding":[196],"pegs?":[197]},"counts_by_year":[],"updated_date":"2026-04-10T15:06:20.359241","created_date":"2025-11-08T00:00:00"}
