To evaluate VLM performance across the three masking strategies, we first test object recognition in two settings: when the private object is the focus of the question, and when a control object is the focus with the private object in the background.
We also evaluate VLM performance on the VQA task using realistic, human-asked questions. Note that high-risk masking achieves an answerability rate closest to that of the full image.
% COLM 2025 conference paper, so the venue is stored in booktitle of an
% inproceedings entry. Author names use the "Last, First" form.
% The citation key is kept as-is so existing citations still resolve.
@inproceedings{ji@2025posetraj,
  author    = {Murrugarra-Llerena, Jeffri and Niu, Haoran and Barber, K. Suzanne and Daum{\'e} III, Hal and Cao, Yang Trista and Cascante-Bonilla, Paola},
  title     = {Beyond Blanket Masking: Examining Granularity for Privacy Protection in Images Captured by Blind and Low Vision Users},
  booktitle = {Conference on Language Modeling ({COLM})},
  year      = {2025},
}