#!/bin/bash

# ceph_deferred_size.sh - Show the BlueStore deferred-write threshold
# (bluestore_prefer_deferred_size_hdd) for every HDD-class OSD.

# Strict mode: stop on command failure, on use of an unset variable, and when
# any stage of a pipeline fails.
set -o errexit -o nounset -o pipefail

# ANSI colour sequences. They are stored as literal backslash escapes and only
# turned into control characters when printed.
RED="\033[0;31m"
GREEN="\033[0;32m"
YELLOW="\033[1;33m"
BLUE="\033[0;34m"
CYAN="\033[0;36m"
NC="\033[0m"   # reset to the terminal's default colour

# Defaults; both can be overridden from the command line.
PARAM='bluestore_prefer_deferred_size_hdd'
USE_TELL=true

# Print the command-line help to stdout.
# Reads: $0 (script name) and PARAM (shown as the option being queried and as
# the default of --param).
usage() {
    local help_lines=(
        "Usage: $0 [options]"
        ""
        "Reports the BlueStore deferred-write threshold ($PARAM)"
        "for every HDD-class OSD in the cluster. Writes smaller than this many"
        "bytes are deferred (buffered in RocksDB before being flushed to disk)."
        ""
        "Options:"
        "  --param NAME   Query a different config option (default: $PARAM)"
        "  --no-tell      Skip 'ceph tell' (running value) and only read configured"
        "                 values via 'ceph config get'. Much faster on large clusters,"
        "                 but won't pick up runtime overrides set with injectargs."
        "  -h, --help     Show this help message"
    )
    printf '%s\n' "${help_lines[@]}"
}

# parse_args ARG...
# Applies the command-line options to the globals PARAM and USE_TELL.
#   --param NAME   sets PARAM to NAME
#   --no-tell      sets USE_TELL to false
#   -h, --help     prints the help text and exits 0
# Exits 1, with a message and the help text on stderr, for an unknown option
# or when --param is given without a non-empty name.
parse_args() {
    while [[ $# -gt 0 ]]; do
        case "$1" in
            --param)
                # Checked explicitly: with `set -u` a bare "$2" would kill the
                # script with an opaque "unbound variable" message.
                if [[ -z "${2-}" ]]; then
                    echo "Error: --param requires a non-empty option name" >&2
                    usage >&2
                    exit 1
                fi
                PARAM=$2
                shift 2
                ;;
            --no-tell)
                USE_TELL=false
                shift
                ;;
            -h|--help)
                usage
                exit 0
                ;;
            *)
                echo "Unknown option: $1" >&2
                usage >&2
                exit 1
                ;;
        esac
    done
}

parse_args "$@"

# Both external tools are mandatory; stop at the first one that is not on PATH.
for tool in ceph jq; do
    command -v "$tool" >/dev/null && continue
    case "$tool" in
        ceph) missing_msg="Error: 'ceph' command not found" ;;
        jq)   missing_msg="Error: 'jq' is required" ;;
    esac
    echo -e "${RED}${missing_msg}${NC}" >&2
    exit 1
done

# Collect HDD-class OSD IDs into hdd_osds, sorted numerically.
# ceph and jq run as separate, checked steps so that a failure of either one
# is reported as an error instead of being mistaken for "no HDD OSDs".
# ceph's stderr is deliberately left visible so its own diagnostic is shown.
if ! osd_json=$(ceph osd crush class ls-osd hdd -f json); then
    echo -e "${RED}Error: could not list HDD-class OSDs ('ceph osd crush class ls-osd hdd' failed)${NC}" >&2
    exit 1
fi
# pipefail makes a jq parse error fail this pipeline even though sort succeeds.
if ! osd_ids=$(jq -r '.[]' <<<"$osd_json" | sort -n); then
    echo -e "${RED}Error: could not parse the OSD list returned by ceph${NC}" >&2
    exit 1
fi

hdd_osds=()
# Guarded because a here-string of "" would yield one empty array element.
if [[ -n "$osd_ids" ]]; then
    mapfile -t hdd_osds <<<"$osd_ids"
fi

if [[ ${#hdd_osds[@]} -eq 0 ]]; then
    echo -e "${YELLOW}No HDD-class OSDs found.${NC}"
    exit 0
fi

# Value of $PARAM at the "global" level of the config DB. A failing ceph call
# is tolerated here and simply leaves the value empty.
cluster_default=$(ceph config get global "$PARAM" 2>/dev/null || :)
cluster_default=${cluster_default//$'\n'}   # drop any embedded newlines

# Choose the wording of the "cluster default" line before printing the header.
if [[ -z "$cluster_default" ]]; then
    default_line="${CYAN}Cluster default (global):${NC} (unset; OSD compiled-in default applies)"
else
    default_line="${CYAN}Cluster default (global):${NC} ${GREEN}${cluster_default}${NC}"
fi

echo -e "${CYAN}Parameter:${NC} $PARAM"
echo -e "$default_line"
echo -e "${CYAN}HDD-class OSDs:${NC} ${#hdd_osds[@]}"
echo

printf "%-10s %-20s %-10s\n" "OSD" "VALUE (bytes)" "SOURCE"
printf "%-10s %-20s %-10s\n" "----------" "--------------------" "----------"

declare -A value_count   # distinct value -> number of OSDs reporting it
unreachable=()           # IDs of OSDs for which no value could be obtained

# resolve_osd_value ID
# Looks up $PARAM for osd.ID and stores the result in the globals `value` and
# `source`:
#   running    - value reported by the daemon through 'ceph tell'
#                (only attempted when USE_TELL is true)
#   config     - value returned by 'ceph config get osd.ID'
#   error      - neither lookup produced a value; value is "(unknown)"
# Always returns 0 so that one bad OSD cannot abort the report under `set -e`.
resolve_osd_value() {
    local osd="osd.$1" raw
    value=""
    source=""

    if $USE_TELL; then
        if raw=$(ceph tell "$osd" config get "$PARAM" 2>/dev/null); then
            # The reply may not be the expected JSON object. Treat a jq
            # failure as "no running value" and fall through to the config
            # lookup instead of letting errexit terminate the script.
            value=$(printf '%s' "$raw" | jq -r --arg k "$PARAM" '.[$k] // empty' 2>/dev/null) || value=""
            if [[ -n "$value" ]]; then
                source="running"
            fi
        fi
    fi

    if [[ -z "$value" ]]; then
        if value=$(ceph config get "$osd" "$PARAM" 2>/dev/null); then
            value=${value//$'\n'/}
            source="config"
        else
            # Discard anything a failed call printed so the OSD is reported
            # as an error rather than shown with a bogus value.
            value=""
        fi
    fi

    if [[ -z "$value" ]]; then
        value="(unknown)"
        source="error"
    fi
}

for id in "${hdd_osds[@]}"; do
    resolve_osd_value "$id"

    # Red: lookup failed. Yellow: differs from the cluster default (when one
    # is known). Blue: everything else.
    color=$BLUE
    if [[ "$source" == "error" ]]; then
        color=$RED
        unreachable+=("$id")
    elif [[ -n "$cluster_default" && "$value" != "$cluster_default" ]]; then
        color=$YELLOW
    fi

    # Colours are passed as %b arguments so the format string stays constant.
    printf '%b%-10s %-20s %-10s%b\n' "$color" "osd.$id" "$value" "$source" "$NC"

    value_count[$value]=$(( ${value_count[$value]:-0} + 1 ))
done

# print_summary
# Prints one line per distinct value in value_count with the number of OSDs
# that reported it, followed by the list of OSD IDs in `unreachable` if any.
# Keys are sorted numerically (non-numeric keys such as "(unknown)" sort
# first) because associative-array iteration order is unspecified and would
# otherwise make the output order arbitrary.
print_summary() {
    local v

    echo
    echo -e "${CYAN}Summary (distinct values):${NC}"
    if (( ${#value_count[@]} > 0 )); then
        while IFS= read -r v; do
            printf "  %-20s %d OSD(s)\n" "$v" "${value_count[$v]}"
        done < <(printf '%s\n' "${!value_count[@]}" | LC_ALL=C sort -n)
    fi

    if (( ${#unreachable[@]} > 0 )); then
        echo
        echo -e "${YELLOW}Unreachable OSDs (${#unreachable[@]}):${NC} ${unreachable[*]}"
    fi
}

print_summary
