#!/bin/bash

# ceph_backfill_check.sh - Check max_backfills settings for Ceph OSDs
# Requires the 'ceph' CLI and 'jq' (used to parse 'ceph osd tree' JSON output).
# Usage: ./ceph_backfill_check.sh [OPTIONS]
#
# Examples:
#   ./ceph_backfill_check.sh --hosts csn12           # Show OSDs on specific host
#   ./ceph_backfill_check.sh --hosts csn07,csn08     # Show OSDs on multiple hosts
#   ./ceph_backfill_check.sh --root rbd              # Show OSDs under specific root
#   ./ceph_backfill_check.sh --rack rack1            # Show OSDs in specific rack
#   ./ceph_backfill_check.sh --tree datacenter1      # Show OSDs in specific tree location

# Strict mode: stop on a failing command (-e), on expansion of an unset
# variable (-u), and when any stage of a pipeline fails (pipefail).
set -euo pipefail

# Color definitions
# These are literal backslash sequences, not raw escape bytes. They only turn
# into terminal color codes when expanded by 'echo -e' (see print_color) or
# when placed inside a printf format string (see display_results).
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
BLUE='\033[0;34m'
CYAN='\033[0;36m'
NC='\033[0m' # No Color

# Print a message wrapped in a color sequence, followed by the NC reset.
# Globals:   NC (read)
# Arguments: $1 - color sequence to emit before the message
#            $2.. - message words; joined using the first character of IFS
# Outputs:   the colored message on stdout; backslash escapes in both the
#            color and the message are interpreted because of 'echo -e'
print_color() {
    local color="$1"; shift
    local text="$*"
    echo -e "${color}${text}${NC}"
}

# Function to print usage
usage() {
    echo "Usage: $0 [OPTIONS]"
    echo ""
    echo "Options:"
    echo "  --hosts HOST[,HOST2,...]  Show OSDs on specific host(s) (comma-separated)"
    echo "  --root ROOT               Show OSDs under specific root"
    echo "  --rack RACK               Show OSDs in specific rack"
    echo "  --tree TREE               Show OSDs in specific tree location"
    echo "  -h, --help                Show this help message"
    echo ""
    echo "Examples:"
    echo "  $0 --hosts csn12          # Show OSDs on host csn12"
    echo "  $0 --hosts csn07,csn08    # Show OSDs on hosts csn07 and csn08"
    echo "  $0 --root rbd             # Show OSDs under root rbd"
    echo "  $0 --rack rack1           # Show OSDs in rack rack1"
    echo "  $0 --tree dc1             # Show OSDs in tree location dc1"
    echo ""
    exit 1
}

# Verify that the external commands this script relies on are available.
# Besides 'ceph', the script pipes every OSD tree query through 'jq', so a
# missing 'jq' is reported here instead of surfacing later as a misleading
# "not found" result.
# Globals: RED (read)
# Outputs: an error message on stderr when a command is missing
# Exits:   1 when 'ceph' or 'jq' cannot be found; returns normally otherwise
check_ceph_cmd() {
    if ! command -v ceph &> /dev/null; then
        print_color "$RED" "Error: 'ceph' command not found. Please ensure Ceph is installed and configured." >&2
        exit 1
    fi

    if ! command -v jq &> /dev/null; then
        print_color "$RED" "Error: 'jq' command not found. Please install jq; it is required to parse the OSD tree." >&2
        exit 1
    fi
}

# Query the cluster for its OSD tree in JSON form.
# Outputs: stdout of 'ceph osd tree --format=json'; the command's stderr is
#          discarded
# Returns: the exit status of the ceph command
get_osd_tree() {
    local -a tree_cmd=(ceph osd tree --format=json)
    "${tree_cmd[@]}" 2> /dev/null
}

# Print the IDs of the OSDs selected by a filter, one per line.
#
# Globals:
#   RED (read)
# Arguments:
#   $1 - filter type: "none", "hosts", "root", "rack" or "tree"
#   $2 - filter value: a node name, or a comma-separated host list for "hosts"
#   $3 - JSON document as produced by 'ceph osd tree --format=json'
# Outputs:
#   OSD IDs on stdout. Diagnostics go to stderr so that a caller capturing
#   stdout with $(...) still shows them to the user and never mistakes an
#   error message for a list of OSD IDs.
# Exits:
#   1 when a requested host or tree location does not exist. The function is
#   meant to be called inside a command substitution, where this only ends
#   the substitution's subshell. Any other filter type prints nothing.
get_osds() {
    local filter_type="$1"
    local filter_name="$2"
    local osd_tree_json="$3"

    case "$filter_type" in
        none)
            # Get all OSDs
            jq -r '.nodes[] | select(.type == "osd") | .id' <<< "$osd_tree_json"
            ;;

        hosts)
            # Get OSDs for specific host(s) - supports comma-separated list
            local -a hosts=()
            local host host_ids host_osds
            local all_osds=""
            # Names are handed to jq with --arg instead of being pasted into
            # the program text, so quotes or backslashes in a name cannot
            # change the filter.
            local host_filter='.nodes[] | select(.type == "host" and .name == $want_name)'

            # Split on commas only for this one read: IFS is left untouched
            # for the rest of the function and the pieces are not glob-expanded.
            IFS=',' read -r -a hosts <<< "$filter_name"

            for host in "${hosts[@]}"; do
                # Trim leading and trailing whitespace
                host="${host#"${host%%[![:space:]]*}"}"
                host="${host%"${host##*[![:space:]]}"}"

                host_ids=$(jq -r --arg want_name "$host" "$host_filter | .id" \
                    <<< "$osd_tree_json")
                if [[ -z "$host_ids" || "$host_ids" == "null" ]]; then
                    print_color "$RED" "Error: Host '$host' not found" >&2
                    exit 1
                fi

                host_osds=$(jq -r --arg want_name "$host" "$host_filter | .children[]?" \
                    <<< "$osd_tree_json")
                if [[ -n "$host_osds" ]]; then
                    if [[ -n "$all_osds" ]]; then
                        all_osds="$all_osds"$'\n'"$host_osds"
                    else
                        all_osds="$host_osds"
                    fi
                fi
            done

            echo "$all_osds"
            ;;

        root | rack | tree)
            # "root" and "rack" must match the node type as well as the name;
            # "tree" matches a node of any type (empty type = no restriction).
            local want_type=""
            if [[ "$filter_type" != "tree" ]]; then
                want_type="$filter_type"
            fi

            local tree_ids
            tree_ids=$(jq -r --arg want_name "$filter_name" --arg want_type "$want_type" '
                .nodes[]
                | select(.name == $want_name and ($want_type == "" or .type == $want_type))
                | .id
            ' <<< "$osd_tree_json")

            if [[ -z "$tree_ids" || "$tree_ids" == "null" ]]; then
                print_color "$RED" "Error: Tree location '$filter_name' not found" >&2
                exit 1
            fi

            # Collect every OSD below the matching node(s) in a single jq run:
            # index the nodes by id once, then descend depth-first through the
            # "children" lists, emitting children whose type is "osd" and
            # recursing into everything else.
            jq -r --arg want_name "$filter_name" --arg want_type "$want_type" '
                (reduce .nodes[] as $n ({}; .[($n.id | tostring)] = $n)) as $by_id
                | def osds_below:
                      ($by_id[tostring].children // [])[]
                      | if $by_id[tostring].type == "osd" then . else osds_below end;
                  .nodes[]
                  | select(.name == $want_name and ($want_type == "" or .type == $want_type))
                  | .id
                  | osds_below
            ' <<< "$osd_tree_json"
            ;;
    esac
}

# Look up the name of the host node that lists a given OSD among its children.
# Arguments: $1 - OSD id (inserted verbatim into the jq filter)
#            $2 - OSD tree JSON
# Outputs:   the host name on stdout, or "unknown" when no host matches or
#            the jq query fails (jq's stderr is discarded)
get_osd_host() {
    local osd_id=$1
    local osd_tree_json=$2

    # Select host nodes having a child equal to the OSD id
    local filter=".nodes[] | select(.type == \"host\" and (.children[]? // empty) == $osd_id) | .name"

    local owner
    owner=$(printf '%s\n' "$osd_tree_json" | jq -r "$filter" 2>/dev/null || echo "")

    if [[ -z "$owner" ]]; then
        owner="unknown"
    fi
    echo "$owner"
}

# Print the osd_max_backfills value that 'ceph config show' reports for an OSD.
# Arguments: $1 - OSD id (queried as "osd.<id>")
# Outputs:   the value on stdout; "1" is used as the default when the ceph
#            command fails or the option is not listed in its output
# Returns:   0; a failing query is reported through the default value only.
#            The caller's shell options (errexit) are left untouched.
get_max_backfills() {
    local osd_id=$1
    local config_dump
    local result=""

    # Declaration and assignment are kept separate: 'local var=$(cmd)' would
    # report the status of 'local' and hide a failing ceph call. stderr of
    # ceph itself is silenced so an unreachable daemon does not garble the
    # result table.
    if config_dump=$(ceph config show "osd.${osd_id}" 2>/dev/null); then
        # Exact match on the option name in the first column; only the first
        # matching row is used so the result is always a single value.
        result=$(awk '$1 == "osd_max_backfills" && !seen++ { print $2 }' \
            <<< "$config_dump") || result=""
    fi

    # If command failed or result is empty, use default
    echo "${result:-1}"
}

# Print the max-backfills report: a title, one table row per OSD, a summary
# with the distribution of values, and a color legend.
#
# Globals:
#   RED, GREEN, YELLOW, BLUE, CYAN, NC (read)
# Arguments:
#   $1 - filter type ("hosts", "root", "rack", "tree"; anything else is
#        described as "all OSDs")
#   $2 - filter value, passed through to get_osds
# Outputs:
#   the report on stdout; failure diagnostics on stderr
# Exits:
#   1 when the OSD tree or the OSD list cannot be obtained,
#   0 (after a notice) when no OSD matches; returns normally otherwise
display_results() {
    local filter_type="$1"
    local filter_name="$2"

    print_color "$CYAN" "=== Ceph OSD Max Backfills Check ==="
    echo

    case "$filter_type" in
        hosts)
            if [[ "$filter_name" == *","* ]]; then
                print_color "$BLUE" "Showing OSDs on hosts: $filter_name"
            else
                print_color "$BLUE" "Showing OSDs on host: $filter_name"
            fi
            ;;
        root)
            print_color "$BLUE" "Showing OSDs under root: $filter_name"
            ;;
        rack)
            print_color "$BLUE" "Showing OSDs in rack: $filter_name"
            ;;
        tree)
            print_color "$BLUE" "Showing OSDs in tree location: $filter_name"
            ;;
        *)
            print_color "$BLUE" "Showing all OSDs"
            ;;
    esac
    echo

    # Get OSD tree information
    local osd_tree_json
    if ! osd_tree_json=$(get_osd_tree); then
        print_color "$RED" "Error: Failed to get OSD tree information" >&2
        exit 1
    fi

    # Get list of OSDs to check
    local osds
    if ! osds=$(get_osds "$filter_type" "$filter_name" "$osd_tree_json"); then
        print_color "$RED" "Error: Failed to get OSD list" >&2
        exit 1
    fi

    if [[ -z "$osds" ]]; then
        print_color "$YELLOW" "No OSDs found matching the criteria"
        exit 0
    fi

    # Header
    printf "%-8s %-20s %-15s\n" "OSD" "Host" "Max Backfills"
    printf "%-8s %-20s %-15s\n" "---" "----" "-------------"

    # Track statistics: number of rows printed and OSD count per value
    local total_count=0
    local -A backfill_summary=()

    # Load the IDs into an array first and iterate with 'for': the loop body
    # runs external commands, and a 'while read' loop would let any of them
    # consume the remaining IDs from the loop's stdin.
    local -a osd_array=()
    mapfile -t osd_array <<< "$osds"

    local osd_id host max_backfills color
    for osd_id in "${osd_array[@]}"; do
        if [[ -z "$osd_id" || "$osd_id" == "null" ]]; then
            continue
        fi

        host=$(get_osd_host "$osd_id" "$osd_tree_json") || host=""
        max_backfills=$(get_max_backfills "$osd_id") || max_backfills=""
        # An empty string is not a valid associative-array subscript
        if [[ -z "$max_backfills" ]]; then
            max_backfills="unknown"
        fi

        # Color code based on backfills value; non-numeric values stay uncolored
        color=$NC
        if [[ "$max_backfills" =~ ^[0-9]+$ ]]; then
            if (( max_backfills >= 3 )); then
                color=$GREEN
            elif (( max_backfills == 2 )); then
                color=$YELLOW
            else
                color=$RED
            fi
        fi

        # %b expands the backslash escapes stored in the color variables while
        # keeping variable data out of the printf format string.
        printf '%b%-8s %-20s %-15s%b\n' \
            "$color" "osd.$osd_id" "${host:-unknown}" "$max_backfills" "$NC"

        # Plain assignments (not '(( x++ ))') so a zero result cannot trip set -e
        total_count=$(( total_count + 1 ))
        backfill_summary[$max_backfills]=$(( ${backfill_summary[$max_backfills]:-0} + 1 ))
    done

    echo
    print_color "$CYAN" "=== Summary ==="
    echo "Total OSDs checked: $total_count"
    echo

    # Show distribution, lowest value first
    print_color "$BLUE" "Max Backfills Distribution:"
    if (( total_count > 0 )); then
        local -a sorted_values=()
        mapfile -t sorted_values < <(printf '%s\n' "${!backfill_summary[@]}" | sort -n)

        local backfill_value count percentage
        for backfill_value in "${sorted_values[@]}"; do
            count=${backfill_summary[$backfill_value]}
            # Integer division: percentages are rounded down
            percentage=$(( count * 100 / total_count ))
            printf "  %s: %d OSDs (%d%%)\n" "$backfill_value" "$count" "$percentage"
        done
    fi

    echo
    print_color "$BLUE" "Legend:"
    print_color "$GREEN" "  Green: >= 3 backfills (Good parallelism)"
    print_color "$YELLOW" "  Yellow: 2 backfills (Moderate)"
    print_color "$RED" "  Red: <= 1 backfills (Conservative)"
}

# Main execution: parse the command line, check prerequisites and print the
# report.
# Globals:   RED (read)
# Arguments: the script's command-line arguments
#              --hosts/--root/--rack/--tree VALUE  select the filter; when
#                                                  several are given the last
#                                                  one wins
#              -h, --help                          show usage
# Outputs:   argument errors on stderr, followed by the usage text
# Exits:     via usage when no arguments are given, help is requested, an
#            option is unknown, or an option is missing its value
main() {
    local filter_type="none"
    local filter_name=""

    # Show help if no arguments provided
    if [[ $# -eq 0 ]]; then
        usage
    fi

    # Parse arguments
    while [[ $# -gt 0 ]]; do
        case "$1" in
            --hosts | --root | --rack | --tree)
                # Check for the value before touching $2: under 'set -u' a
                # missing value would otherwise abort with a bare
                # "unbound variable" error instead of a usable message.
                if [[ $# -lt 2 || -z "$2" ]]; then
                    print_color "$RED" "Error: Option '$1' requires a value" >&2
                    usage
                fi
                # The filter type is the option name without the leading dashes
                filter_type="${1#--}"
                filter_name="$2"
                shift 2
                ;;
            -h | --help)
                usage
                ;;
            *)
                print_color "$RED" "Error: Unknown option '$1'" >&2
                usage
                ;;
        esac
    done

    # Check prerequisites
    check_ceph_cmd

    # Display results
    display_results "$filter_type" "$filter_name"
}

# Script entry point: hand every command-line argument to main unchanged
# ("$@" keeps each argument as a separate word, even if it contains spaces).
main "$@"