Overview #
In this tutorial I’m using the following Kubernetes cluster:
- Deployed with Kubeadm
- Based on Ubuntu 24.04
# Kubernetes cluster
kubectl get nodes -o wide
# Shell output:
NAME STATUS ROLES AGE VERSION INTERNAL-IP EXTERNAL-IP OS-IMAGE KERNEL-VERSION CONTAINER-RUNTIME
ubuntu1 Ready control-plane 47d v1.28.11 192.168.30.10 <none> Ubuntu 24.04 LTS 6.8.0-36-generic containerd://1.7.18
ubuntu2 Ready worker 47d v1.28.11 192.168.30.11 <none> Ubuntu 24.04 LTS 6.8.0-36-generic containerd://1.7.18
ubuntu3 Ready worker 47d v1.28.11 192.168.30.12 <none> Ubuntu 24.04 LTS 6.8.0-36-generic containerd://1.7.18
My Ceph cluster has three nodes and is deployed via Cephadm on Rocky Linux 9.4:
# List cluster nodes
ceph orch host ls
# Shell output:
HOST ADDR LABELS STATUS
rocky1 192.168.30.100 _admin
rocky2 192.168.30.101
rocky3 192.168.30.102
3 hosts in cluster
Ceph Cluster #
List Ceph Cluster ID #
Copy the Ceph cluster ID (fsid) for the Kubernetes ConfigMap:
# List Ceph cluster ID
sudo ceph fsid
# Shell output:
3b1f0e44-3631-11ef-8ac6-000c29ad85a9
List Ceph Monitor Addresses #
Copy the Ceph monitor addresses for the Kubernetes ConfigMap:
# List Ceph monitor addresses
ceph mon dump
# Shell output:
epoch 3
fsid 3b1f0e44-3631-11ef-8ac6-000c29ad85a9 # Copy for ConfigMap
last_changed 2024-06-29T16:10:44.013980+0000
created 2024-06-29T16:04:38.852842+0000
min_mon_release 18 (reef)
election_strategy: 1
0: [v2:192.168.30.100:3300/0,v1:192.168.30.100:6789/0] mon.rocky1 # Copy for ConfigMap
1: [v2:192.168.30.101:3300/0,v1:192.168.30.101:6789/0] mon.rocky2 # Copy for ConfigMap
2: [v2:192.168.30.102:3300/0,v1:192.168.30.102:6789/0] mon.rocky3 # Copy for ConfigMap
dumped monmap epoch 3
Storage Pool #
Create new Storage Pool #
Create a Ceph storage pool for the RBD images that will be used in the Kubernetes cluster:
# Create storage pool: With 64 placement groups
ceph osd pool create kubernetes-pool 64 64 replicated
# Shell output:
pool 'kubernetes-pool' created
# Set the replication factor for the new storage pool
ceph osd pool set kubernetes-pool size 3
# Shell output:
set pool 2 size to 3
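Optionally, confirm the replication factor was applied to the new pool:
# Verify the pool replication factor
ceph osd pool get kubernetes-pool size
# Shell output:
size: 3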
Verify the Storage Pool #
# List storage pools
ceph osd lspools
# Shell output:
1 .mgr
2 kubernetes-pool
Initialize Storage Pool #
# Initialize the Kubernetes pool: This allows the creation and management of RBD images
rbd pool init kubernetes-pool
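Optionally, verify that the "rbd" application was enabled on the pool by the init command:
# Verify the pool application
ceph osd pool application get kubernetes-pool
# Shell output:
{
    "rbd": {}
}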
Create Ceph User #
Create a Ceph user for the new “kubernetes-pool” Ceph storage pool:
# Create user: Encode the generated user key as base64
ceph auth get-or-create-key client.kubeAdmin mds 'allow *' mgr 'allow *' mon 'allow *' osd 'allow * pool=kubernetes-pool' | tr -d '\n' | base64;
# Shell output:
QVFDU044ZG00NUU2RHhBQWx6VkhMYjRtS3JldC9ZVStXM2xiNXc9PQ==
# Encode "kubeAdmin" username as base64
echo "kubeAdmin" | tr -d '\n' | base64;
# Shell output:
a3ViZUFkbWlu
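Optionally, decode the base64 values again to make sure they round-trip correctly:
# Decode the base64 encoded username
echo "a3ViZUFkbWlu" | base64 -d
# Shell output:
kubeAdmin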
Verify the Ceph user:
# List Ceph users
ceph auth ls
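To show only the new user instead of the full list:
# Show the "kubeAdmin" user and its capabilities
ceph auth get client.kubeAdmin
The output should contain the user key and the mds, mgr, mon and osd capabilities defined above.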
Kubernetes Cluster #
Install Ceph-Common #
Install the ceph-common package on the Kubernetes nodes:
# Install the Ceph common package
sudo apt install ceph-common -y
# Verify the installation / list version
ceph --version
# Shell output:
ceph version 19.2.0~git20240301.4c76c50 (4c76c50a73f63ba48ccdf0adccce03b00d1d80c7) squid (dev)
Load RBD Kernel Module #
Make sure the RBD kernel module is loaded on the Kubernetes nodes:
# Permanently load the RBD kernel module
sudo vi /etc/modules
# Add the following line
rbd
# Manually / temporarily load the RBD kernel module
sudo modprobe rbd
# Verify the RBD kernel module is loaded
lsmod | grep rbd
# Shell output:
rbd 126976 0
libceph 544768 1 rbd
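Note: As an alternative to editing "/etc/modules", a drop-in file in "modules-load.d" can be used to load the module at boot:
# Load the RBD kernel module at boot via modules-load.d
echo "rbd" | sudo tee /etc/modules-load.d/rbd.conf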
Install CSI Ceph Driver #
Install Helm #
# Install Helm via script
curl -fsSL -o get_helm.sh https://raw.githubusercontent.com/helm/helm/main/scripts/get-helm-3 &&
chmod +x get_helm.sh &&
./get_helm.sh
# Verify the installation / check version
helm version
Create Namespace #
# Create a namespace for the Ceph CSI driver
kubectl create namespace ceph-csi-rbd
Clone Ceph CSI Repository #
Note: So far I could only get the Ceph CSI driver for RBD images to run with version “3.3.1”.
# Clone the Ceph CSI repository
git clone https://github.com/ceph/ceph-csi.git
# Open directory
cd ceph-csi
# Check out version
git checkout v3.3.1
# Move to the RBD chart
cd charts/ceph-csi-rbd
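Note: Alternatively, the chart should also be installable from the Ceph CSI Helm repository instead of the cloned Git repository; in this tutorial I stick with the Git checkout shown above:
# Add the Ceph CSI Helm repository (alternative to the Git clone)
helm repo add ceph-csi https://ceph.github.io/csi-charts
helm repo update
# List the available chart versions
helm search repo ceph-csi/ceph-csi-rbd --versions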
Adapt Chart Values #
Create a values file for the Helm chart that defines the Ceph monitor addresses and the cluster ID:
cat <<EOF > ceph-csi-rbd-values.yaml
csiConfig:
  - clusterID: "3b1f0e44-3631-11ef-8ac6-000c29ad85a9" # Define Ceph cluster ID
    monitors:
      - "192.168.30.100:6789" # Define Ceph monitor IPs
      - "192.168.30.101:6789" # Define Ceph monitor IPs
      - "192.168.30.102:6789" # Define Ceph monitor IPs
provisioner:
  name: provisioner
  replicaCount: 2
EOF
Install Ceph CSI RBD with Helm #
# Install the Ceph CSI Driver for Ceph RBD images
helm install \
--namespace ceph-csi-rbd \
ceph-csi-rbd \
--values ceph-csi-rbd-values.yaml \
./
# Shell output:
NAME: ceph-csi-rbd
LAST DEPLOYED: Thu Aug 22 14:36:09 2024
NAMESPACE: ceph-csi-rbd
STATUS: deployed
REVISION: 1
TEST SUITE: None
NOTES:
Examples on how to configure a storage class and start using the driver are here:
https://github.com/ceph/ceph-csi/tree/devel/examples/rbd
Verify Ceph CSI RBD Installation #
Watch Rollout #
Watch the Helm rollout and wait until the status changes to: successfully rolled out
# Watch the rollout
kubectl rollout status deployment ceph-csi-rbd-provisioner -n ceph-csi-rbd
# Shell output:
Waiting for deployment "ceph-csi-rbd-provisioner" rollout to finish: 0 of 2 updated replicas are available...
Waiting for deployment "ceph-csi-rbd-provisioner" rollout to finish: 1 of 2 updated replicas are available...
deployment "ceph-csi-rbd-provisioner" successfully rolled out # Check rollout status
Optional, verify the Helm status again:
# Verify status
helm status ceph-csi-rbd -n ceph-csi-rbd
# Shell output:
NAME: ceph-csi-rbd
LAST DEPLOYED: Thu Aug 22 14:36:09 2024
NAMESPACE: ceph-csi-rbd
STATUS: deployed
REVISION: 1
TEST SUITE: None
NOTES:
Examples on how to configure a storage class and start using the driver are here:
https://github.com/ceph/ceph-csi/tree/devel/examples/rbd
List Ceph CSI RBD Pods #
# Verify pods
kubectl get pods -n ceph-csi-rbd
# Shell output:
NAME READY STATUS RESTARTS AGE
ceph-csi-rbd-nodeplugin-9hxxj 3/3 Running 0 3m15s
ceph-csi-rbd-nodeplugin-vv8gg 3/3 Running 0 3m15s
ceph-csi-rbd-provisioner-b5c69f668-8jc56 7/7 Running 0 3m15s
ceph-csi-rbd-provisioner-b5c69f668-pfbzr 7/7 Running 0 3m15s
Create Kubernetes Secret #
Create a Kubernetes secret with the base64-encoded username and key of the Ceph user created for the “kubernetes-pool” pool:
vi ceph-admin-secret.yaml
apiVersion: v1
kind: Secret
metadata:
  name: ceph-admin
  namespace: default
type: kubernetes.io/rbd
data:
  userID: a3ViZUFkbWlu # Define Ceph username
  userKey: QVFDU044ZG00NUU2RHhBQWx6VkhMYjRtS3JldC9ZVStXM2xiNXc9PQ== # Define Ceph user key
# Create the secret
kubectl apply -f ceph-admin-secret.yaml
Verify the new secret:
# List secrets
kubectl get secrets
# Shell output:
NAME TYPE DATA AGE
ceph-admin kubernetes.io/rbd 2 4s
# List secret details:
kubectl get secret ceph-admin -o yaml
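Optionally, decode the stored values to confirm they match the Ceph user:
# Decode the username stored in the secret
kubectl get secret ceph-admin -o jsonpath='{.data.userID}' | base64 -d
# Shell output:
kubeAdmin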
Create Storage Class #
Create a new storage class that uses the Ceph CSI:
vi ceph-rbd-sc.yaml
apiVersion: storage.k8s.io/v1
kind: StorageClass
metadata:
  name: ceph-rbd-sc
  annotations:
    storageclass.kubernetes.io/is-default-class: "true"
provisioner: rbd.csi.ceph.com
parameters:
  clusterID: 3b1f0e44-3631-11ef-8ac6-000c29ad85a9 # Define Ceph cluster ID
  pool: kubernetes-pool # Define Ceph storage pool
  imageFeatures: layering
  csi.storage.k8s.io/provisioner-secret-name: ceph-admin
  csi.storage.k8s.io/provisioner-secret-namespace: default
  csi.storage.k8s.io/controller-expand-secret-name: ceph-admin
  csi.storage.k8s.io/controller-expand-secret-namespace: default
  csi.storage.k8s.io/node-stage-secret-name: ceph-admin
  csi.storage.k8s.io/node-stage-secret-namespace: default
reclaimPolicy: Delete
allowVolumeExpansion: true
mountOptions:
  - discard
kubectl apply -f ceph-rbd-sc.yaml
Verify Storage Class #
# List StorageClasses
kubectl get sc
# Shell output:
NAME PROVISIONER RECLAIMPOLICY VOLUMEBINDINGMODE ALLOWVOLUMEEXPANSION AGE
ceph-rbd-sc (default) rbd.csi.ceph.com Delete Immediate true 5s
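Optionally, list the full parameters of the new storage class:
# Show the StorageClass details
kubectl describe sc ceph-rbd-sc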
Verify CSI Driver #
# List CSI drivers
kubectl get csidrivers
# Shell output:
NAME ATTACHREQUIRED PODINFOONMOUNT STORAGECAPACITY TOKENREQUESTS REQUIRESREPUBLISH MODES AGE
rbd.csi.ceph.com true false false <unset> false Persistent 5m19s
Test Ceph CSI #
Create Persistent Volume Claim #
vi example-pv.yaml
kind: PersistentVolumeClaim
apiVersion: v1
metadata:
  name: ceph-rbd-sc-pvc # Define PVC name
spec:
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 2Gi
  storageClassName: ceph-rbd-sc # Define the Ceph CSI storage class
# Create the persistent volume claim
kubectl apply -f example-pv.yaml
Verify Persistent Volume Claim #
Verify PVC #
# List PVC
kubectl get pvc
# Shell output:
NAME STATUS VOLUME CAPACITY ACCESS MODES STORAGECLASS AGE
ceph-rbd-sc-pvc Bound pvc-c284d51a-504a-4feb-aeed-1cabf8e8a25e 2Gi RWO ceph-rbd-sc 74s
A persistent volume is dynamically provisioned when a new persistent volume claim is made:
# Verify the PV
kubectl get pv
# Shell output:
NAME CAPACITY ACCESS MODES RECLAIM POLICY STATUS CLAIM STORAGECLASS REASON AGE
pvc-0dcaa589-ba22-4e19-8581-6fe1da159da0 2Gi RWO Delete Bound default/ceph-rbd-sc-pvc ceph-rbd-sc 24s
Verify RBD Volume #
Verify the dynamically provisioned RBD volume in the Ceph storage pool:
# List volumes in "kubernetes-pool" storage pool
rbd ls -p kubernetes-pool
# Shell output:
csi-vol-c14228b3-6094-11ef-b6ad-9a616b08e871
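Optionally, show the details of the provisioned RBD image; the image name is taken from the output above:
# Show the RBD image details: Size, object size and enabled features like "layering"
rbd info kubernetes-pool/csi-vol-c14228b3-6094-11ef-b6ad-9a616b08e871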
Create Example Pod #
Create an example pod that mounts the PVC as a volume:
vi example-pod.yaml
apiVersion: v1
kind: Pod
metadata:
  name: ceph-pod
spec:
  containers:
    - name: ceph-pod
      image: busybox
      command: ["sleep", "infinity"]
      volumeMounts:
        - mountPath: /mnt/ceph_rbd
          name: volume
  volumes:
    - name: volume
      persistentVolumeClaim:
        claimName: ceph-rbd-sc-pvc # Define PVC name
# Deploy the pod
kubectl apply -f example-pod.yaml
Verify Pod #
List Pod #
# List pods
kubectl get pods
# Shell output:
NAME READY STATUS RESTARTS AGE
ceph-pod 1/1 Running 0 12s
Verify Volume Mount #
kubectl exec pod/ceph-pod -- df -k | grep rbd
# Shell output:
/dev/rbd0 1992552 24 1976144 0% /mnt/ceph_rbd
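Optionally, write a test file to the mounted RBD volume to verify it is writable:
# Write and read back a test file
kubectl exec pod/ceph-pod -- sh -c 'echo "hello ceph" > /mnt/ceph_rbd/test.txt'
kubectl exec pod/ceph-pod -- cat /mnt/ceph_rbd/test.txt
# Shell output:
hello ceph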
Delete Resources #
# Delete the example pod
kubectl delete -f example-pod.yaml
# Delete the PVC; this automatically deletes the PV and the RBD volume
kubectl delete pvc ceph-rbd-sc-pvc
# List PVs
kubectl get pv
# Shell output:
No resources found
Troubleshooting #
# List CSI Provisioner Logs
kubectl logs -n ceph-csi-rbd <csi-provisioner-pod> -c csi-provisioner
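Some more commands that can help to narrow down provisioning or mounting issues; the container name "csi-rbdplugin" is the default used by the chart and may differ depending on the chart version:
# Describe a pending PVC to see the provisioning events
kubectl describe pvc ceph-rbd-sc-pvc
# List recent events in the default namespace
kubectl get events --sort-by=.metadata.creationTimestamp
# List node plugin logs
kubectl logs -n ceph-csi-rbd <csi-nodeplugin-pod> -c csi-rbdplugin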