# Data contract for the `customers` dataset.
# Dataset-level checks come first, followed by per-column checks.
dataset: datasource/database/schema/customers

checks:
  # The dataset's columns must match the `columns` list below:
  # no extra columns, and no different column order.
  - schema:
      allow_extra_columns: false
      allow_other_column_order: false

  # The dataset must contain at least one row.
  - row_count:
      threshold:
        must_be_greater_than: 0

  # Uniqueness of non-empty emails. The query returns one row per email value
  # that occurs more than once; NULL and empty-string emails are excluded,
  # so multiple customers without an email do not fail this check.
  - failed_rows:
      name: "Email should be unique (if present)"
      qualifier: email_unique
      query: |
        SELECT email
        FROM customers
        WHERE email IS NOT NULL
          AND email <> ''
        GROUP BY email
        HAVING COUNT(*) > 1
      threshold:
        must_be: 0

columns:
  # Identifier column: required, unique, and between 1 and 64 characters long.
  - name: customer_id
    data_type: string
    checks:
      - missing:
      - duplicate:
      - invalid:
          name: "customer_id length guardrail"
          valid_min_length: 1
          valid_max_length: 64

  # Email has no `missing` check, so absent values are tolerated here;
  # only the format of the values is validated.
  - name: email
    data_type: string
    checks:
      - invalid:
          name: "email format (basic)"
          valid_format:
            name: Email pattern
            # Basic shape only: <no @ or whitespace>@<no @ or whitespace>.<no @ or whitespace>
            regex: "^[^@\\s]+@[^@\\s]+\\.[^@\\s]+$"
          threshold:
            metric: percent
            # NOTE(review): with `metric: percent` this presumably means
            # "fewer than 0.5% invalid", not 50% - confirm the intended scale.
            must_be_less_than: 0.5

  # Country code: required, exactly two uppercase ASCII letters.
  # The regex checks the shape only, not membership in the ISO-3166 list.
  - name: country_code
    data_type: string
    checks:
      - missing:
      - invalid:
          name: "Two-letter country code"
          valid_format:
            name: ISO-3166 alpha-2
            regex: "^[A-Z]{2}$"

  # Status: required, and must be one of the listed values.
  - name: status
    data_type: string
    checks:
      - missing:
      - invalid:
          name: "Allowed customer statuses"
          valid_values:
            - active
            - inactive
            - banned