# Data contract for the `products` dataset: dataset-level checks first,
# followed by per-column type declarations and checks.
dataset: datasource/database/schema/products

checks:
  # Extra columns and a different column order are both disallowed.
  - schema:
      allow_extra_columns: false
      allow_other_column_order: false

  # The dataset must not be empty.
  - row_count:
      threshold:
        must_be_greater_than: 0

  # Dataset integrity rules
  - failed_rows:
      name: "List price must not be negative"
      qualifier: list_price_non_negative
      # Overlaps with the column-level `valid_min: 0` check on list_price.
      expression: list_price < 0

  - failed_rows:
      name: "SKU should be unique"
      qualifier: sku_unique
      # Returns one row per non-null SKU that occurs more than once.
      # NOTE(review): the table reference mirrors `dataset` above; confirm
      # this fully qualified name is valid SQL for the target warehouse.
      query: |
        SELECT sku
        FROM datasource.database.schema.products
        WHERE sku IS NOT NULL
        GROUP BY sku
        HAVING COUNT(*) > 1
      threshold:
        must_be: 0
      # NOTE(review): placed at check level (sibling of `threshold`);
      # confirm the contract schema accepts `description` here.
      description: "Prevents duplicate SKUs that break catalog joins and reporting."

columns:
  - name: product_id
    data_type: string
    checks:
      - missing:
      - duplicate:
      - invalid:
          name: "product_id length guardrail"
          valid_min_length: 1
          valid_max_length: 64

  - name: sku
    data_type: string
    checks:
      - missing:
      - invalid:
          name: "sku must be uppercase letters/numbers with separators"
          valid_format:
            name: SKU format
            # First character is A-Z or 0-9, then up to 63 more characters
            # from A-Z, 0-9, underscore or hyphen (1 to 64 in total).
            regex: "^[A-Z0-9][A-Z0-9_-]{0,63}$"

  - name: product_name
    data_type: string
    checks:
      - missing:
      - invalid:
          name: "product_name length guardrail"
          valid_min_length: 2
          valid_max_length: 255

  - name: category
    data_type: string
    checks:
      - missing:
      - invalid:
          name: "category length guardrail"
          valid_min_length: 2
          valid_max_length: 100

  - name: list_price
    data_type: float
    checks:
      - missing:
      - invalid:
          name: "list_price must be zero or positive"
          valid_min: 0

  - name: status
    data_type: string
    checks:
      - missing:
      - invalid:
          name: "Allowed product statuses"
          valid_values:
            - active
            - discontinued
            - out_of_stock
            - draft