Table Design and Data Type Operations

Create a table that covers all table structure data types supported by Singdata Lakehouse, and use this table as the base table to create a regular view.

Create schema

CREATE SCHEMA IF NOT EXISTS lakehouse_demo_table_design_schema;
USE SCHEMA lakehouse_demo_table_design_schema;
SELECT current_schema();

Create Tables and Views

-- Singdata Lakehouse supported data types

CREATE TABLE IF NOT EXISTS lakehouse_demo_table_design_schema.clickzetta_datatypes
(
    c_bigint BIGINT,
    c_boolean BOOLEAN,
    c_binary BINARY,
    c_char CHAR,
    c_data DATE,
    c_decimal DECIMAL(20,6),
    c_double DOUBLE,
    c_float FLOAT,
    c_int INT,
    c_interval INTERVAL DAY,
    c_smallint SMALLINT,
    c_string STRING,
    c_timestamp TIMESTAMP,
    c_tinyint TINYINT,
    c_array ARRAY<STRUCT<a: INT, b: STRING>>,
    c_map MAP<STRING, STRING>,
    c_struct STRUCT<a: INT, b: STRING, c: DOUBLE>,
    c_varchar VARCHAR(1024),
    c_json JSON
);
-- The LIKE statement can create another table, making the target table have the same structure as the source table. However, the table created by this statement does not copy data.
CREATE  TABLE IF NOT EXISTS lakehouse_demo_table_design_schema.clickzetta_datatypes_like LIKE lakehouse_demo_table_design_schema.clickzetta_datatypes;

-- The AS statement can be used to synchronously or asynchronously query the original table and create a new table based on the query results, then insert the query results into the new table, but it will not copy partition information.
CREATE  TABLE IF NOT EXISTS lakehouse_demo_table_design_schema.clickzetta_datatypes_as AS select* from lakehouse_demo_table_design_schema.clickzetta_datatypes;

-- Create a regular view
CREATE VIEW  IF NOT EXISTS lakehouse_demo_table_design_schema.clickzetta_datatypes_view as select* from lakehouse_demo_table_design_schema.clickzetta_datatypes;

-- Check the created tables and views
show tables like 'clickzetta_datatypes%' in lakehouse_demo_table_design_schema;

Insert Records into the Table

INSERT INTO lakehouse_demo_table_design_schema.clickzetta_datatypes VALUES
(1, true, X'01', 'a', DATE'2022-02-01', 1000.123456, 2.0, 1.5, 42, INTERVAL 1 DAY, 103, 'test string 1',TIMESTAMP '2022-02-01 20:00:00', 11, ARRAY(STRUCT(1, 'A')), MAP('key1', 'value1'), STRUCT(1, 'A', 2.0), 'varchar example 1',JSON '{"id": 1, "value": "100", "comment": "JSON Sample data"}' ),
(2, false, X'02', 'b', DATE'2022-02-02', 2000.234567, 4.0, 2.5, 84, INTERVAL 2 DAY, 104,'test string 2',TIMESTAMP '2022-02-02 21:00:00', 12, ARRAY(STRUCT(2, 'B')), MAP('key2', 'value2'), STRUCT(2, 'B', 4.0), 'varchar example 2',JSON '{"id": 2, "value": "200", "comment": "JSON Sample data"}' );

Query date, timestamp, interval type data in the table

-- 1. Filter by date
SELECT * FROM lakehouse_demo_table_design_schema.clickzetta_datatypes WHERE c_data >= DATE '2022-02-02';

-- 2. Select records within a specific time range
SELECT * FROM lakehouse_demo_table_design_schema.clickzetta_datatypes WHERE c_timestamp BETWEEN TIMESTAMP '2022-02-01 20:00:00' AND TIMESTAMP '2022-02-02 21:00:00';

-- 3. Add days to a date
SELECT c_data, c_data + INTERVAL 7 DAY as plus_7_days FROM lakehouse_demo_table_design_schema.clickzetta_datatypes;

-- 4. Calculate the difference in days between two dates
SELECT c_data, DATEDIFF((SELECT c_data FROM lakehouse_demo_table_design_schema.clickzetta_datatypes WHERE c_bigint = 2), c_data) as days_difference FROM lakehouse_demo_table_design_schema.clickzetta_datatypes;

-- 5. Extract the year, month, and day from a date
SELECT EXTRACT(YEAR FROM c_data) as year, EXTRACT(MONTH FROM c_data) as month, EXTRACT(DAY FROM c_data) as day FROM lakehouse_demo_table_design_schema.clickzetta_datatypes;

-- 6. Calculate the difference between a timestamp and the current time (minutes)
SELECT c_timestamp, TIMESTAMPDIFF(MINUTE, c_timestamp, NOW()) as minutes_difference FROM lakehouse_demo_table_design_schema.clickzetta_datatypes;

-- 7. Calculate the difference between a timestamp and the current time (minutes)
SELECT  INTERVAL 10 DAY, c_interval, INTERVAL 10 DAY > c_interval from lakehouse_demo_table_design_schema.clickzetta_datatypes;

Operating Complex Data Types: map, array, struct, json

-- 1. Extract values from map
SELECT
  c_int,
  c_map['key1'] AS map_key1_value, 
  c_map['key2'] AS map_key2_value
FROM lakehouse_demo_table_design_schema.clickzetta_datatypes;

-- 2. Calculate map length
SELECT
  c_int,
  cardinality(c_map) AS map_length
FROM lakehouse_demo_table_design_schema.clickzetta_datatypes;

-- 3. Extract struct fields from array
SELECT
  c_int,
  c_array[0].a AS array_col1_value, 
  c_array[0].b AS array_col2_value
FROM lakehouse_demo_table_design_schema.clickzetta_datatypes;

-- 4. Calculate array length
SELECT
  c_int,
  array_size(c_array) AS array_length
FROM lakehouse_demo_table_design_schema.clickzetta_datatypes;

-- 5. Extract values from struct
SELECT
  c_int,
  c_struct.a AS struct_col1_value,
  c_struct.b AS struct_col2_value,
  c_struct.c AS struct_col3_value
FROM lakehouse_demo_table_design_schema.clickzetta_datatypes;

-- 6. Combine results into one cell
SELECT
  c_int,
  concat_ws(
    'Map Value Key1: ', c_map['key1'], ', ',
    'Map Value Key2: ', c_map['key2'], ', ',
    'Array Struct: (', c_array[0].a, ', ', c_array[0].b, ')'
  ) AS combined_result
FROM lakehouse_demo_table_design_schema.clickzetta_datatypes;

-- 7. Extract JSON fields
SELECT
  json_extract_int(c_json,"$.id") as id,
  json_extract_bigint(c_json,"$.value") as value,
  json_extract_string(c_json,"$.comment") as comment
FROM lakehouse_demo_table_design_schema.clickzetta_datatypes;

Query MAP Type Data

Below are some examples of SELECT statements for the c_map type column in the clickzetta_datatypes table:

-- 1. Extract values from the map
SELECT
  c_int,
  c_map['key1'] AS map_key1_value, 
  c_map['key2'] AS map_key2_value
FROM lakehouse_demo_table_design_schema.clickzetta_datatypes;

-- 2. Calculate the length of the map
SELECT
  c_int,
  cardinality(c_map) AS map_length
FROM lakehouse_demo_table_design_schema.clickzetta_datatypes;

-- 3. Check if a specified key exists in the map
SELECT
  c_int,
  c_map['key_to_check'] IS NOT NULL AS key_exists
FROM lakehouse_demo_table_design_schema.clickzetta_datatypes;

-- 4. Use values in the map for conditional filtering
SELECT
  c_int,
  c_map
FROM lakehouse_demo_table_design_schema.clickzetta_datatypes
WHERE c_map['key1'] = 'value_to_match';

-- 5. Return the row with the maximum key value
SELECT
  c_int,
  c_map
FROM lakehouse_demo_table_design_schema.clickzetta_datatypes
WHERE c_map['key_with_max_value'] = (
  SELECT MAX(c_map['key_with_max_value'])
  FROM lakehouse_demo_table_design_schema.clickzetta_datatypes
);

-- 6. Convert the map to an array and extract the key and value of the first element
SELECT
  c_int,
  map_keys(c_map)[0] AS first_key,
  map_values(c_map)[0] AS first_value
FROM lakehouse_demo_table_design_schema.clickzetta_datatypes;

Query Array Type Data

Below are some examples of SELECT statements for the c_array type column in the clickzetta_datatypes table:

-- 1. Extract values from array
SELECT
  c_int,
  c_array[0] AS array_element_1, 
  c_array[1] AS array_element_2
FROM lakehouse_demo_table_design_schema.clickzetta_datatypes;

-- 2. Calculate array length
SELECT
  c_int,
  cardinality(c_array) AS array_length
FROM lakehouse_demo_table_design_schema.clickzetta_datatypes;

Query Struct Type Data

Below are some examples of SELECT statements for the c_struct type column in the clickzetta_datatypes table, including performing various operations on the c_struct column.

-- 1. Select all attributes of c_struct
SELECT c_struct.a, c_struct.b, c_struct.c
FROM lakehouse_demo_table_design_schema.clickzetta_datatypes;

-- 2. Perform operations on the c_struct column

-- 2.1 Select the result of multiplying the a attribute in the c_struct column by a certain value (e.g., 2)
SELECT c_struct.a * 2 as multiplied_a
FROM lakehouse_demo_table_design_schema.clickzetta_datatypes;

-- 2.2 Calculate the average value using the a and c attributes in the c_struct column
SELECT (c_struct.a + c_struct.c) / 2 as avg_a_c
FROM lakehouse_demo_table_design_schema.clickzetta_datatypes;

-- 2.3 Sort the results based on the b attribute in the c_struct column
SELECT c_struct.a, c_struct.b, c_struct.c
FROM lakehouse_demo_table_design_schema.clickzetta_datatypes
ORDER BY c_struct.b;

-- 3. Use aggregate functions to process the c_struct column

-- 3.1 Calculate the sum of the a attribute in the c_struct column
SELECT SUM(c_struct.a) as total_a
FROM lakehouse_demo_table_design_schema.clickzetta_datatypes;

-- 3.2 Calculate the average value of the c attribute in the c_struct column
SELECT AVG(c_struct.c) as avg_c
FROM lakehouse_demo_table_design_schema.clickzetta_datatypes;

-- 3.3 Get the maximum value of the a attribute in the c_struct column
SELECT MAX(c_struct.a) as max_a
FROM lakehouse_demo_table_design_schema.clickzetta_datatypes;

Cleanup

DROP TABLE IF EXISTS lakehouse_demo_table_design_schema.clickzetta_datatypes;
DROP TABLE IF EXISTS lakehouse_demo_table_design_schema.clickzetta_datatypes_like;
DROP TABLE IF EXISTS lakehouse_demo_table_design_schema.clickzetta_datatypes_as;
DROP VIEW IF EXISTS lakehouse_demo_table_design_schema.clickzetta_datatypes_view;
DROP SCHEMA IF EXISTS lakehouse_demo_table_design_schema;

Congratulations, it's done.

Please enjoy and learn more!

Appendix

Download Zeppelin Notebook source file

The code in this document is also available in a version that runs on Zeppelin. If you want to run the code directly, please follow the documentation to install Zeppelin.

02.Table Design.ipynb