Go Back
Using SQL to create complex JSON structures
Publisher
: Lingk
Run In Lingk
Description
This recipe demonstrates creating a number types of JSON structures. To run this recipe, choose your Environment, and click "Run". For more examples, go to: https://docs.databricks.com/spark/latest/spark-sql/complex-types.html
Browse the knowledge base
Twitter
E-Mail
# _____ _ _____ __ # | __ \ (_) |_ _| / _| # | |__) |___ ___ _ _ __ ___ | | _ __ | |_ ___ # | _ // _ \/ __| | '_ \ / _ \ | | | '_ \| _/ _ \ # | | \ \ __/ (__| | |_) | __/ _| |_| | | | || (_) | # |_| \_\___|\___|_| .__/ \___| |_____|_| |_|_| \___/ # | | # |_| # Project Name - USING SQL TO CREATE COMPLEX JSON STRUCTURES # Recipe URL - https://app.lingk.io/a/10932/tf/17912 # Description - # This recipe demonstrates creating a number types of JSON structures. # To run this recipe, choose your Environment, and click "Run". # For more examples, go to: https://docs.databricks.com/spark/latest/spark-sql/complex-types.html # Industry - Higher Ed # Business Process - Graduate Reporting # Systems - # Connectors - JSON # Data Flows - Single Direction # Connection Type - JSON # Add Recipe notes / Change log information here! # _____ _ # / ____| | | # | | ___ _ __ _ __ ___ ___| |_ ___ _ __ ___ # | | / _ \| '_ \| '_ \ / _ \/ __| __/ _ \| '__/ __| # | |___| (_) | | | | | | | __/ (__| || (_) | | \__ \ # \_____\___/|_| |_|_| |_|\___|\___|\__\___/|_| |___/ # # CONNECTORS specify what data will be pulled into the in-memory database during processing connectors: # JSON Setup - https://help.lingk.io/en/articles/74-json-connector-reference ###### Start: JSON Connectors ####### - name: sourceUsers type: json properties: jsonObject: | [ {"StudentId":"131e1w131e1w","FirstName":"John","LastName":"Doe", "Email":"john@doe.com"}, {"StudentId":"23e2#123e2#1","FirstName":"Tammy","LastName":"Doe", "Email":"tammy@doe.com"} ] ###### Start: JSON Connectors ####### # _____ _ _ _ # / ____| | | | | | # | (___ | |_ __ _| |_ ___ _ __ ___ ___ _ __ | |_ ___ # \___ \| __/ _` | __/ _ \ '_ ` _ \ / _ \ '_ \| __/ __| # ____) | || (_| | || __/ | | | | | __/ | | | |_\__ \ # |_____/ \__\__,_|\__\___|_| |_| |_|\___|_| |_|\__|___/ # STATEMENTS specify how the data should be processed while in memory statements: #******************************************************************** D I S C L A I M E R *********************************************************************************************** # * # Note that in an effort to keep recipes optimized for DPH (Data Processing Hours), print statements should be commented out after development has concluded for a recipe. * # For more information on DPH optimization, please visit the following help article - https://help.lingk.io/en/articles/212-minimizing-data-processing-hours-on-the-lingk-platform * # * #******************************************************************** D I S C L A I M E R *********************************************************************************************** # Create a simple JSON structure # Creates a struct with the given field values. - statement: | (structure) => SELECT "test" example1, struct("world" hello) as example2, named_struct("hello", "world") as example3 #- statement: print structure # Create a static nested JSON structure # Creates a struct with the given field values. - statement: | (nestedStructure) => SELECT false enable_sis_reactivation, false force_validations, named_struct( "name", "John Doe", "skip_registration",true, "terms_of_use",true ) user, named_struct( "unique_id", "john@doe.com", "sis_user_id", "1234", "send_confirmation", true, "force_self_registration", false ) pseudonym, named_struct( "type","email", "address", "john@doe.com", "skip_confirmation", false ) communication_channel #- statement: print nestedStructure # Create a dynamic nested JSON structure # Creates a struct with the given field values. - statement: | (dynamicNestedStructure) => SELECT false enable_sis_reactivation, false force_validations, named_struct( "name", concat(FirstName, " ", LastName), "skip_registration",true, "terms_of_use",true ) user, named_struct( "unique_id", Email, "sis_user_id", StudentId, "send_confirmation", true, "force_self_registration", false ) pseudonym, named_struct( "type","email", "address", Email, "skip_confirmation", false ) communication_channel FROM sourceUsers #- statement: print dynamicNestedStructure # Create dynamic nested JSON array # Collects and returns a list of non-unique elements. # Creates a struct with the given field values. - statement: | (dynamicNestedArray) => SELECT current_timestamp() created_date, collect_list(struct( false enable_sis_reactivation, false force_validations, named_struct( "name", concat(FirstName, " ", LastName), "skip_registration",true, "terms_of_use",true ) user, named_struct( "unique_id", Email, "sis_user_id", StudentId, "send_confirmation", true, "force_self_registration", false ) pseudonym, named_struct( "type","email", "address", Email, "skip_confirmation", false ) communication_channel ) ) items FROM sourceUsers #- statement: print dynamicNestedArray # Create dynamic JSON array of strings # [ "John", "Tammy" ] # Collects and returns a list of non-unique elements. - statement: | (dynamicArrayGroup) => SELECT current_timestamp() created_date, LastName last_name , collect_list( FirstName ) first_names FROM sourceUsers GROUP BY StudentId #- statement: print dynamicArrayGroup # Create dynamic JSON array of structs # [ { "firstName": "John" }, { "firstName": "Tammy" } ] # Collects and returns a list of non-unique elements. - statement: | (dynamicArrayGroup) => SELECT first(FirstName), collect_set( named_struct( "firstName", FirstName ) ) first_names FROM sourceUsers GROUP BY StudentId #- statement: print dynamicArrayGroup # Create dynamic JSON array of strings # No group by needed if there are not additional fields # [ { "firstName": "John" }, { "firstName": "Tammy" } ] # Collects and returns a list of non-unique elements. - statement: | (dynamicArrayGroup) => SELECT collect_set( named_struct( "firstName", FirstName ) ) first_names FROM sourceUsers #- statement: print dynamicArrayGroup # Add more statements to convert, join, aggregrate, transform, and integrate your data
Fuzzy Matching with Soundex and Levenshtein
Using Jinja-like Template Expressions in Connectors and Statements