Functions to format a SQL file with multiple queries and SQL statements

Use-Case

Assume you have a file called sql_file.sql containing SQL statements and queries.

After reading it in python we could have something like this:

sql_file = """
--- Views for some nice data mart ---
use database my_database;
use schema my_schema;

create or replace view first_view as -- my first view
select a.car_id,
       b.car_name, sum(a.price) over (partition by b.car_name order by a.car_id) as sum_price, a.price,
from sales as a left join (select car_id, car_name, from cars) as b 
on a.car_id = b.car_id
where car_id>1 and car_id<=100 order by b.car_name;

-- Table no. 1 --
create or replace table first_table as -- my first table
select car_id,
       avg(price) as avg_price,
from first_view
group by car_id order by car_id;

--- End of file ---
""".strip()

Then we would like to format the SQL-queries in this file, while letting every other non-query-SQL statement untouched. For the example above we would like to have something like this:

expected_sql_file = """
--- Views for some nice data mart ---
use database my_database;
use schema my_schema;


CREATE OR REPLACE VIEW first_view AS -- my first view
SELECT a.car_id,
       b.car_name,
       sum(a.price) OVER (PARTITION BY b.car_name
                          ORDER BY a.car_id) as sum_price,
       a.price
FROM   sales as a
    LEFT JOIN (SELECT car_id,
                      car_name
               FROM   cars) as b
        ON a.car_id = b.car_id
WHERE  car_id > 1
   and car_id <= 100
ORDER BY b.car_name;


-- Table no. 1 --
CREATE OR REPLACE TABLE first_table AS -- my first table
SELECT car_id,
       avg(price) as avg_price
FROM   first_view
GROUP BY car_id
ORDER BY car_id;

--- End of file ---
""".lstrip()

Formatting philosophy of SQL files

  • Every SQL-query is separated from above by two new lines
  • Every SQL-query is formatted via format_sql

Main function to format the SQL commands in a file

This function applies also basic validation and aborts formatting if the statements CREATE .. TABLE / VIEW appear at least twice in the same query after splitting by semicolon, warning the user that she / he may have forgotten a semicolon

format_sql_commands[source]

format_sql_commands(s, max_len=82)

Format SQL commands in s. If SELECT line is longer than max_len then reformat line

Basic file formatting

assert_and_print(
    format_sql_commands(sql_file),
    expected_sql_file
)
82
--- Views for some nice data mart ---
use database my_database;
use schema my_schema;


CREATE OR REPLACE VIEW first_view AS -- my first view
SELECT a.car_id,
       b.car_name,
       sum(a.price) OVER (PARTITION BY b.car_name
                          ORDER BY a.car_id) as sum_price,
       a.price
FROM   sales as a
    LEFT JOIN (SELECT car_id,
                      car_name
               FROM   cars) as b
        ON a.car_id = b.car_id
WHERE  car_id > 1
   and car_id <= 100
ORDER BY b.car_name;


-- Table no. 1 --
CREATE OR REPLACE TABLE first_table AS -- my first table
SELECT car_id,
       avg(price) as avg_price
FROM   first_view
GROUP BY car_id
ORDER BY car_id;

--- End of file ---

Using the /*skip-formatter*/ marker to not format some query

assert_and_print(
    format_sql_commands("""
use database my_database;

/*skip-formatter*/
create Or replace View my_view aS
select asdf, qwer
from table1;

create or replace table my_table As
Select asdf, qwer
From table2
group by asdf;
"""),
    """use database my_database;

/*skip-formatter*/
create Or replace View my_view aS
select asdf, qwer
from table1;


CREATE OR REPLACE TABLE my_table AS
SELECT asdf,
       qwer
FROM   table2
GROUP BY asdf;
""")
82
use database my_database;

/*skip-formatter*/
create Or replace View my_view aS
select asdf, qwer
from table1;


CREATE OR REPLACE TABLE my_table AS
SELECT asdf,
       qwer
FROM   table2
GROUP BY asdf;

assert_and_print(
    format_sql_commands("""
create or replace table my_table As
Select asdf, qwer
From table2
group by asdf;
"""),
    """
CREATE OR REPLACE TABLE my_table AS
SELECT asdf,
       qwer
FROM   table2
GROUP BY asdf;
""".lstrip())
82
CREATE OR REPLACE TABLE my_table AS
SELECT asdf,
       qwer
FROM   table2
GROUP BY asdf;

assert_and_print(
    format_sql_commands("""
create or replace table my_table As
Select asdf, qwer
From table2
group by asdf -- some comment
;
"""),
    """
CREATE OR REPLACE TABLE my_table AS
SELECT asdf,
       qwer
FROM   table2
GROUP BY asdf; -- some comment
""".lstrip())
82
CREATE OR REPLACE TABLE my_table AS
SELECT asdf,
       qwer
FROM   table2
GROUP BY asdf; -- some comment

assert_and_print(
    format_sql_commands(
"""
create table my_table As
select asdf, Qwer, /* ; */
qwer2, -- ;
replace(';', '', qwer3) as Qwer4
from table1; /* Some comment */

create view my_view As
Select asdf
From my_table; /* Another comment */
"""
    ),
"""
CREATE TABLE my_table AS
SELECT asdf,
       qwer, /* ; */
       qwer2, -- ;
       replace(';', '', qwer3) as qwer4
FROM   table1; /* Some comment */


CREATE VIEW my_view AS
SELECT asdf
FROM   my_table; /* Another comment */
""".lstrip()
)
82
CREATE TABLE my_table AS
SELECT asdf,
       qwer, /* ; */
       qwer2, -- ;
       replace(';', '', qwer3) as qwer4
FROM   table1; /* Some comment */


CREATE VIEW my_view AS
SELECT asdf
FROM   my_table; /* Another comment */

assert_and_print(
    format_sql_commands(
"""
create or replace transient table my_table As
select asdf, Qwer, /* ; */
qwer2, -- ;
replace(';', '', qwer3) as Qwer4
from table1;

create view my_view As
Select asdf
From my_table;
""".lstrip()
    ),
"""
CREATE OR REPLACE TRANSIENT TABLE my_table AS
SELECT asdf,
       qwer, /* ; */
       qwer2, -- ;
       replace(';', '', qwer3) as qwer4
FROM   table1;


CREATE VIEW my_view AS
SELECT asdf
FROM   my_table;
""".lstrip()
)
82
CREATE OR REPLACE TRANSIENT TABLE my_table AS
SELECT asdf,
       qwer, /* ; */
       qwer2, -- ;
       replace(';', '', qwer3) as qwer4
FROM   table1;


CREATE VIEW my_view AS
SELECT asdf
FROM   my_table;

If the validation fails, then the function returns a dictionary instead of the formatted queries with information about the error

Semicolon validation error

assert_and_print(
    format_sql_commands(
"""
create or replace transient table my_table As
select asdf, Qwer, /* ; */
qwer2, -- ;
replace(';', '', qwer3) as Qwer4
from table1

create view my_view As
Select asdf
From my_table;
""".lstrip()
    ), 
    {"semicolon": {"error_code": 2, "lines": [(1, 7)]}}
)
82
{'semicolon': {'error_code': 2, 'lines': [(1, 7)]}}

Unbalanced parenthesis error

assert_and_print(
    format_sql_commands(
"""
create or replace transient table my_table As
select asdf, Qwer, /* ; */
(qwer2, -- ; ()
( /* ) */
replace(';', '', qwer3) as Qwer4
from table1;

create view my_view As
(Select asdf
From my_table;
""".lstrip()
    ), 
    {"unbalanced_parenthesis": {"error_code": 3, "lines": [[3, 4], [9]]}}
)
82
{'unbalanced_parenthesis': {'error_code': 3, 'lines': [[3, 4], [9]]}}

Unbalanced parenthesis + semicolon error

assert_and_print(
    format_sql_commands(
"""
create or replace transient table my_table As
select asdf, Qwer, /* ; */
(qwer2, -- ; ()
( /* ) */
replace(';', '', qwer3) as Qwer4
from table1

create view my_view As
(Select asdf
From my_table;
""".lstrip()
    ), 
    {
        "semicolon": {"error_code": 2, "lines": [(1, 8)]},        
        "unbalanced_parenthesis": {"error_code": 3, "lines": [[3, 4, 9]]},
    }
)
82
{'semicolon': {'error_code': 2, 'lines': [(1, 8)]}, 'unbalanced_parenthesis': {'error_code': 3, 'lines': [[3, 4, 9]]}}

Unbalanced case when ... end

assert_and_print(
    format_sql_commands(
"""
create or replace transient table my_table As
select asdf, Qwer, /* ; */
case when asdf = 1 then 1 as qwer,
replace(';', '', qwer3) as Qwer4
from table1;

create view my_view As
""".lstrip()
    ), 
    {"unbalanced_case": {"error_code": 4, "lines": [[3]]}}
)
82
{'unbalanced_case': {'error_code': 4, 'lines': [[3]]}}

Function to format 1 SQL file

format_sql_file[source]

format_sql_file(f, max_len=82)

Format file f with SQL commands and overwrite the file. If SELECT line is longer than 82 characters then reformat line

Return exit_code:

  • 0 = Everything already formatted
  • 1 = Formatting applied
  • 2 = Problem detected, formatting aborted
with tempfile.NamedTemporaryFile(mode="r+") as file:
    file.write(sql_file)
    file.seek(0)
    format_sql_file(file.name)
    formatted_file = file.read()
assert_and_print(
    formatted_file,
    expected_sql_file
)
82
--- Views for some nice data mart ---
use database my_database;
use schema my_schema;


CREATE OR REPLACE VIEW first_view AS -- my first view
SELECT a.car_id,
       b.car_name,
       sum(a.price) OVER (PARTITION BY b.car_name
                          ORDER BY a.car_id) as sum_price,
       a.price
FROM   sales as a
    LEFT JOIN (SELECT car_id,
                      car_name
               FROM   cars) as b
        ON a.car_id = b.car_id
WHERE  car_id > 1
   and car_id <= 100
ORDER BY b.car_name;


-- Table no. 1 --
CREATE OR REPLACE TABLE first_table AS -- my first table
SELECT car_id,
       avg(price) as avg_price
FROM   first_view
GROUP BY car_id
ORDER BY car_id;

--- End of file ---

sql_forgotten_semicolon = """
create or replace transient table my_table As
select asdf, Qwer, /* ; */
qwer2, -- ;
replace(';', '', qwer3) as Qwer4
from table1

create view my_view As
Select asdf
From my_table;
""".lstrip()
with tempfile.NamedTemporaryFile(mode="r+") as file:
    file.write(sql_forgotten_semicolon)
    file.seek(0)
    exit_code = format_sql_file(file.name)
    formatted_file = file.read()
assert_and_print( # no formatting
    formatted_file,
    sql_forgotten_semicolon
)
assert exit_code == 2
82
Something went wrong in file: /tmp/tmp2qgedw93
[WARNING] Identified CREATE keyword more than twice within the same query at lines [(1, 7)]
You may have forgotten a semicolon (;) to delimit the queries
Aborting formatting for file /tmp/tmp2qgedw93
Aborting formatting for file /tmp/tmp2qgedw93
create or replace transient table my_table As
select asdf, Qwer, /* ; */
qwer2, -- ;
replace(';', '', qwer3) as Qwer4
from table1

create view my_view As
Select asdf
From my_table;

sql_forgotten_semicolon = """
create or replace transient table my_table As
select asdf, Qwer, /* ; */
(qwer2, -- ;
replace(';', '', qwer3) as Qwer4
from table1;

create view my_view As
(Select asdf
From my_table;
""".lstrip()
with tempfile.NamedTemporaryFile(mode="r+") as file:
    file.write(sql_forgotten_semicolon)
    file.seek(0)
    exit_code = format_sql_file(file.name)
    formatted_file = file.read()
assert_and_print( # no formatting
    formatted_file,
    sql_forgotten_semicolon
)
assert exit_code == 2
82
Something went wrong in file: /tmp/tmp5rkzfb24
[WARNING] Identified unbalanced parenthesis at lines [[3], [8]]
You should check your parenthesis
Aborting formatting for file /tmp/tmp5rkzfb24
Aborting formatting for file /tmp/tmp5rkzfb24
create or replace transient table my_table As
select asdf, Qwer, /* ; */
(qwer2, -- ;
replace(';', '', qwer3) as Qwer4
from table1;

create view my_view As
(Select asdf
From my_table;

sql_forgotten_semicolon = """
create or replace transient table my_table As
select asdf, Qwer, /* ; */
(qwer2, -- ;
replace(';', '', qwer3) as Qwer4
from table1

create view my_view As
(Select asdf
From my_table;
""".lstrip()
with tempfile.NamedTemporaryFile(mode="r+") as file:
    file.write(sql_forgotten_semicolon)
    file.seek(0)
    exit_code = format_sql_file(file.name)
    formatted_file = file.read()
assert_and_print( # no formatting
    formatted_file,
    sql_forgotten_semicolon
)
assert exit_code == 2
82
Something went wrong in file: /tmp/tmpzf3rofts
[WARNING] Identified CREATE keyword more than twice within the same query at lines [(1, 7)]
You may have forgotten a semicolon (;) to delimit the queries
[WARNING] Identified unbalanced parenthesis at lines [[3, 8]]
You should check your parenthesis
Aborting formatting for file /tmp/tmpzf3rofts
Aborting formatting for file /tmp/tmpzf3rofts
create or replace transient table my_table As
select asdf, Qwer, /* ; */
(qwer2, -- ;
replace(';', '', qwer3) as Qwer4
from table1

create view my_view As
(Select asdf
From my_table;

Function to format many SQL files

format_sql_files[source]

format_sql_files(files, recursive=False, max_len=82)

Format SQL files

with tempfile.TemporaryDirectory() as tmp_dir:
    with open(os.path.join(tmp_dir, "tmp"), "w") as f:
        f.write(sql_file)
    with open(os.path.join(tmp_dir, "tmp2"), "w") as f:
        f.write(sql_file)
    format_sql_files([os.path.join(tmp_dir, "tmp"), os.path.join(tmp_dir, "tmp2")])
    with open(os.path.join(tmp_dir, "tmp"), "r") as f:
        formatted_file = f.read()
    assert_and_print(
        formatted_file,
        expected_sql_file
    )
    with open(os.path.join(tmp_dir, "tmp2"), "r") as f:
        formatted_file = f.read()
    assert_and_print(
        formatted_file,
        expected_sql_file
    )
82
82
All specified files were formatted!
--- Views for some nice data mart ---
use database my_database;
use schema my_schema;


CREATE OR REPLACE VIEW first_view AS -- my first view
SELECT a.car_id,
       b.car_name,
       sum(a.price) OVER (PARTITION BY b.car_name
                          ORDER BY a.car_id) as sum_price,
       a.price
FROM   sales as a
    LEFT JOIN (SELECT car_id,
                      car_name
               FROM   cars) as b
        ON a.car_id = b.car_id
WHERE  car_id > 1
   and car_id <= 100
ORDER BY b.car_name;


-- Table no. 1 --
CREATE OR REPLACE TABLE first_table AS -- my first table
SELECT car_id,
       avg(price) as avg_price
FROM   first_view
GROUP BY car_id
ORDER BY car_id;

--- End of file ---

--- Views for some nice data mart ---
use database my_database;
use schema my_schema;


CREATE OR REPLACE VIEW first_view AS -- my first view
SELECT a.car_id,
       b.car_name,
       sum(a.price) OVER (PARTITION BY b.car_name
                          ORDER BY a.car_id) as sum_price,
       a.price
FROM   sales as a
    LEFT JOIN (SELECT car_id,
                      car_name
               FROM   cars) as b
        ON a.car_id = b.car_id
WHERE  car_id > 1
   and car_id <= 100
ORDER BY b.car_name;


-- Table no. 1 --
CREATE OR REPLACE TABLE first_table AS -- my first table
SELECT car_id,
       avg(price) as avg_price
FROM   first_view
GROUP BY car_id
ORDER BY car_id;

--- End of file ---

format_sql_files_cli[source]

format_sql_files_cli()

Format SQL files