Helper functions for SQL-text preparation
assert_and_print("some string", "some string")
assert_and_print(
compress_dicts(
[
{"string": "some string", "key1": True, "key2": True},
{"string": ". qwerty", "key1": True, "key2": True},
{"string": " asdf", "key1": True, "key2": True},
{"string": "another string", "key1": True, "key2": False},
{"string": " bla bla", "key1": True, "key2": False},
{"string": "1234", "key1": False, "key2": False},
],
("key1", "key2")
),
[
{"string": "some string. qwerty asdf", "key1": True, "key2": True},
{"string": "another string bla bla", "key1": True, "key2": False},
{"string": "1234", "key1": False, "key2": False},
]
)
assert_and_print(
remove_whitespaces_newline("select asdf\n from table1 \nwhere asdf = 1 \n group by asdf"),
"select asdf\nfrom table1\nwhere asdf = 1\ngroup by asdf"
)
assert_and_print(
remove_whitespaces_comments(
"select asdf[C] from table1 [CS]where asdf = 1 [C] group by asdf"
),
"select asdf[C]from table1[CS]where asdf = 1[C]group by asdf"
)
assert_and_print(
remove_whitespaces_comments(
"select asdf[C][CS]/* asdf [CI] */[C] from table1 [CS]where asdf = 1 [C] group by asdf"
),
"select asdf[C][CS]/* asdf[CI]*/[C]from table1[CS]where asdf = 1[C]group by asdf"
)
Only whitespaces
assert_and_print(
remove_redundant_whitespaces("\nsome string many whitespaces some more"),
"some string many whitespaces some more"
)
Whitespaces and newlines
assert_and_print(
remove_redundant_whitespaces("\nsome string \n many whitespaces\n \nsome more\n"),
"some string \n many whitespaces\n \nsome more"
)
assert_and_print(
remove_whitespaces_parenthesis("select asdf, substr( qwer, 1, 2 ) as qwerty"),
"select asdf, substr(qwer, 1, 2) as qwerty"
)
assert_and_print(
add_whitespaces_between_symbols(
"WHERE asdf= 1 and qwer=>1 or blabla ='asdf'"
), "WHERE asdf = 1 and qwer => 1 or blabla = 'asdf'"
)
assert_and_print(
mark_ci_comments("select /* asdf \n qwer */"),
"select /* asdf [CI] qwer */"
)
assert_and_print(
mark_ci_comments("select /* asdf \n qwe \n rqwer */"),
"select /* asdf [CI] qwe [CI] rqwer */"
)
assert_and_print(
mark_comments(
"""
select asdf, -- some comment
qwer, qwer2,
/* comment line */
qwer3,
qwer4 -- comment
"""
),
"""
select asdf, -- some comment[C]
qwer, qwer2,
[CS]/* comment line */[C]
qwer3,
qwer4 -- comment[C]
"""
)
assert_and_print(
mark_comments(
"""
select asdf, -- some comment
qwer, qwer2, -- another comment
/* comment line */
qwer3,
-- another comment line
qwer4 -- comment
"""
),
"""
select asdf, -- some comment[C]
qwer, qwer2, -- another comment[C]
[CS]/* comment line */[C]
qwer3,
[CS]-- another comment line[C]
qwer4 -- comment[C]
"""
)
assert_and_print(
mark_comments(
"""
create or replace my_table as
/* some comment
some new comment line */
select asdf,
qwer, qwer2
from table1
"""
),
"""
create or replace my_table as
[CS]/* some comment[CI] some new comment line */[C]
select asdf,
qwer, qwer2
from table1
"""
)
assert_and_print(
mark_comments(
"""
SELECT asdf, qwer, /* another comment */
qwer1,
/* inline comment */
qwer2
FROM table1
WHERE asdf=1"""
),
"\nSELECT asdf, qwer, /* another comment */[C]\nqwer1, \n[CS]/* inline comment */[C]\nqwer2\nFROM table1\nWHERE asdf=1"
)
assert_and_print(
mark_comments(
"""
select a.asdf, b.qwer, -- some comment here
/* and here is a line comment inside select */
substr(c.asdf, 1, 2) as substr_asdf,
/* some commenT there */
"""
),
"""
select a.asdf, b.qwer, -- some comment here[C]
[CS]/* and here is a line comment inside select */[C]
substr(c.asdf, 1, 2) as substr_asdf,
[CS]/* some commenT there */[C]
"""
)
assert_and_print(
mark_comments(
"""
/* multi line
comment */
select a.asdf, b.qwer, -- some comment here
/* and here is a line comment inside select */
substr(c.asdf, 1, 2) as substr_asdf,
/* some commenT
there */
case when a.asdf= 1 then 'b' /* here a case comment */
"""
),
"""
[CS]/* multi line[CI]comment */[C]
select a.asdf, b.qwer, -- some comment here[C]
[CS]/* and here is a line comment inside select */[C]
substr(c.asdf, 1, 2) as substr_asdf,
[CS]/* some commenT [CI]there */[C]
case when a.asdf= 1 then 'b' /* here a case comment */[C]
"""
)
assert_and_print(
mark_comments(
"""
select a.asdf, b.qwer, -- some comment here
/* and here is a line comment inside select */
/* some commenT there */
case when a.asdf= 1 then 'b' /* here a case comment */
"""
),
"""
select a.asdf, b.qwer, -- some comment here[C]
[CS]/* and here is a line comment inside select */[C]
[CS]/* some commenT there */[C]
case when a.asdf= 1 then 'b' /* here a case comment */[C]
"""
)
assert_and_print(
split_query(
"""
create table my_table as -- some table
seLect asdf, [CS]/* some comment */ qwer, 'blabla' as qwerty
from table1
""".strip()
),
[
{"string": "create table my_table as ", "comment": False, "quote": False, "select": False},
{"string": "-- some table", "comment": True, "quote": False, "select": False},
{"string": "\n", "comment": False, "quote": False, "select": False},
{"string": "seLect asdf, ", "comment": False, "quote": False, "select": True},
{"string": "[CS]/* some comment */", "comment": True, "quote": False, "select": True},
{"string": " qwer, '", "comment": False, "quote": False, "select": True},
{"string": "blabla", "comment": False, "quote": True, "select": True},
{"string": "' as qwerty\n", "comment": False, "quote": False, "select": True},
{"string": "from table1", "comment": False, "quote": False, "select": False},
]
)
This function is for more comfortable testing purposes in core
assert_and_print(
split_apply_concat(
"select asdf, /* some comment */", lambda split_s: [d for d in split_s if not d["comment"]]
),
"select asdf, "
)
assert_and_print(
split_comment_quote(
"""
create table my_table as -- some table
seLect asdf, [CS]/* some comment */ qwer, 'blabla' as qwerty
from table1
""".strip()
),
[
{"string": "create table my_table as ", "comment": False, "quote": False},
{"string": "-- some table", "comment": True, "quote": False},
{"string": "\nseLect asdf, ", "comment": False, "quote": False},
{"string": "[CS]/* some comment */", "comment": True, "quote": False},
{"string": " qwer, '", "comment": False, "quote": False},
{"string": "blabla", "comment": False, "quote": True},
{"string": "' as qwerty\nfrom table1", "comment": False, "quote": False},
]
)
assert_and_print(
split_comment_quote(
"""replace('"";', "';'", asdf2) as asdf5"""
),
[
{"string": """replace('""", "comment": False, "quote": False},
{"string": """"";""", "comment": False, "quote": True},
{"string": """', \"""", "comment": False, "quote": False},
{"string": """';'""", "comment": False, "quote": True},
{"string": """\", asdf2) as asdf5""", "comment": False, "quote": False}
]
)
assert_and_print(
split_comment(
"""
create table my_table as -- some table
seLect asdf, [CS]/* some comment */ qwer, 'blabla' as qwerty
from table1
""".strip()
),
[
{"string": "create table my_table as ", "comment": False},
{"string": "-- some table", "comment": True},
{"string": "\nseLect asdf, ", "comment": False},
{"string": "[CS]/* some comment */", "comment": True},
{"string": " qwer, 'blabla' as qwerty\nfrom table1", "comment": False},
]
)
assert_and_print(
identify_in_sql(r"\bcase\b", "select asdf, qwer, case when blabla case"),
[19, 36]
)
assert_and_print(
identify_in_sql(r"\n", "select asdf,\nqwer"),
[12]
)
assert_and_print(
identify_in_sql(r"\n", "select asdf,\nqwer, -- some comment\n"),
[12, 34]
)
assert_and_print(
identify_in_sql(r"\n", "select asdf,\nqwer,[CS]-- some comment[C]\nqwer2"),
[12, 40]
)
assert_and_print(
split_by_semicolon(
"""
use database my_database; -- ; ; ;;
use schema my_schema; /* -- ; */
create or replace table my_table as
select asdf, qwer, /* ;; -- ; */
replace('"";', "';'", asdf2) as asdf5
qwer2 from -- ;
table2;
use schema_another_schema;
"""
),
[
"\nuse database my_database",
" -- ; ; ;;\nuse schema my_schema",
""" /* -- ; */\n\ncreate or replace table my_table as\nselect asdf, qwer, /* ;; -- ; */\nreplace(\'"";\', "\';\'", asdf2) as asdf5\nqwer2 from -- ;\ntable2""",
"\n\nuse schema_another_schema",
"\n"
]
)
assert_and_print(
replace_newline_chars(
"select asdf,\nqwer, /* some comment \n with multiple lines \n*/[C], some_field from\n table"
),
"select asdf, qwer, /* some comment \n with multiple lines \n*/[C], some_field from table"
)
assert_and_print(
sub_in_sql(
r",([\w\d])", r", \1", "select asdf,qwer, /*asdf,qwer*/ substr(',asdf',1, 2)"
),
"select asdf, qwer, /*asdf,qwer*/ substr(',asdf', 1, 2)"
)
assert_and_print(
add_whitespaces_after_comma(
"select asdf,qwer, /*asdf,qwer*/ substr(',asdf',1, 2)"
),
"select asdf, qwer, /*asdf,qwer*/ substr(',asdf', 1, 2)"
)
assert_and_print(
add_whitespaces_after_comma("select asdf,qwer,substr(asdf,1,2) as qwerty"),
"select asdf, qwer, substr(asdf, 1, 2) as qwerty"
)
assert_and_print(
add_whitespaces_after_comma("select asdf, qwer, substr(asdf,1,2) as qwerty"),
"select asdf, qwer, substr(asdf, 1, 2) as qwerty"
)
Function to identify end of fields in SELECT. Usually this is a comma but more generally specially when using functions this strategy does not work anymore. For the sake of explanation, consider for instance
substr(asdf, 1, 2)
In that case, we would like to not add a newline for each comma.
assert_and_print(
identify_end_of_fields(
"""
select asdf, substr(asdf, 1, 2) as qwer, concat(substr(asdf, 1, 2), substr(asdf, 3, 2)) as qwer2, asdf5
"""),
[12, 40, 97]
)
assert_and_print(
identify_end_of_fields(
"""
select concat(substr(concat(')0', substr(asdf, 1, 2)), -2, 2), substr(concat('(0', substr(asdf, 3, 2)), -2, 2)) as qwer, asdf
"""),
[120]
)
More advanced with comments
assert_and_print(identify_end_of_fields(
"""
select asdf, /* Some commnent */[C]qwerty, substr(asdf, 1, 2) as qwer, -- Some comment[C] asdf5
"""), [42]
)
assert_and_print(identify_end_of_fields(
"""
select asdf, [CS]/* Some commnent */[C]qwerty, substr(asdf, 1, 2) as qwer, -- Some comment[C] asdf5
"""), [46]
)
Even more tricky with a comma inside comment
assert_and_print(identify_end_of_fields(
"""
select asdf, /* Some, commnent */[C]qwerty, substr(asdf, 1, 2) as qwer, -- Some, comment[C] asdf5
"""), [43]
)
Having the end of fields positions, we need function to add a newline and proper indentation
assert_and_print(
add_newline_indentation(
"select asdf, substr(asdf, 1, 2) as qwer, concat(substr(asdf, 1, 2), substr(asdf, 3, 2)) as qwer2, asdf5",
7
),
"""
select asdf,
substr(asdf, 1, 2) as qwer,
concat(substr(asdf, 1, 2), substr(asdf, 3, 2)) as qwer2,
asdf5
""".strip()
)
assert_and_print(
add_newline_indentation(
"select asdf, substr(asdf, 1, 2) as qwer, lead(qwer) OVER (partition by asdf order by qwer), asdf2",
7
),
"""
select asdf,
substr(asdf, 1, 2) as qwer,
lead(qwer) OVER (partition by asdf order by qwer),
asdf2
""".strip()
)
assert_and_print(
add_newline_indentation(
"select asdf, replace(',', '', asdf) as qwer, lead(qwer) OVER (partition by asdf order by qwer), asdf2",
7
),
"""
select asdf,
replace(',', '', asdf) as qwer,
lead(qwer) OVER (partition by asdf order by qwer),
asdf2
""".strip()
)
assert (
extract_outer_subquery(
"() () (\nSELECT () (\nSELECT ())) ()"
) == [6, 30]
)
assert check_sql_query("""
--- Table 1---
creaTe or replace table my_table as
select asdf
from table
where asdf = 1
""".strip())
assert check_sql_query("""
--- Table 1---
creaTe or replace view my_table as
select asdf
from table
where asdf = 1
""".strip())
assert check_sql_query("""
SELECT qwer, asdf
""")
assert not check_sql_query("use database my_database;")
assert not check_sql_query("use database my_database; /* create table */")
assert not check_sql_query("""
create or replace task my_task as
""")
assert not check_sql_query("""
create or replace task my_task as
create or replace table as
select asdf
""")
assert not check_sql_query("""
use schema my_schema;
""")
assert check_skip_marker("""
SELECT asdf,
qwer,
/*skip-formatter*/
asdf2
FRoM table1
""")
assert not check_skip_marker("""
SELECT asdf,
qwer,
asdf2
FRoM table1
""")
assert_and_print(
identify_create_table_view(
"""
cReate or Replace table my_table as
select asdf, qwer
from table1
create table qwerty as
select field, field2
from table2;
"""
), [2, 6]
)
assert_and_print(
count_lines(
"""
cReate or Replace table my_table as
select asdf, qwer
from table1
create table qwerty as
select field, field2
from table2;
"""
), 8
)
assert_and_print(
find_line_number(
"""
cReate or Replace table my_table as
select asdf, qwer
from table1
create table qwerty as
select field, field2
from table2;
""", [1, 68]),
[2, 6]
)
assert_and_print(disimilarity("hello world", "hello world"), 0)
assert_and_print(disimilarity("hello world", "hello world!"), 2)
assert_and_print(
assign_comment(
"""
select asdf,
qwer,
case when asdf = 1 and
asdf = 2 then 2
when asdf = 3 then 3
else 0 end as qwerty,
qwer2
""".strip(),
[
{"comment": "/* some comment */[C]", "preceding": "select asdf, qwer, "},
{"comment": "-- comment there[C]", "preceding": "case when asdf = 1 "},
{"comment": "-- comment here[C]", "preceding": "and asdf = 2 "},
{"comment": "/* bla bla */[C]", "preceding": "then 2 when asdf = 3 then 3"}
]
),
"""
select asdf,
qwer, /* some comment */
case when asdf = 1 and -- comment there
asdf = 2 then 2 -- comment here
when asdf = 3 then 3 /* bla bla */
else 0 end as qwerty,
qwer2
""".strip()
)
assert_and_print(
assign_comment(
"""
select asdf,
qwer,
case when asdf = 1 and
asdf = 2 then 2
when asdf = 3 then 3
else 0 end as qwerty,
qwer2
""".strip(),
[
{"comment": "/* some comment */[C]", "preceding": "select asdf, qwer, "},
{"comment": "-- comment there[C]", "preceding": "case when asdf = 1"},
{"comment": "-- comment here[C][CS]/* Whole line comment */[C]", "preceding": "and asdf = 2"},
{"comment": "/* bla bla */[C]", "preceding": "then 2 when asdf = 3 then 3"}
]
),
"""
select asdf,
qwer, /* some comment */
case when asdf = 1 and -- comment there
asdf = 2 then 2 -- comment here
/* Whole line comment */
when asdf = 3 then 3 /* bla bla */
else 0 end as qwerty,
qwer2
""".strip()
)
assert_and_print(
assign_comment(
"""
select asdf,
qwer,
case when asdf = 1 and
asdf = 2 then 2
when asdf = 3 then 3
else 0 end as qwerty,
qwer2
""".strip(),
[
{"comment": "/* some comment */[C]", "preceding": "select asdf, qwer, "},
{"comment": "-- comment there[C]", "preceding": "case when asdf = 1"},
{"comment": "[CS]/* Whole line comment */[C]", "preceding": "and asdf = 2"},
{"comment": "/* bla bla */[C]", "preceding": "then 2 when asdf = 3 then 3"}
]
),
"""
select asdf,
qwer, /* some comment */
case when asdf = 1 and -- comment there
asdf = 2 then 2
/* Whole line comment */
when asdf = 3 then 3 /* bla bla */
else 0 end as qwerty,
qwer2
""".strip()
)
assert_and_print(
assign_comment(
"""
select asdf,
qwer,
case when asdf = 1 and
asdf = 2 then 2
when asdf = 3 then 3
else 0 end as qwerty,
qwer2
""".strip(),
[
{"comment": "[CS]/* Whole line after select */[C]", "preceding": "select asdf,"},
{"comment": "/* some comment */[C]", "preceding": "qwer, "},
{"comment": "-- comment there[C]", "preceding": "case when asdf = 1"},
{"comment": "[CS]/* Whole line comment */[C]", "preceding": "and asdf = 2"},
{"comment": "/* bla bla */[C]", "preceding": "then 2 when asdf = 3 then 3"},
{"comment": "[CS]/* another whole line comment */[C]", "preceding": "else 0 end as qwerty,"}
]
),
"""
select asdf,
/* Whole line after select */
qwer, /* some comment */
case when asdf = 1 and -- comment there
asdf = 2 then 2
/* Whole line comment */
when asdf = 3 then 3 /* bla bla */
else 0 end as qwerty,
/* another whole line comment */
qwer2
""".strip()
)
assert_and_print(
assign_comment(
"""
select distinct asdf,
qwer,
case when asdf = 1 and
asdf = 2 then 2
when asdf = 3 then 3
else 0 end as qwerty,
qwer2
""".strip(),
[
{"comment": "[CS]/* Whole line after select */[C]", "preceding": "select distinct asdf,"},
{"comment": "/* some comment */[C]", "preceding": "qwer, "},
{"comment": "-- comment there[C]", "preceding": "case when asdf = 1"},
{"comment": "[CS]/* Whole line comment */[C]", "preceding": "and asdf = 2"},
{"comment": "/* bla bla */[C]", "preceding": "then 2 when asdf = 3 then 3"}
]
),
"""
select distinct asdf,
/* Whole line after select */
qwer, /* some comment */
case when asdf = 1 and -- comment there
asdf = 2 then 2
/* Whole line comment */
when asdf = 3 then 3 /* bla bla */
else 0 end as qwerty,
qwer2
""".strip()
)
assert_and_print(
assign_comment(
"select asdf,\n antworqwer",
[{'comment': '-- Some comment[C]', 'preceding': 'select asdf, '},
{'comment': '-- Another comment', 'preceding': 'antworqwer '}]
),
"""
select asdf, -- Some comment
antworqwer -- Another comment
""".strip()
)