From e6c2163e159fa8995ffd0d76197e51c74e5dfd03 Mon Sep 17 00:00:00 2001 From: tkaunlaky-e6 Date: Thu, 23 Apr 2026 15:18:18 +0530 Subject: [PATCH] fix(e6): gate TO_UNIX_TIMESTAMP /1000 on executor type The native executor already returns seconds (matching Databricks), while the Java executor returns milliseconds. The transpiler unconditionally divided by 1000, producing wrong results on NE. Introduce E6_EXECUTOR_TYPE env flag (default: "java"): - "java" -> TO_UNIX_TIMESTAMP(...) / 1000 - "native" -> TO_UNIX_TIMESTAMP(...) Applied to both the no-format and PARSE_DATETIME code paths. Flag is also mirrored at module scope in converter_api.py for startup logging, consistent with FIX_QUOTE_ESCAPES. --- converter_api.py | 6 +++++- sqlglot/dialects/e6.py | 16 ++++++++++------ tests/dialects/test_e6.py | 24 ++++++++++++++++++++++++ 3 files changed, 39 insertions(+), 7 deletions(-) diff --git a/converter_api.py b/converter_api.py index 7ba047a253..51478688d9 100644 --- a/converter_api.py +++ b/converter_api.py @@ -48,6 +48,9 @@ STORAGE_ENGINE_PORT = os.getenv("STORAGE_ENGINE_PORT", 9005) SKIP_COMMENT = os.getenv("SKIP_COMMENT", "True") # Always strip multi-line comments FIX_QUOTE_ESCAPES = os.getenv("FIX_QUOTE_ESCAPES", "False") # Fix '' inside single-quoted strings +E6_EXECUTOR_TYPE = os.getenv( + "E6_EXECUTOR_TYPE", "java" +) # "java" divides TO_UNIX_TIMESTAMP by 1000; "native" does not storage_service_client = None @@ -65,10 +68,11 @@ logger.info("Storage Service Client is created") logger.info( "Environment flags — ENABLE_GUARDRAIL: %s, SKIP_COMMENT: %s, FIX_QUOTE_ESCAPES: %s, " - "STORAGE_ENGINE_URL: %s, STORAGE_ENGINE_PORT: %s", + "E6_EXECUTOR_TYPE: %s, STORAGE_ENGINE_URL: %s, STORAGE_ENGINE_PORT: %s", ENABLE_GUARDRAIL, SKIP_COMMENT, FIX_QUOTE_ESCAPES, + E6_EXECUTOR_TYPE, STORAGE_ENGINE_URL, STORAGE_ENGINE_PORT, ) diff --git a/sqlglot/dialects/e6.py b/sqlglot/dialects/e6.py index 62e3774292..5fb8080b67 100644 --- a/sqlglot/dialects/e6.py +++ b/sqlglot/dialects/e6.py @@ -2626,6 +2626,10 @@ def to_unix_timestamp_sql( time_expr = expression.this format_expr = expression.args.get("format") + # Java executor returns milliseconds so divide by 1000 to get seconds. + # Native executor already returns seconds (matches Databricks) so skip the division. + is_native = os.getenv("E6_EXECUTOR_TYPE", "java").lower() == "native" + if format_expr: format_str = format_expr.this for key, value in E6().TIME_MAPPING_FOR_PARSE_FUNCTIONS.items(): @@ -2633,15 +2637,15 @@ def to_unix_timestamp_sql( # Generate final function with or without format argument if format_str: - # Build expression tree for TO_UNIX_TIMESTAMP(PARSE_DATETIME(...)) / 1000 parse_datetime_expr = exp.Anonymous( this="PARSE_DATETIME", expressions=[exp.Literal.string(format_str), time_expr] ) to_unix_expr = exp.Anonymous( this="TO_UNIX_TIMESTAMP", expressions=[parse_datetime_expr] ) - div_expr = exp.Div(this=to_unix_expr, expression=exp.Literal.number("1000")) - return self.sql(div_expr) + if is_native: + return self.sql(to_unix_expr) + return self.sql(exp.Div(this=to_unix_expr, expression=exp.Literal.number("1000"))) # Wrap argument in CAST(... AS TIMESTAMP) since E6's TO_UNIX_TIMESTAMP # only accepts TIMESTAMP/DATE types, not integers or strings @@ -2651,10 +2655,10 @@ def to_unix_timestamp_sql( ): time_expr = exp.Cast(this=time_expr, to=exp.DataType.build("TIMESTAMP")) - # Build expression tree for TO_UNIX_TIMESTAMP(CAST(... AS TIMESTAMP)) / 1000 to_unix_expr = exp.Anonymous(this="TO_UNIX_TIMESTAMP", expressions=[time_expr]) - div_expr = exp.Div(this=to_unix_expr, expression=exp.Literal.number("1000")) - return self.sql(div_expr) + if is_native: + return self.sql(to_unix_expr) + return self.sql(exp.Div(this=to_unix_expr, expression=exp.Literal.number("1000"))) def lateral_sql(self, expression: exp.Lateral) -> str: expression.set("view", True) diff --git a/tests/dialects/test_e6.py b/tests/dialects/test_e6.py index ebccc1fd0b..70d6de3e82 100644 --- a/tests/dialects/test_e6.py +++ b/tests/dialects/test_e6.py @@ -2453,6 +2453,30 @@ def test_unixtime_functions(self): }, ) + def test_unix_timestamp_native_executor(self): + # With E6_EXECUTOR_TYPE=native, TO_UNIX_TIMESTAMP is emitted without /1000 + # because NE already returns seconds. Default (java) keeps the /1000 divisor. + os.environ["E6_EXECUTOR_TYPE"] = "native" + try: + # no-format branch + self.validate_all( + "SELECT TO_UNIX_TIMESTAMP(CAST(A AS TIMESTAMP))", + read={"databricks": "SELECT UNIX_TIMESTAMP(A)"}, + ) + # format branch (PARSE_DATETIME) + self.validate_all( + "SELECT TO_UNIX_TIMESTAMP(PARSE_DATETIME('%Y-%m-%d', '2016-04-08'))", + read={"databricks": "SELECT unix_timestamp('2016-04-08', 'yyyy-MM-dd')"}, + ) + finally: + os.environ.pop("E6_EXECUTOR_TYPE", None) + + # Default (flag unset) -> java behavior, /1000 preserved + self.validate_all( + "SELECT TO_UNIX_TIMESTAMP(CAST(A AS TIMESTAMP)) / 1000", + read={"databricks": "SELECT UNIX_TIMESTAMP(A)"}, + ) + def test_timestamp_seconds(self): # Test basic TIMESTAMP_SECONDS with integer literal self.validate_all(