Schema Info

Schema Name: sfdl_prod_drivethru.websites_analytics_demo
  • created: 2025-02-25 23:05:13
  • disabled: False
  • format: raw
  • id: 3471
  • is_partitioned: True
  • is_schema_datatyped: True
  • name:
    {
        "database": "sfdl_prod_drivethru",
        "table": "websites_analytics_demo"
    }
  • type: drivethrujson
Schema Versions: 3
    • created: 2025-02-28 03:05:06
    • ddl:
      [
          "CREATE DATABASE IF NOT EXISTS `sfdl_prod_drivethru`",
          "DROP TABLE IF EXISTS `sfdl_prod_drivethru.websites_analytics_demo`",
          "CREATE EXTERNAL TABLE IF NOT EXISTS\n`sfdl_prod_drivethru.websites_analytics_demo`\n(\n`imagesViewed`STRING,`type`STRING,`widgetid`STRING,`userLat`STRING,`facilityid`STRING,`timeStamp`STRING,`trackclick`STRING,`name`STRING,`siteId`STRING,`pageid`STRING,`companyId`STRING,`companyid`STRING,`eventlocation`STRING,`pageversion`STRING,`event`STRING,`categoryused`STRING,`facilityId`STRING,`userLong`STRING,`timeSpent`STRING,`sessionId`STRING,`sessionid`STRING,`widgetId`STRING,`pageVersion`STRING,`timestamp`STRING,`testid`STRING,`unitsshown`STRING,`pageId`STRING,`userlat`STRING,`bsPopper`STRING,`userlong`STRING,`siteid`STRING\n)\nPARTITIONED BY (dt string)\nROW FORMAT SERDE\n'org.openx.data.jsonserde.JsonSerDe'\n \nSTORED AS INPUTFORMAT\n  'org.apache.hadoop.mapred.TextInputFormat'\nOUTPUTFORMAT\n  'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'\nLOCATION\n  's3://sfdl-drivethrujson-drivethru-prod/raw_incremental/websites_analytics_demo'\n",
          "MSCK REPAIR TABLE sfdl_prod_drivethru.websites_analytics_demo"
      ]
    • hive_path: s3://sfdl-drivethrujson-drivethru-prod/raw_incremental/websites_analytics_demo
    • id: 1209884
    • processed: 0
    • raw_data:
      {
          "bsPopper": "static",
          "companyId": "7b40898a-c95a-4756-88ee-4729a1275427",
          "event": "pageView",
          "facilityId": null,
          "name": "Home Page",
          "pageId": "5aa828cb5d4f76061ab1e215",
          "pageVersion": "31c3bc3f-ad06-4674-9b50-0c1399a146cd",
          "sessionId": "4hWGsSbyLadOh1aDL6oX_KkfaOphKMsg",
          "siteId": "5aa828bc7a4768a6789e2340",
          "timeStamp": "2025-02-28T08:46:31.780Z",
          "type": "voyager/pages/types/HOME_PAGE",
          "userLat": 39.0572,
          "userLong": -94.5933,
          "widgetId": "9e2b21be-28fb-46ee-9f40-bc38c004853d"
      }
    • schema_attributes:
      {
          "bsPopper": "string",
          "categoryused": "string",
          "companyId": "string",
          "companyid": "string",
          "event": "string",
          "eventlocation": "string",
          "facilityId": "string",
          "facilityid": "string",
          "imagesViewed": "string",
          "name": "string",
          "pageId": "string",
          "pageVersion": "string",
          "pageid": "string",
          "pageversion": "string",
          "sessionId": "string",
          "sessionid": "string",
          "siteId": "string",
          "siteid": "string",
          "testid": "string",
          "timeSpent": "string",
          "timeStamp": "string",
          "timestamp": "string",
          "trackclick": "string",
          "type": "string",
          "unitsshown": "string",
          "userLat": "string",
          "userLong": "string",
          "userlat": "string",
          "userlong": "string",
          "widgetId": "string",
          "widgetid": "string"
      }
    • schema_name_id: 3471
    • schema_scan_id: 137955199
    • updated: None
    • created: 2025-02-27 00:05:06
    • ddl:
      [
          "CREATE DATABASE IF NOT EXISTS `sfdl_prod_drivethru`",
          "DROP TABLE IF EXISTS `sfdl_prod_drivethru.websites_analytics_demo`",
          "CREATE EXTERNAL TABLE IF NOT EXISTS\n`sfdl_prod_drivethru.websites_analytics_demo`\n(\n`pageId`STRING,`userLat`STRING,`siteid`STRING,`testid`STRING,`type`STRING,`sessionId`STRING,`eventlocation`STRING,`unitsshown`STRING,`userlong`STRING,`pageid`STRING,`pageVersion`STRING,`sessionid`STRING,`widgetid`STRING,`event`STRING,`name`STRING,`siteId`STRING,`facilityId`STRING,`trackclick`STRING,`companyId`STRING,`timeSpent`STRING,`pageversion`STRING,`imagesViewed`STRING,`companyid`STRING,`timeStamp`STRING,`userLong`STRING,`timestamp`STRING,`categoryused`STRING,`userlat`STRING,`facilityid`STRING\n)\nPARTITIONED BY (dt string)\nROW FORMAT SERDE\n'org.openx.data.jsonserde.JsonSerDe'\n \nSTORED AS INPUTFORMAT\n  'org.apache.hadoop.mapred.TextInputFormat'\nOUTPUTFORMAT\n  'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'\nLOCATION\n  's3://sfdl-drivethrujson-drivethru-prod/raw_incremental/websites_analytics_demo'\n",
          "MSCK REPAIR TABLE sfdl_prod_drivethru.websites_analytics_demo"
      ]
    • hive_path: s3://sfdl-drivethrujson-drivethru-prod/raw_incremental/websites_analytics_demo
    • id: 1208390
    • processed: 0
    • raw_data:
      {
          "categoryused": "Small",
          "companyid": "60f040b3-fbcd-4c2d-892f-62673b3ec7ef",
          "event": "pageView",
          "eventlocation": "themeOne",
          "facilityid": "e0f8e3e4-82f0-43aa-b997-3a9269ea79cc",
          "name": "Self Storage Units | NJ, NY, PA, and AZ",
          "pageid": "5bc768f99871560676d89e83",
          "pageversion": "5d7d8134-5a5b-416f-9b82-2f6c5a5aaaa5",
          "sessionid": "Sx5W_rlSyBS3rc29VaYsq5YUXSva84lc",
          "siteid": "5bc768d1417f9dfb8aa5e0e1",
          "testid": "actions-movein",
          "timestamp": "2025-02-26T15:12:36.774Z",
          "trackclick": "move-in",
          "type": "voyager/pages/types/HOME_PAGE",
          "unitsshown": 1,
          "userlat": 39.0572,
          "userlong": -94.5933,
          "widgetid": "88f72d83-138f-4e1f-b441-a73afbb18076"
      }
    • schema_attributes:
      {
          "categoryused": "string",
          "companyId": "string",
          "companyid": "string",
          "event": "string",
          "eventlocation": "string",
          "facilityId": "string",
          "facilityid": "string",
          "imagesViewed": "string",
          "name": "string",
          "pageId": "string",
          "pageVersion": "string",
          "pageid": "string",
          "pageversion": "string",
          "sessionId": "string",
          "sessionid": "string",
          "siteId": "string",
          "siteid": "string",
          "testid": "string",
          "timeSpent": "string",
          "timeStamp": "string",
          "timestamp": "string",
          "trackclick": "string",
          "type": "string",
          "unitsshown": "string",
          "userLat": "string",
          "userLong": "string",
          "userlat": "string",
          "userlong": "string",
          "widgetid": "string"
      }
    • schema_name_id: 3471
    • schema_scan_id: 137888858
    • updated: None
    • created: 2025-02-26 01:06:52
    • ddl:
      [
          "CREATE DATABASE IF NOT EXISTS `sfdl_prod_drivethru`",
          "DROP TABLE IF EXISTS `sfdl_prod_drivethru.websites_analytics_demo`",
          "CREATE EXTERNAL TABLE IF NOT EXISTS\n`sfdl_prod_drivethru.websites_analytics_demo`\n(\n`type`STRING,`facilityId`STRING,`name`STRING,`companyId`STRING,`imagesViewed`STRING,`sessionId`STRING,`event`STRING,`timeStamp`STRING,`timeSpent`STRING,`pageId`STRING,`pageVersion`STRING,`userLong`STRING,`siteId`STRING,`userLat`STRING\n)\nPARTITIONED BY (dt string)\nROW FORMAT SERDE\n'org.openx.data.jsonserde.JsonSerDe'\n \nSTORED AS INPUTFORMAT\n  'org.apache.hadoop.mapred.TextInputFormat'\nOUTPUTFORMAT\n  'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'\nLOCATION\n  's3://sfdl-drivethrujson-drivethru-prod/raw_incremental/websites_analytics_demo'\n",
          "MSCK REPAIR TABLE sfdl_prod_drivethru.websites_analytics_demo"
      ]
    • hive_path: s3://sfdl-drivethrujson-drivethru-prod/raw_incremental/websites_analytics_demo
    • id: 1207624
    • processed: 1
    • raw_data:
      {
          "companyId": "7b40898a-c95a-4756-88ee-4729a1275427",
          "event": "pageView",
          "facilityId": null,
          "imagesViewed": 5,
          "name": "All Locations",
          "pageId": "5aa828cb5d4f76061ab1e216",
          "pageVersion": "38daf8f9-87e3-4f02-b99e-4eb184b3e9f1",
          "sessionId": "g6RzU61daclFOkwnddfJG9hJuBzVzNC-",
          "siteId": "5aa828bc7a4768a6789e2340",
          "timeSpent": 10,
          "timeStamp": "2025-02-26T06:46:43.691Z",
          "type": "voyager/pages/types/FACILITIES_LIST_PAGE",
          "userLat": 39.0572,
          "userLong": -94.5933
      }
    • schema_attributes:
      {
          "companyId": "string",
          "event": "string",
          "facilityId": "string",
          "imagesViewed": "string",
          "name": "string",
          "pageId": "string",
          "pageVersion": "string",
          "sessionId": "string",
          "siteId": "string",
          "timeSpent": "string",
          "timeStamp": "string",
          "type": "string",
          "userLat": "string",
          "userLong": "string"
      }
    • schema_name_id: 3471
    • schema_scan_id: 137830123
    • updated: 2025-02-26 01:07:05
Schema Scans: 3
Last at 2025-02-28 03:05:05
    • duration: 0:00:08.975242
    • exit_message:
      {
          "exception": "failed to apply schema `Query failed: FAILED: SemanticException [Error 10036]: Duplicate column name: companyid `CREATE EXTERNAL TABLE IF NOT EXISTS\n`sfdl_prod_drivethru.websites_analytics_demo`\n(\n`imagesViewed`STRING,`type`STRING,`widgetid`STRING,`userLat`STRING,`facilityid`STRING,`timeStamp`STRING,`trackclick`STRING,`name`STRING,`siteId`STRING,`pageid`STRING,`companyId`STRING,`companyid`STRING,`eventlocation`STRING,`pageversion`STRING,`event`STRING,`categoryused`STRING,`facilityId`STRING,`userLong`STRING,`timeSpent`STRING,`sessionId`STRING,`sessionid`STRING,`widgetId`STRING,`pageVersion`STRING,`timestamp`STRING,`testid`STRING,`unitsshown`STRING,`pageId`STRING,`userlat`STRING,`bsPopper`STRING,`userlong`STRING,`siteid`STRING\n)\nPARTITIONED BY (dt string)\nROW FORMAT SERDE\n'org.openx.data.jsonserde.JsonSerDe'\n \nSTORED AS INPUTFORMAT\n  'org.apache.hadoop.mapred.TextInputFormat'\nOUTPUTFORMAT\n  'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'\nLOCATION\n  's3://sfdl-drivethrujson-drivethru-prod/raw_incremental/websites_analytics_demo'\n``: ['CREATE DATABASE IF NOT EXISTS `sfdl_prod_drivethru`', 'DROP TABLE IF EXISTS `sfdl_prod_drivethru.websites_analytics_demo`', \"CREATE EXTERNAL TABLE IF NOT EXISTS\\n`sfdl_prod_drivethru.websites_analytics_demo`\\n(\\n`imagesViewed`STRING,`type`STRING,`widgetid`STRING,`userLat`STRING,`facilityid`STRING,`timeStamp`STRING,`trackclick`STRING,`name`STRING,`siteId`STRING,`pageid`STRING,`companyId`STRING,`companyid`STRING,`eventlocation`STRING,`pageversion`STRING,`event`STRING,`categoryused`STRING,`facilityId`STRING,`userLong`STRING,`timeSpent`STRING,`sessionId`STRING,`sessionid`STRING,`widgetId`STRING,`pageVersion`STRING,`timestamp`STRING,`testid`STRING,`unitsshown`STRING,`pageId`STRING,`userlat`STRING,`bsPopper`STRING,`userlong`STRING,`siteid`STRING\\n)\\nPARTITIONED BY (dt string)\\nROW FORMAT SERDE\\n'org.openx.data.jsonserde.JsonSerDe'\\n \\nSTORED AS INPUTFORMAT\\n  'org.apache.hadoop.mapred.TextInputFormat'\\nOUTPUTFORMAT\\n  'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'\\nLOCATION\\n  's3://sfdl-drivethrujson-drivethru-prod/raw_incremental/websites_analytics_demo'\\n\", 'MSCK REPAIR TABLE sfdl_prod_drivethru.websites_analytics_demo']\nTraceback (most recent call last):\n  File \"/schema_manager/schema_generator/table_generator.py\", line 24, in apply_latest_version\n    self.runner.run_multi_query(schema_version.ddl, timeout=DDLQueueWorker.TIMEOUT)\n  File \"/usr/local/lib/python3.12/site-packages/stodl/athena_runner/runner.py\", line 133, in run_multi_query\n    result = self.run_single_query(query, timeout, database)\n             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n  File \"/usr/local/lib/python3.12/site-packages/stodl/athena_runner/runner.py\", line 51, in run_single_query\n    return self._run_single_query(query, timeout, database)\n           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n  File \"/usr/local/lib/python3.12/site-packages/stodl/athena_runner/runner.py\", line 120, in _run_single_query\n    raise e\n  File \"/usr/local/lib/python3.12/site-packages/stodl/athena_runner/runner.py\", line 103, in _run_single_query\n    raise AthenaRunnerQueryException('Query failed: {} `{}`'.format(info['QueryExecution']['Status']['StateChangeReason'], query))\nstodl.athena_runner.exceptions.AthenaRunnerQueryException: Query failed: FAILED: SemanticException [Error 10036]: Duplicate column name: companyid `CREATE EXTERNAL TABLE IF NOT EXISTS\n`sfdl_prod_drivethru.websites_analytics_demo`\n(\n`imagesViewed`STRING,`type`STRING,`widgetid`STRING,`userLat`STRING,`facilityid`STRING,`timeStamp`STRING,`trackclick`STRING,`name`STRING,`siteId`STRING,`pageid`STRING,`companyId`STRING,`companyid`STRING,`eventlocation`STRING,`pageversion`STRING,`event`STRING,`categoryused`STRING,`facilityId`STRING,`userLong`STRING,`timeSpent`STRING,`sessionId`STRING,`sessionid`STRING,`widgetId`STRING,`pageVersion`STRING,`timestamp`STRING,`testid`STRING,`unitsshown`STRING,`pageId`STRING,`userlat`STRING,`bsPopper`STRING,`userlong`STRING,`siteid`STRING\n)\nPARTITIONED BY (dt string)\nROW FORMAT SERDE\n'org.openx.data.jsonserde.JsonSerDe'\n \nSTORED AS INPUTFORMAT\n  'org.apache.hadoop.mapred.TextInputFormat'\nOUTPUTFORMAT\n  'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'\nLOCATION\n  's3://sfdl-drivethrujson-drivethru-prod/raw_incremental/websites_analytics_demo'\n`\n\nDuring handling of the above exception, another exception occurred:\n\nTraceback (most recent call last):\n  File \"/schema_manager/schema_generator/schema_scanner.py\", line 102, in scan\n    response = service.run()\n               ^^^^^^^^^^^^^\n  File \"/schema_manager/schema_generator/process_hive_table.py\", line 98, in run\n    response['ddl_changed'] = table_generator.run()\n                              ^^^^^^^^^^^^^^^^^^^^^\n  File \"/schema_manager/schema_generator/table_generator.py\", line 35, in run\n    self.apply_latest_version()\n  File \"/schema_manager/schema_generator/table_generator.py\", line 26, in apply_latest_version\n    raise Exception('failed to apply schema `{}`: {}'.format(e, schema_version.ddl))\nException: failed to apply schema `Query failed: FAILED: SemanticException [Error 10036]: Duplicate column name: companyid `CREATE EXTERNAL TABLE IF NOT EXISTS\n`sfdl_prod_drivethru.websites_analytics_demo`\n(\n`imagesViewed`STRING,`type`STRING,`widgetid`STRING,`userLat`STRING,`facilityid`STRING,`timeStamp`STRING,`trackclick`STRING,`name`STRING,`siteId`STRING,`pageid`STRING,`companyId`STRING,`companyid`STRING,`eventlocation`STRING,`pageversion`STRING,`event`STRING,`categoryused`STRING,`facilityId`STRING,`userLong`STRING,`timeSpent`STRING,`sessionId`STRING,`sessionid`STRING,`widgetId`STRING,`pageVersion`STRING,`timestamp`STRING,`testid`STRING,`unitsshown`STRING,`pageId`STRING,`userlat`STRING,`bsPopper`STRING,`userlong`STRING,`siteid`STRING\n)\nPARTITIONED BY (dt string)\nROW FORMAT SERDE\n'org.openx.data.jsonserde.JsonSerDe'\n \nSTORED AS INPUTFORMAT\n  'org.apache.hadoop.mapred.TextInputFormat'\nOUTPUTFORMAT\n  'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'\nLOCATION\n  's3://sfdl-drivethrujson-drivethru-prod/raw_incremental/websites_analytics_demo'\n``: ['CREATE DATABASE IF NOT EXISTS `sfdl_prod_drivethru`', 'DROP TABLE IF EXISTS `sfdl_prod_drivethru.websites_analytics_demo`', \"CREATE EXTERNAL TABLE IF NOT EXISTS\\n`sfdl_prod_drivethru.websites_analytics_demo`\\n(\\n`imagesViewed`STRING,`type`STRING,`widgetid`STRING,`userLat`STRING,`facilityid`STRING,`timeStamp`STRING,`trackclick`STRING,`name`STRING,`siteId`STRING,`pageid`STRING,`companyId`STRING,`companyid`STRING,`eventlocation`STRING,`pageversion`STRING,`event`STRING,`categoryused`STRING,`facilityId`STRING,`userLong`STRING,`timeSpent`STRING,`sessionId`STRING,`sessionid`STRING,`widgetId`STRING,`pageVersion`STRING,`timestamp`STRING,`testid`STRING,`unitsshown`STRING,`pageId`STRING,`userlat`STRING,`bsPopper`STRING,`userlong`STRING,`siteid`STRING\\n)\\nPARTITIONED BY (dt string)\\nROW FORMAT SERDE\\n'org.openx.data.jsonserde.JsonSerDe'\\n \\nSTORED AS INPUTFORMAT\\n  'org.apache.hadoop.mapred.TextInputFormat'\\nOUTPUTFORMAT\\n  'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'\\nLOCATION\\n  's3://sfdl-drivethrujson-drivethru-prod/raw_incremental/websites_analytics_demo'\\n\", 'MSCK REPAIR TABLE sfdl_prod_drivethru.websites_analytics_demo']\n: Expecting value: line 1 column 1 (char 0)"
      }
    • id: 137955199
    • payload:
      {
          "datatype_dict": null,
          "file_format": "json",
          "is_schema_datatyped": true,
          "partition": {
              "location": "s3://sfdl-drivethrujson-drivethru-prod/raw_incremental/websites_analytics_demo/dt=2025-02-28",
              "name": "dt",
              "type": "string",
              "value": "2025-02-28"
          },
          "s3": {
              "bucket": "sfdl-drivethrujson-drivethru-prod",
              "hive_path": "s3://sfdl-drivethrujson-drivethru-prod/raw_incremental/websites_analytics_demo",
              "key": "raw_incremental/websites_analytics_demo/dt=2025-02-28/events_03_00.json.gz"
          },
          "schema": {
              "name": "sfdl_prod_drivethru.websites_analytics_demo",
              "type": "drivethrujson"
          },
          "trace_context": {
              "parent_id": 17934848701225897929,
              "service_name": "drive-thru",
              "span_id": 14735032474575148209,
              "trace_id": 8828131349323467067
          }
      }
    • running: False
    • schema_name_id: 3471
    • start_time: 2025-02-28 03:05:05
    • success: False
    • trace_id: 8828131349323467067
    • duration: 0:00:11.747622
    • exit_message:
      {
          "exception": "failed to apply schema `Query failed: FAILED: SemanticException [Error 10036]: Duplicate column name: pageid `CREATE EXTERNAL TABLE IF NOT EXISTS\n`sfdl_prod_drivethru.websites_analytics_demo`\n(\n`pageId`STRING,`userLat`STRING,`siteid`STRING,`testid`STRING,`type`STRING,`sessionId`STRING,`eventlocation`STRING,`unitsshown`STRING,`userlong`STRING,`pageid`STRING,`pageVersion`STRING,`sessionid`STRING,`widgetid`STRING,`event`STRING,`name`STRING,`siteId`STRING,`facilityId`STRING,`trackclick`STRING,`companyId`STRING,`timeSpent`STRING,`pageversion`STRING,`imagesViewed`STRING,`companyid`STRING,`timeStamp`STRING,`userLong`STRING,`timestamp`STRING,`categoryused`STRING,`userlat`STRING,`facilityid`STRING\n)\nPARTITIONED BY (dt string)\nROW FORMAT SERDE\n'org.openx.data.jsonserde.JsonSerDe'\n \nSTORED AS INPUTFORMAT\n  'org.apache.hadoop.mapred.TextInputFormat'\nOUTPUTFORMAT\n  'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'\nLOCATION\n  's3://sfdl-drivethrujson-drivethru-prod/raw_incremental/websites_analytics_demo'\n``: ['CREATE DATABASE IF NOT EXISTS `sfdl_prod_drivethru`', 'DROP TABLE IF EXISTS `sfdl_prod_drivethru.websites_analytics_demo`', \"CREATE EXTERNAL TABLE IF NOT EXISTS\\n`sfdl_prod_drivethru.websites_analytics_demo`\\n(\\n`pageId`STRING,`userLat`STRING,`siteid`STRING,`testid`STRING,`type`STRING,`sessionId`STRING,`eventlocation`STRING,`unitsshown`STRING,`userlong`STRING,`pageid`STRING,`pageVersion`STRING,`sessionid`STRING,`widgetid`STRING,`event`STRING,`name`STRING,`siteId`STRING,`facilityId`STRING,`trackclick`STRING,`companyId`STRING,`timeSpent`STRING,`pageversion`STRING,`imagesViewed`STRING,`companyid`STRING,`timeStamp`STRING,`userLong`STRING,`timestamp`STRING,`categoryused`STRING,`userlat`STRING,`facilityid`STRING\\n)\\nPARTITIONED BY (dt string)\\nROW FORMAT SERDE\\n'org.openx.data.jsonserde.JsonSerDe'\\n \\nSTORED AS INPUTFORMAT\\n  'org.apache.hadoop.mapred.TextInputFormat'\\nOUTPUTFORMAT\\n  'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'\\nLOCATION\\n  's3://sfdl-drivethrujson-drivethru-prod/raw_incremental/websites_analytics_demo'\\n\", 'MSCK REPAIR TABLE sfdl_prod_drivethru.websites_analytics_demo']\nTraceback (most recent call last):\n  File \"/schema_manager/schema_generator/table_generator.py\", line 24, in apply_latest_version\n    self.runner.run_multi_query(schema_version.ddl, timeout=DDLQueueWorker.TIMEOUT)\n  File \"/usr/local/lib/python3.12/site-packages/stodl/athena_runner/runner.py\", line 133, in run_multi_query\n    result = self.run_single_query(query, timeout, database)\n             ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n  File \"/usr/local/lib/python3.12/site-packages/stodl/athena_runner/runner.py\", line 51, in run_single_query\n    return self._run_single_query(query, timeout, database)\n           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n  File \"/usr/local/lib/python3.12/site-packages/stodl/athena_runner/runner.py\", line 120, in _run_single_query\n    raise e\n  File \"/usr/local/lib/python3.12/site-packages/stodl/athena_runner/runner.py\", line 103, in _run_single_query\n    raise AthenaRunnerQueryException('Query failed: {} `{}`'.format(info['QueryExecution']['Status']['StateChangeReason'], query))\nstodl.athena_runner.exceptions.AthenaRunnerQueryException: Query failed: FAILED: SemanticException [Error 10036]: Duplicate column name: pageid `CREATE EXTERNAL TABLE IF NOT EXISTS\n`sfdl_prod_drivethru.websites_analytics_demo`\n(\n`pageId`STRING,`userLat`STRING,`siteid`STRING,`testid`STRING,`type`STRING,`sessionId`STRING,`eventlocation`STRING,`unitsshown`STRING,`userlong`STRING,`pageid`STRING,`pageVersion`STRING,`sessionid`STRING,`widgetid`STRING,`event`STRING,`name`STRING,`siteId`STRING,`facilityId`STRING,`trackclick`STRING,`companyId`STRING,`timeSpent`STRING,`pageversion`STRING,`imagesViewed`STRING,`companyid`STRING,`timeStamp`STRING,`userLong`STRING,`timestamp`STRING,`categoryused`STRING,`userlat`STRING,`facilityid`STRING\n)\nPARTITIONED BY (dt string)\nROW FORMAT SERDE\n'org.openx.data.jsonserde.JsonSerDe'\n \nSTORED AS INPUTFORMAT\n  'org.apache.hadoop.mapred.TextInputFormat'\nOUTPUTFORMAT\n  'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'\nLOCATION\n  's3://sfdl-drivethrujson-drivethru-prod/raw_incremental/websites_analytics_demo'\n`\n\nDuring handling of the above exception, another exception occurred:\n\nTraceback (most recent call last):\n  File \"/schema_manager/schema_generator/schema_scanner.py\", line 102, in scan\n    response = service.run()\n               ^^^^^^^^^^^^^\n  File \"/schema_manager/schema_generator/process_hive_table.py\", line 98, in run\n    response['ddl_changed'] = table_generator.run()\n                              ^^^^^^^^^^^^^^^^^^^^^\n  File \"/schema_manager/schema_generator/table_generator.py\", line 35, in run\n    self.apply_latest_version()\n  File \"/schema_manager/schema_generator/table_generator.py\", line 26, in apply_latest_version\n    raise Exception('failed to apply schema `{}`: {}'.format(e, schema_version.ddl))\nException: failed to apply schema `Query failed: FAILED: SemanticException [Error 10036]: Duplicate column name: pageid `CREATE EXTERNAL TABLE IF NOT EXISTS\n`sfdl_prod_drivethru.websites_analytics_demo`\n(\n`pageId`STRING,`userLat`STRING,`siteid`STRING,`testid`STRING,`type`STRING,`sessionId`STRING,`eventlocation`STRING,`unitsshown`STRING,`userlong`STRING,`pageid`STRING,`pageVersion`STRING,`sessionid`STRING,`widgetid`STRING,`event`STRING,`name`STRING,`siteId`STRING,`facilityId`STRING,`trackclick`STRING,`companyId`STRING,`timeSpent`STRING,`pageversion`STRING,`imagesViewed`STRING,`companyid`STRING,`timeStamp`STRING,`userLong`STRING,`timestamp`STRING,`categoryused`STRING,`userlat`STRING,`facilityid`STRING\n)\nPARTITIONED BY (dt string)\nROW FORMAT SERDE\n'org.openx.data.jsonserde.JsonSerDe'\n \nSTORED AS INPUTFORMAT\n  'org.apache.hadoop.mapred.TextInputFormat'\nOUTPUTFORMAT\n  'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'\nLOCATION\n  's3://sfdl-drivethrujson-drivethru-prod/raw_incremental/websites_analytics_demo'\n``: ['CREATE DATABASE IF NOT EXISTS `sfdl_prod_drivethru`', 'DROP TABLE IF EXISTS `sfdl_prod_drivethru.websites_analytics_demo`', \"CREATE EXTERNAL TABLE IF NOT EXISTS\\n`sfdl_prod_drivethru.websites_analytics_demo`\\n(\\n`pageId`STRING,`userLat`STRING,`siteid`STRING,`testid`STRING,`type`STRING,`sessionId`STRING,`eventlocation`STRING,`unitsshown`STRING,`userlong`STRING,`pageid`STRING,`pageVersion`STRING,`sessionid`STRING,`widgetid`STRING,`event`STRING,`name`STRING,`siteId`STRING,`facilityId`STRING,`trackclick`STRING,`companyId`STRING,`timeSpent`STRING,`pageversion`STRING,`imagesViewed`STRING,`companyid`STRING,`timeStamp`STRING,`userLong`STRING,`timestamp`STRING,`categoryused`STRING,`userlat`STRING,`facilityid`STRING\\n)\\nPARTITIONED BY (dt string)\\nROW FORMAT SERDE\\n'org.openx.data.jsonserde.JsonSerDe'\\n \\nSTORED AS INPUTFORMAT\\n  'org.apache.hadoop.mapred.TextInputFormat'\\nOUTPUTFORMAT\\n  'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'\\nLOCATION\\n  's3://sfdl-drivethrujson-drivethru-prod/raw_incremental/websites_analytics_demo'\\n\", 'MSCK REPAIR TABLE sfdl_prod_drivethru.websites_analytics_demo']\n: Expecting value: line 1 column 1 (char 0)"
      }
    • id: 137888858
    • payload:
      {
          "datatype_dict": null,
          "file_format": "json",
          "is_schema_datatyped": true,
          "partition": {
              "location": "s3://sfdl-drivethrujson-drivethru-prod/raw_incremental/websites_analytics_demo/dt=2025-02-27",
              "name": "dt",
              "type": "string",
              "value": "2025-02-27"
          },
          "s3": {
              "bucket": "sfdl-drivethrujson-drivethru-prod",
              "hive_path": "s3://sfdl-drivethrujson-drivethru-prod/raw_incremental/websites_analytics_demo",
              "key": "raw_incremental/websites_analytics_demo/dt=2025-02-27/events_09_47.json.gz"
          },
          "schema": {
              "name": "sfdl_prod_drivethru.websites_analytics_demo",
              "type": "drivethrujson"
          },
          "trace_context": {
              "parent_id": 3772762936638509876,
              "service_name": "drive-thru",
              "span_id": 13003083318741893245,
              "trace_id": 10525959978009739773
          }
      }
    • running: False
    • schema_name_id: 3471
    • start_time: 2025-02-27 00:05:05
    • success: False
    • trace_id: 10525959978009739773
    • duration: 0:00:17.179849
    • exit_message:
      {
          "ddl_changed": true,
          "ingested_partitions": 2,
          "partitions_applied": false,
          "success": true
      }
    • id: 137830123
    • payload:
      {
          "datatype_dict": null,
          "file_format": "json",
          "is_schema_datatyped": true,
          "partition": {
              "location": "s3://sfdl-drivethrujson-drivethru-prod/raw_incremental/websites_analytics_demo/dt=2025-02-26",
              "name": "dt",
              "type": "string",
              "value": "2025-02-26"
          },
          "s3": {
              "bucket": "sfdl-drivethrujson-drivethru-prod",
              "hive_path": "s3://sfdl-drivethrujson-drivethru-prod/raw_incremental/websites_analytics_demo",
              "key": "raw_incremental/websites_analytics_demo/dt=2025-02-26/events_01_00.json.gz"
          },
          "schema": {
              "name": "sfdl_prod_drivethru.websites_analytics_demo",
              "type": "drivethrujson"
          },
          "trace_context": {
              "parent_id": 11027206222412167201,
              "service_name": "drive-thru",
              "span_id": 1605317348910005844,
              "trace_id": 9036162070658868287
          }
      }
    • running: False
    • schema_name_id: 3471
    • start_time: 2025-02-26 01:06:51
    • success: True
    • trace_id: 9036162070658868287
Partitions:
  • count: 3
  • ddl:
    [
        "ALTER TABLE\n    `sfdl_prod_drivethru.websites_analytics_demo`\nADD\n\n\n PARTITION (dt='2025-02-26') location 's3://sfdl-drivethrujson-drivethru-prod/raw_incremental/websites_analytics_demo/dt=2025-02-26'\n PARTITION (dt='2025-02-27') location 's3://sfdl-drivethrujson-drivethru-prod/raw_incremental/websites_analytics_demo/dt=2025-02-27'\n PARTITION (dt='2025-02-28') location 's3://sfdl-drivethrujson-drivethru-prod/raw_incremental/websites_analytics_demo/dt=2025-02-28'\n;"
    ]
  • name: dt
  • state:
    [
        {
            "info": {
                "id": 1428381,
                "location": "s3://sfdl-drivethrujson-drivethru-prod/raw_incremental/websites_analytics_demo/dt=2025-02-26",
                "name": "dt",
                "value": "2025-02-26"
            },
            "state": {
                "arrival": {
                    "athena_discovery": true,
                    "first_data": "2025-02-26 00:05:07",
                    "last_data": "2025-02-26 23:05:26"
                },
                "parquet": {
                    "first_performed": null,
                    "in_progress": null,
                    "job_id": null,
                    "last_performed": null,
                    "queued": false
                }
            }
        },
        {
            "info": {
                "id": 1428572,
                "location": "s3://sfdl-drivethrujson-drivethru-prod/raw_incremental/websites_analytics_demo/dt=2025-02-27",
                "name": "dt",
                "value": "2025-02-27"
            },
            "state": {
                "arrival": {
                    "athena_discovery": false,
                    "first_data": "2025-02-27 00:05:06",
                    "last_data": "2025-02-27 23:06:11"
                },
                "parquet": {
                    "first_performed": null,
                    "in_progress": null,
                    "job_id": null,
                    "last_performed": null,
                    "queued": false
                }
            }
        },
        {
            "info": {
                "id": 1428774,
                "location": "s3://sfdl-drivethrujson-drivethru-prod/raw_incremental/websites_analytics_demo/dt=2025-02-28",
                "name": "dt",
                "value": "2025-02-28"
            },
            "state": {
                "arrival": {
                    "athena_discovery": false,
                    "first_data": "2025-02-28 00:05:05",
                    "last_data": "2025-02-28 13:06:02"
                },
                "parquet": {
                    "first_performed": null,
                    "in_progress": null,
                    "job_id": null,
                    "last_performed": null,
                    "queued": false
                }
            }
        }
    ]
  • type: string
Schema:
  • attributes:
    {
        "bsPopper": "string",
        "categoryused": "string",
        "companyId": "string",
        "companyid": "string",
        "event": "string",
        "eventlocation": "string",
        "facilityId": "string",
        "facilityid": "string",
        "imagesViewed": "string",
        "name": "string",
        "pageId": "string",
        "pageVersion": "string",
        "pageid": "string",
        "pageversion": "string",
        "sessionId": "string",
        "sessionid": "string",
        "siteId": "string",
        "siteid": "string",
        "testid": "string",
        "timeSpent": "string",
        "timeStamp": "string",
        "timestamp": "string",
        "trackclick": "string",
        "type": "string",
        "unitsshown": "string",
        "userLat": "string",
        "userLong": "string",
        "userlat": "string",
        "userlong": "string",
        "widgetId": "string",
        "widgetid": "string"
    }
  • created: 2025-02-28 03:05:06
  • ddl:
    {
        "create_database": "CREATE DATABASE IF NOT EXISTS `sfdl_prod_drivethru`",
        "create_table": "CREATE EXTERNAL TABLE IF NOT EXISTS\n`sfdl_prod_drivethru.websites_analytics_demo`\n(\n`imagesViewed`STRING,`type`STRING,`widgetid`STRING,`userLat`STRING,`facilityid`STRING,`timeStamp`STRING,`trackclick`STRING,`name`STRING,`siteId`STRING,`pageid`STRING,`companyId`STRING,`companyid`STRING,`eventlocation`STRING,`pageversion`STRING,`event`STRING,`categoryused`STRING,`facilityId`STRING,`userLong`STRING,`timeSpent`STRING,`sessionId`STRING,`sessionid`STRING,`widgetId`STRING,`pageVersion`STRING,`timestamp`STRING,`testid`STRING,`unitsshown`STRING,`pageId`STRING,`userlat`STRING,`bsPopper`STRING,`userlong`STRING,`siteid`STRING\n)\nPARTITIONED BY (dt string)\nROW FORMAT SERDE\n'org.openx.data.jsonserde.JsonSerDe'\n \nSTORED AS INPUTFORMAT\n  'org.apache.hadoop.mapred.TextInputFormat'\nOUTPUTFORMAT\n  'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'\nLOCATION\n  's3://sfdl-drivethrujson-drivethru-prod/raw_incremental/websites_analytics_demo'\n",
        "drop_table": "DROP TABLE IF EXISTS `sfdl_prod_drivethru.websites_analytics_demo`",
        "repair_table": "MSCK REPAIR TABLE sfdl_prod_drivethru.websites_analytics_demo"
    }
Server:
  • hostname: schemamanager-5475496c57-7dpqb
  • name: schema_manager
  • process_start: 2026-01-09 11:27:47
  • sf_env: prod
  • version: 2112174301
Raw:
{
    "name": {
        "created": "2025-02-25 23:05:13",
        "disabled": false,
        "format": "raw",
        "id": 3471,
        "is_partitioned": true,
        "is_schema_datatyped": true,
        "name": {
            "database": "sfdl_prod_drivethru",
            "table": "websites_analytics_demo"
        },
        "type": "drivethrujson"
    },
    "partition": {
        "count": 3,
        "ddl": [
            "ALTER TABLE\n    `sfdl_prod_drivethru.websites_analytics_demo`\nADD\n\n\n PARTITION (dt='2025-02-26') location 's3://sfdl-drivethrujson-drivethru-prod/raw_incremental/websites_analytics_demo/dt=2025-02-26'\n PARTITION (dt='2025-02-27') location 's3://sfdl-drivethrujson-drivethru-prod/raw_incremental/websites_analytics_demo/dt=2025-02-27'\n PARTITION (dt='2025-02-28') location 's3://sfdl-drivethrujson-drivethru-prod/raw_incremental/websites_analytics_demo/dt=2025-02-28'\n;"
        ],
        "name": "dt",
        "state": [
            {
                "info": {
                    "id": 1428381,
                    "location": "s3://sfdl-drivethrujson-drivethru-prod/raw_incremental/websites_analytics_demo/dt=2025-02-26",
                    "name": "dt",
                    "value": "2025-02-26"
                },
                "state": {
                    "arrival": {
                        "athena_discovery": true,
                        "first_data": "2025-02-26 00:05:07",
                        "last_data": "2025-02-26 23:05:26"
                    },
                    "parquet": {
                        "first_performed": null,
                        "in_progress": null,
                        "job_id": null,
                        "last_performed": null,
                        "queued": false
                    }
                }
            },
            {
                "info": {
                    "id": 1428572,
                    "location": "s3://sfdl-drivethrujson-drivethru-prod/raw_incremental/websites_analytics_demo/dt=2025-02-27",
                    "name": "dt",
                    "value": "2025-02-27"
                },
                "state": {
                    "arrival": {
                        "athena_discovery": false,
                        "first_data": "2025-02-27 00:05:06",
                        "last_data": "2025-02-27 23:06:11"
                    },
                    "parquet": {
                        "first_performed": null,
                        "in_progress": null,
                        "job_id": null,
                        "last_performed": null,
                        "queued": false
                    }
                }
            },
            {
                "info": {
                    "id": 1428774,
                    "location": "s3://sfdl-drivethrujson-drivethru-prod/raw_incremental/websites_analytics_demo/dt=2025-02-28",
                    "name": "dt",
                    "value": "2025-02-28"
                },
                "state": {
                    "arrival": {
                        "athena_discovery": false,
                        "first_data": "2025-02-28 00:05:05",
                        "last_data": "2025-02-28 13:06:02"
                    },
                    "parquet": {
                        "first_performed": null,
                        "in_progress": null,
                        "job_id": null,
                        "last_performed": null,
                        "queued": false
                    }
                }
            }
        ],
        "type": "string"
    },
    "schema": {
        "attributes": {
            "bsPopper": "string",
            "categoryused": "string",
            "companyId": "string",
            "companyid": "string",
            "event": "string",
            "eventlocation": "string",
            "facilityId": "string",
            "facilityid": "string",
            "imagesViewed": "string",
            "name": "string",
            "pageId": "string",
            "pageVersion": "string",
            "pageid": "string",
            "pageversion": "string",
            "sessionId": "string",
            "sessionid": "string",
            "siteId": "string",
            "siteid": "string",
            "testid": "string",
            "timeSpent": "string",
            "timeStamp": "string",
            "timestamp": "string",
            "trackclick": "string",
            "type": "string",
            "unitsshown": "string",
            "userLat": "string",
            "userLong": "string",
            "userlat": "string",
            "userlong": "string",
            "widgetId": "string",
            "widgetid": "string"
        },
        "created": "2025-02-28 03:05:06",
        "ddl": {
            "create_database": "CREATE DATABASE IF NOT EXISTS `sfdl_prod_drivethru`",
            "create_table": "CREATE EXTERNAL TABLE IF NOT EXISTS\n`sfdl_prod_drivethru.websites_analytics_demo`\n(\n`imagesViewed`STRING,`type`STRING,`widgetid`STRING,`userLat`STRING,`facilityid`STRING,`timeStamp`STRING,`trackclick`STRING,`name`STRING,`siteId`STRING,`pageid`STRING,`companyId`STRING,`companyid`STRING,`eventlocation`STRING,`pageversion`STRING,`event`STRING,`categoryused`STRING,`facilityId`STRING,`userLong`STRING,`timeSpent`STRING,`sessionId`STRING,`sessionid`STRING,`widgetId`STRING,`pageVersion`STRING,`timestamp`STRING,`testid`STRING,`unitsshown`STRING,`pageId`STRING,`userlat`STRING,`bsPopper`STRING,`userlong`STRING,`siteid`STRING\n)\nPARTITIONED BY (dt string)\nROW FORMAT SERDE\n'org.openx.data.jsonserde.JsonSerDe'\n \nSTORED AS INPUTFORMAT\n  'org.apache.hadoop.mapred.TextInputFormat'\nOUTPUTFORMAT\n  'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'\nLOCATION\n  's3://sfdl-drivethrujson-drivethru-prod/raw_incremental/websites_analytics_demo'\n",
            "drop_table": "DROP TABLE IF EXISTS `sfdl_prod_drivethru.websites_analytics_demo`",
            "repair_table": "MSCK REPAIR TABLE sfdl_prod_drivethru.websites_analytics_demo"
        }
    },
    "server": {
        "hostname": "schemamanager-5475496c57-7dpqb",
        "name": "schema_manager",
        "process_start": "2026-01-09 11:27:47",
        "sf_env": "prod",
        "version": "2112174301"
    }
}