Schema Info
Schema Name: sfdl_prod_drivethru.websites_analytics_demo
- created: 2025-02-25 23:05:13
- disabled: False
- format: raw
- id: 3471
- is_partitioned: True
- is_schema_datatyped: True
- name:
{ "database": "sfdl_prod_drivethru", "table": "websites_analytics_demo" } - type: drivethrujson
Schema Versions: 3
-
Version 1209884
- created: 2025-02-28 03:05:06
- ddl:
[ "CREATE DATABASE IF NOT EXISTS `sfdl_prod_drivethru`", "DROP TABLE IF EXISTS `sfdl_prod_drivethru.websites_analytics_demo`", "CREATE EXTERNAL TABLE IF NOT EXISTS\n`sfdl_prod_drivethru.websites_analytics_demo`\n(\n`imagesViewed`STRING,`type`STRING,`widgetid`STRING,`userLat`STRING,`facilityid`STRING,`timeStamp`STRING,`trackclick`STRING,`name`STRING,`siteId`STRING,`pageid`STRING,`companyId`STRING,`companyid`STRING,`eventlocation`STRING,`pageversion`STRING,`event`STRING,`categoryused`STRING,`facilityId`STRING,`userLong`STRING,`timeSpent`STRING,`sessionId`STRING,`sessionid`STRING,`widgetId`STRING,`pageVersion`STRING,`timestamp`STRING,`testid`STRING,`unitsshown`STRING,`pageId`STRING,`userlat`STRING,`bsPopper`STRING,`userlong`STRING,`siteid`STRING\n)\nPARTITIONED BY (dt string)\nROW FORMAT SERDE\n'org.openx.data.jsonserde.JsonSerDe'\n \nSTORED AS INPUTFORMAT\n 'org.apache.hadoop.mapred.TextInputFormat'\nOUTPUTFORMAT\n 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'\nLOCATION\n 's3://sfdl-drivethrujson-drivethru-prod/raw_incremental/websites_analytics_demo'\n", "MSCK REPAIR TABLE sfdl_prod_drivethru.websites_analytics_demo" ] - hive_path: s3://sfdl-drivethrujson-drivethru-prod/raw_incremental/websites_analytics_demo
- id: 1209884
- processed: 0
- raw_data:
{ "bsPopper": "static", "companyId": "7b40898a-c95a-4756-88ee-4729a1275427", "event": "pageView", "facilityId": null, "name": "Home Page", "pageId": "5aa828cb5d4f76061ab1e215", "pageVersion": "31c3bc3f-ad06-4674-9b50-0c1399a146cd", "sessionId": "4hWGsSbyLadOh1aDL6oX_KkfaOphKMsg", "siteId": "5aa828bc7a4768a6789e2340", "timeStamp": "2025-02-28T08:46:31.780Z", "type": "voyager/pages/types/HOME_PAGE", "userLat": 39.0572, "userLong": -94.5933, "widgetId": "9e2b21be-28fb-46ee-9f40-bc38c004853d" } - schema_attributes:
{ "bsPopper": "string", "categoryused": "string", "companyId": "string", "companyid": "string", "event": "string", "eventlocation": "string", "facilityId": "string", "facilityid": "string", "imagesViewed": "string", "name": "string", "pageId": "string", "pageVersion": "string", "pageid": "string", "pageversion": "string", "sessionId": "string", "sessionid": "string", "siteId": "string", "siteid": "string", "testid": "string", "timeSpent": "string", "timeStamp": "string", "timestamp": "string", "trackclick": "string", "type": "string", "unitsshown": "string", "userLat": "string", "userLong": "string", "userlat": "string", "userlong": "string", "widgetId": "string", "widgetid": "string" } - schema_name_id: 3471
- schema_scan_id: 137955199
- updated: None
-
Version 1208390
- created: 2025-02-27 00:05:06
- ddl:
[ "CREATE DATABASE IF NOT EXISTS `sfdl_prod_drivethru`", "DROP TABLE IF EXISTS `sfdl_prod_drivethru.websites_analytics_demo`", "CREATE EXTERNAL TABLE IF NOT EXISTS\n`sfdl_prod_drivethru.websites_analytics_demo`\n(\n`pageId`STRING,`userLat`STRING,`siteid`STRING,`testid`STRING,`type`STRING,`sessionId`STRING,`eventlocation`STRING,`unitsshown`STRING,`userlong`STRING,`pageid`STRING,`pageVersion`STRING,`sessionid`STRING,`widgetid`STRING,`event`STRING,`name`STRING,`siteId`STRING,`facilityId`STRING,`trackclick`STRING,`companyId`STRING,`timeSpent`STRING,`pageversion`STRING,`imagesViewed`STRING,`companyid`STRING,`timeStamp`STRING,`userLong`STRING,`timestamp`STRING,`categoryused`STRING,`userlat`STRING,`facilityid`STRING\n)\nPARTITIONED BY (dt string)\nROW FORMAT SERDE\n'org.openx.data.jsonserde.JsonSerDe'\n \nSTORED AS INPUTFORMAT\n 'org.apache.hadoop.mapred.TextInputFormat'\nOUTPUTFORMAT\n 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'\nLOCATION\n 's3://sfdl-drivethrujson-drivethru-prod/raw_incremental/websites_analytics_demo'\n", "MSCK REPAIR TABLE sfdl_prod_drivethru.websites_analytics_demo" ] - hive_path: s3://sfdl-drivethrujson-drivethru-prod/raw_incremental/websites_analytics_demo
- id: 1208390
- processed: 0
- raw_data:
{ "categoryused": "Small", "companyid": "60f040b3-fbcd-4c2d-892f-62673b3ec7ef", "event": "pageView", "eventlocation": "themeOne", "facilityid": "e0f8e3e4-82f0-43aa-b997-3a9269ea79cc", "name": "Self Storage Units | NJ, NY, PA, and AZ", "pageid": "5bc768f99871560676d89e83", "pageversion": "5d7d8134-5a5b-416f-9b82-2f6c5a5aaaa5", "sessionid": "Sx5W_rlSyBS3rc29VaYsq5YUXSva84lc", "siteid": "5bc768d1417f9dfb8aa5e0e1", "testid": "actions-movein", "timestamp": "2025-02-26T15:12:36.774Z", "trackclick": "move-in", "type": "voyager/pages/types/HOME_PAGE", "unitsshown": 1, "userlat": 39.0572, "userlong": -94.5933, "widgetid": "88f72d83-138f-4e1f-b441-a73afbb18076" } - schema_attributes:
{ "categoryused": "string", "companyId": "string", "companyid": "string", "event": "string", "eventlocation": "string", "facilityId": "string", "facilityid": "string", "imagesViewed": "string", "name": "string", "pageId": "string", "pageVersion": "string", "pageid": "string", "pageversion": "string", "sessionId": "string", "sessionid": "string", "siteId": "string", "siteid": "string", "testid": "string", "timeSpent": "string", "timeStamp": "string", "timestamp": "string", "trackclick": "string", "type": "string", "unitsshown": "string", "userLat": "string", "userLong": "string", "userlat": "string", "userlong": "string", "widgetid": "string" } - schema_name_id: 3471
- schema_scan_id: 137888858
- updated: None
-
Version 1207624
- created: 2025-02-26 01:06:52
- ddl:
[ "CREATE DATABASE IF NOT EXISTS `sfdl_prod_drivethru`", "DROP TABLE IF EXISTS `sfdl_prod_drivethru.websites_analytics_demo`", "CREATE EXTERNAL TABLE IF NOT EXISTS\n`sfdl_prod_drivethru.websites_analytics_demo`\n(\n`type`STRING,`facilityId`STRING,`name`STRING,`companyId`STRING,`imagesViewed`STRING,`sessionId`STRING,`event`STRING,`timeStamp`STRING,`timeSpent`STRING,`pageId`STRING,`pageVersion`STRING,`userLong`STRING,`siteId`STRING,`userLat`STRING\n)\nPARTITIONED BY (dt string)\nROW FORMAT SERDE\n'org.openx.data.jsonserde.JsonSerDe'\n \nSTORED AS INPUTFORMAT\n 'org.apache.hadoop.mapred.TextInputFormat'\nOUTPUTFORMAT\n 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'\nLOCATION\n 's3://sfdl-drivethrujson-drivethru-prod/raw_incremental/websites_analytics_demo'\n", "MSCK REPAIR TABLE sfdl_prod_drivethru.websites_analytics_demo" ] - hive_path: s3://sfdl-drivethrujson-drivethru-prod/raw_incremental/websites_analytics_demo
- id: 1207624
- processed: 1
- raw_data:
{ "companyId": "7b40898a-c95a-4756-88ee-4729a1275427", "event": "pageView", "facilityId": null, "imagesViewed": 5, "name": "All Locations", "pageId": "5aa828cb5d4f76061ab1e216", "pageVersion": "38daf8f9-87e3-4f02-b99e-4eb184b3e9f1", "sessionId": "g6RzU61daclFOkwnddfJG9hJuBzVzNC-", "siteId": "5aa828bc7a4768a6789e2340", "timeSpent": 10, "timeStamp": "2025-02-26T06:46:43.691Z", "type": "voyager/pages/types/FACILITIES_LIST_PAGE", "userLat": 39.0572, "userLong": -94.5933 } - schema_attributes:
{ "companyId": "string", "event": "string", "facilityId": "string", "imagesViewed": "string", "name": "string", "pageId": "string", "pageVersion": "string", "sessionId": "string", "siteId": "string", "timeSpent": "string", "timeStamp": "string", "type": "string", "userLat": "string", "userLong": "string" } - schema_name_id: 3471
- schema_scan_id: 137830123
- updated: 2025-02-26 01:07:05
Schema Scans: 3
Last at 2025-02-28 03:05:05
-
Scan 137955199
- duration: 0:00:08.975242
- exit_message:
{ "exception": "failed to apply schema `Query failed: FAILED: SemanticException [Error 10036]: Duplicate column name: companyid `CREATE EXTERNAL TABLE IF NOT EXISTS\n`sfdl_prod_drivethru.websites_analytics_demo`\n(\n`imagesViewed`STRING,`type`STRING,`widgetid`STRING,`userLat`STRING,`facilityid`STRING,`timeStamp`STRING,`trackclick`STRING,`name`STRING,`siteId`STRING,`pageid`STRING,`companyId`STRING,`companyid`STRING,`eventlocation`STRING,`pageversion`STRING,`event`STRING,`categoryused`STRING,`facilityId`STRING,`userLong`STRING,`timeSpent`STRING,`sessionId`STRING,`sessionid`STRING,`widgetId`STRING,`pageVersion`STRING,`timestamp`STRING,`testid`STRING,`unitsshown`STRING,`pageId`STRING,`userlat`STRING,`bsPopper`STRING,`userlong`STRING,`siteid`STRING\n)\nPARTITIONED BY (dt string)\nROW FORMAT SERDE\n'org.openx.data.jsonserde.JsonSerDe'\n \nSTORED AS INPUTFORMAT\n 'org.apache.hadoop.mapred.TextInputFormat'\nOUTPUTFORMAT\n 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'\nLOCATION\n 's3://sfdl-drivethrujson-drivethru-prod/raw_incremental/websites_analytics_demo'\n``: ['CREATE DATABASE IF NOT EXISTS `sfdl_prod_drivethru`', 'DROP TABLE IF EXISTS `sfdl_prod_drivethru.websites_analytics_demo`', \"CREATE EXTERNAL TABLE IF NOT EXISTS\\n`sfdl_prod_drivethru.websites_analytics_demo`\\n(\\n`imagesViewed`STRING,`type`STRING,`widgetid`STRING,`userLat`STRING,`facilityid`STRING,`timeStamp`STRING,`trackclick`STRING,`name`STRING,`siteId`STRING,`pageid`STRING,`companyId`STRING,`companyid`STRING,`eventlocation`STRING,`pageversion`STRING,`event`STRING,`categoryused`STRING,`facilityId`STRING,`userLong`STRING,`timeSpent`STRING,`sessionId`STRING,`sessionid`STRING,`widgetId`STRING,`pageVersion`STRING,`timestamp`STRING,`testid`STRING,`unitsshown`STRING,`pageId`STRING,`userlat`STRING,`bsPopper`STRING,`userlong`STRING,`siteid`STRING\\n)\\nPARTITIONED BY (dt string)\\nROW FORMAT SERDE\\n'org.openx.data.jsonserde.JsonSerDe'\\n \\nSTORED AS INPUTFORMAT\\n 'org.apache.hadoop.mapred.TextInputFormat'\\nOUTPUTFORMAT\\n 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'\\nLOCATION\\n 's3://sfdl-drivethrujson-drivethru-prod/raw_incremental/websites_analytics_demo'\\n\", 'MSCK REPAIR TABLE sfdl_prod_drivethru.websites_analytics_demo']\nTraceback (most recent call last):\n File \"/schema_manager/schema_generator/table_generator.py\", line 24, in apply_latest_version\n self.runner.run_multi_query(schema_version.ddl, timeout=DDLQueueWorker.TIMEOUT)\n File \"/usr/local/lib/python3.12/site-packages/stodl/athena_runner/runner.py\", line 133, in run_multi_query\n result = self.run_single_query(query, timeout, database)\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n File \"/usr/local/lib/python3.12/site-packages/stodl/athena_runner/runner.py\", line 51, in run_single_query\n return self._run_single_query(query, timeout, database)\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n File \"/usr/local/lib/python3.12/site-packages/stodl/athena_runner/runner.py\", line 120, in _run_single_query\n raise e\n File \"/usr/local/lib/python3.12/site-packages/stodl/athena_runner/runner.py\", line 103, in _run_single_query\n raise AthenaRunnerQueryException('Query failed: {} `{}`'.format(info['QueryExecution']['Status']['StateChangeReason'], query))\nstodl.athena_runner.exceptions.AthenaRunnerQueryException: Query failed: FAILED: SemanticException [Error 10036]: Duplicate column name: companyid `CREATE EXTERNAL TABLE IF NOT EXISTS\n`sfdl_prod_drivethru.websites_analytics_demo`\n(\n`imagesViewed`STRING,`type`STRING,`widgetid`STRING,`userLat`STRING,`facilityid`STRING,`timeStamp`STRING,`trackclick`STRING,`name`STRING,`siteId`STRING,`pageid`STRING,`companyId`STRING,`companyid`STRING,`eventlocation`STRING,`pageversion`STRING,`event`STRING,`categoryused`STRING,`facilityId`STRING,`userLong`STRING,`timeSpent`STRING,`sessionId`STRING,`sessionid`STRING,`widgetId`STRING,`pageVersion`STRING,`timestamp`STRING,`testid`STRING,`unitsshown`STRING,`pageId`STRING,`userlat`STRING,`bsPopper`STRING,`userlong`STRING,`siteid`STRING\n)\nPARTITIONED BY (dt string)\nROW FORMAT SERDE\n'org.openx.data.jsonserde.JsonSerDe'\n \nSTORED AS INPUTFORMAT\n 'org.apache.hadoop.mapred.TextInputFormat'\nOUTPUTFORMAT\n 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'\nLOCATION\n 's3://sfdl-drivethrujson-drivethru-prod/raw_incremental/websites_analytics_demo'\n`\n\nDuring handling of the above exception, another exception occurred:\n\nTraceback (most recent call last):\n File \"/schema_manager/schema_generator/schema_scanner.py\", line 102, in scan\n response = service.run()\n ^^^^^^^^^^^^^\n File \"/schema_manager/schema_generator/process_hive_table.py\", line 98, in run\n response['ddl_changed'] = table_generator.run()\n ^^^^^^^^^^^^^^^^^^^^^\n File \"/schema_manager/schema_generator/table_generator.py\", line 35, in run\n self.apply_latest_version()\n File \"/schema_manager/schema_generator/table_generator.py\", line 26, in apply_latest_version\n raise Exception('failed to apply schema `{}`: {}'.format(e, schema_version.ddl))\nException: failed to apply schema `Query failed: FAILED: SemanticException [Error 10036]: Duplicate column name: companyid `CREATE EXTERNAL TABLE IF NOT EXISTS\n`sfdl_prod_drivethru.websites_analytics_demo`\n(\n`imagesViewed`STRING,`type`STRING,`widgetid`STRING,`userLat`STRING,`facilityid`STRING,`timeStamp`STRING,`trackclick`STRING,`name`STRING,`siteId`STRING,`pageid`STRING,`companyId`STRING,`companyid`STRING,`eventlocation`STRING,`pageversion`STRING,`event`STRING,`categoryused`STRING,`facilityId`STRING,`userLong`STRING,`timeSpent`STRING,`sessionId`STRING,`sessionid`STRING,`widgetId`STRING,`pageVersion`STRING,`timestamp`STRING,`testid`STRING,`unitsshown`STRING,`pageId`STRING,`userlat`STRING,`bsPopper`STRING,`userlong`STRING,`siteid`STRING\n)\nPARTITIONED BY (dt string)\nROW FORMAT SERDE\n'org.openx.data.jsonserde.JsonSerDe'\n \nSTORED AS INPUTFORMAT\n 'org.apache.hadoop.mapred.TextInputFormat'\nOUTPUTFORMAT\n 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'\nLOCATION\n 's3://sfdl-drivethrujson-drivethru-prod/raw_incremental/websites_analytics_demo'\n``: ['CREATE DATABASE IF NOT EXISTS `sfdl_prod_drivethru`', 'DROP TABLE IF EXISTS `sfdl_prod_drivethru.websites_analytics_demo`', \"CREATE EXTERNAL TABLE IF NOT EXISTS\\n`sfdl_prod_drivethru.websites_analytics_demo`\\n(\\n`imagesViewed`STRING,`type`STRING,`widgetid`STRING,`userLat`STRING,`facilityid`STRING,`timeStamp`STRING,`trackclick`STRING,`name`STRING,`siteId`STRING,`pageid`STRING,`companyId`STRING,`companyid`STRING,`eventlocation`STRING,`pageversion`STRING,`event`STRING,`categoryused`STRING,`facilityId`STRING,`userLong`STRING,`timeSpent`STRING,`sessionId`STRING,`sessionid`STRING,`widgetId`STRING,`pageVersion`STRING,`timestamp`STRING,`testid`STRING,`unitsshown`STRING,`pageId`STRING,`userlat`STRING,`bsPopper`STRING,`userlong`STRING,`siteid`STRING\\n)\\nPARTITIONED BY (dt string)\\nROW FORMAT SERDE\\n'org.openx.data.jsonserde.JsonSerDe'\\n \\nSTORED AS INPUTFORMAT\\n 'org.apache.hadoop.mapred.TextInputFormat'\\nOUTPUTFORMAT\\n 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'\\nLOCATION\\n 's3://sfdl-drivethrujson-drivethru-prod/raw_incremental/websites_analytics_demo'\\n\", 'MSCK REPAIR TABLE sfdl_prod_drivethru.websites_analytics_demo']\n: Expecting value: line 1 column 1 (char 0)" } - id: 137955199
- payload:
{ "datatype_dict": null, "file_format": "json", "is_schema_datatyped": true, "partition": { "location": "s3://sfdl-drivethrujson-drivethru-prod/raw_incremental/websites_analytics_demo/dt=2025-02-28", "name": "dt", "type": "string", "value": "2025-02-28" }, "s3": { "bucket": "sfdl-drivethrujson-drivethru-prod", "hive_path": "s3://sfdl-drivethrujson-drivethru-prod/raw_incremental/websites_analytics_demo", "key": "raw_incremental/websites_analytics_demo/dt=2025-02-28/events_03_00.json.gz" }, "schema": { "name": "sfdl_prod_drivethru.websites_analytics_demo", "type": "drivethrujson" }, "trace_context": { "parent_id": 17934848701225897929, "service_name": "drive-thru", "span_id": 14735032474575148209, "trace_id": 8828131349323467067 } } - running: False
- schema_name_id: 3471
- start_time: 2025-02-28 03:05:05
- success: False
- trace_id: 8828131349323467067
-
Scan 137888858
- duration: 0:00:11.747622
- exit_message:
{ "exception": "failed to apply schema `Query failed: FAILED: SemanticException [Error 10036]: Duplicate column name: pageid `CREATE EXTERNAL TABLE IF NOT EXISTS\n`sfdl_prod_drivethru.websites_analytics_demo`\n(\n`pageId`STRING,`userLat`STRING,`siteid`STRING,`testid`STRING,`type`STRING,`sessionId`STRING,`eventlocation`STRING,`unitsshown`STRING,`userlong`STRING,`pageid`STRING,`pageVersion`STRING,`sessionid`STRING,`widgetid`STRING,`event`STRING,`name`STRING,`siteId`STRING,`facilityId`STRING,`trackclick`STRING,`companyId`STRING,`timeSpent`STRING,`pageversion`STRING,`imagesViewed`STRING,`companyid`STRING,`timeStamp`STRING,`userLong`STRING,`timestamp`STRING,`categoryused`STRING,`userlat`STRING,`facilityid`STRING\n)\nPARTITIONED BY (dt string)\nROW FORMAT SERDE\n'org.openx.data.jsonserde.JsonSerDe'\n \nSTORED AS INPUTFORMAT\n 'org.apache.hadoop.mapred.TextInputFormat'\nOUTPUTFORMAT\n 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'\nLOCATION\n 's3://sfdl-drivethrujson-drivethru-prod/raw_incremental/websites_analytics_demo'\n``: ['CREATE DATABASE IF NOT EXISTS `sfdl_prod_drivethru`', 'DROP TABLE IF EXISTS `sfdl_prod_drivethru.websites_analytics_demo`', \"CREATE EXTERNAL TABLE IF NOT EXISTS\\n`sfdl_prod_drivethru.websites_analytics_demo`\\n(\\n`pageId`STRING,`userLat`STRING,`siteid`STRING,`testid`STRING,`type`STRING,`sessionId`STRING,`eventlocation`STRING,`unitsshown`STRING,`userlong`STRING,`pageid`STRING,`pageVersion`STRING,`sessionid`STRING,`widgetid`STRING,`event`STRING,`name`STRING,`siteId`STRING,`facilityId`STRING,`trackclick`STRING,`companyId`STRING,`timeSpent`STRING,`pageversion`STRING,`imagesViewed`STRING,`companyid`STRING,`timeStamp`STRING,`userLong`STRING,`timestamp`STRING,`categoryused`STRING,`userlat`STRING,`facilityid`STRING\\n)\\nPARTITIONED BY (dt string)\\nROW FORMAT SERDE\\n'org.openx.data.jsonserde.JsonSerDe'\\n \\nSTORED AS INPUTFORMAT\\n 'org.apache.hadoop.mapred.TextInputFormat'\\nOUTPUTFORMAT\\n 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'\\nLOCATION\\n 's3://sfdl-drivethrujson-drivethru-prod/raw_incremental/websites_analytics_demo'\\n\", 'MSCK REPAIR TABLE sfdl_prod_drivethru.websites_analytics_demo']\nTraceback (most recent call last):\n File \"/schema_manager/schema_generator/table_generator.py\", line 24, in apply_latest_version\n self.runner.run_multi_query(schema_version.ddl, timeout=DDLQueueWorker.TIMEOUT)\n File \"/usr/local/lib/python3.12/site-packages/stodl/athena_runner/runner.py\", line 133, in run_multi_query\n result = self.run_single_query(query, timeout, database)\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n File \"/usr/local/lib/python3.12/site-packages/stodl/athena_runner/runner.py\", line 51, in run_single_query\n return self._run_single_query(query, timeout, database)\n ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n File \"/usr/local/lib/python3.12/site-packages/stodl/athena_runner/runner.py\", line 120, in _run_single_query\n raise e\n File \"/usr/local/lib/python3.12/site-packages/stodl/athena_runner/runner.py\", line 103, in _run_single_query\n raise AthenaRunnerQueryException('Query failed: {} `{}`'.format(info['QueryExecution']['Status']['StateChangeReason'], query))\nstodl.athena_runner.exceptions.AthenaRunnerQueryException: Query failed: FAILED: SemanticException [Error 10036]: Duplicate column name: pageid `CREATE EXTERNAL TABLE IF NOT EXISTS\n`sfdl_prod_drivethru.websites_analytics_demo`\n(\n`pageId`STRING,`userLat`STRING,`siteid`STRING,`testid`STRING,`type`STRING,`sessionId`STRING,`eventlocation`STRING,`unitsshown`STRING,`userlong`STRING,`pageid`STRING,`pageVersion`STRING,`sessionid`STRING,`widgetid`STRING,`event`STRING,`name`STRING,`siteId`STRING,`facilityId`STRING,`trackclick`STRING,`companyId`STRING,`timeSpent`STRING,`pageversion`STRING,`imagesViewed`STRING,`companyid`STRING,`timeStamp`STRING,`userLong`STRING,`timestamp`STRING,`categoryused`STRING,`userlat`STRING,`facilityid`STRING\n)\nPARTITIONED BY (dt string)\nROW FORMAT SERDE\n'org.openx.data.jsonserde.JsonSerDe'\n \nSTORED AS INPUTFORMAT\n 'org.apache.hadoop.mapred.TextInputFormat'\nOUTPUTFORMAT\n 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'\nLOCATION\n 's3://sfdl-drivethrujson-drivethru-prod/raw_incremental/websites_analytics_demo'\n`\n\nDuring handling of the above exception, another exception occurred:\n\nTraceback (most recent call last):\n File \"/schema_manager/schema_generator/schema_scanner.py\", line 102, in scan\n response = service.run()\n ^^^^^^^^^^^^^\n File \"/schema_manager/schema_generator/process_hive_table.py\", line 98, in run\n response['ddl_changed'] = table_generator.run()\n ^^^^^^^^^^^^^^^^^^^^^\n File \"/schema_manager/schema_generator/table_generator.py\", line 35, in run\n self.apply_latest_version()\n File \"/schema_manager/schema_generator/table_generator.py\", line 26, in apply_latest_version\n raise Exception('failed to apply schema `{}`: {}'.format(e, schema_version.ddl))\nException: failed to apply schema `Query failed: FAILED: SemanticException [Error 10036]: Duplicate column name: pageid `CREATE EXTERNAL TABLE IF NOT EXISTS\n`sfdl_prod_drivethru.websites_analytics_demo`\n(\n`pageId`STRING,`userLat`STRING,`siteid`STRING,`testid`STRING,`type`STRING,`sessionId`STRING,`eventlocation`STRING,`unitsshown`STRING,`userlong`STRING,`pageid`STRING,`pageVersion`STRING,`sessionid`STRING,`widgetid`STRING,`event`STRING,`name`STRING,`siteId`STRING,`facilityId`STRING,`trackclick`STRING,`companyId`STRING,`timeSpent`STRING,`pageversion`STRING,`imagesViewed`STRING,`companyid`STRING,`timeStamp`STRING,`userLong`STRING,`timestamp`STRING,`categoryused`STRING,`userlat`STRING,`facilityid`STRING\n)\nPARTITIONED BY (dt string)\nROW FORMAT SERDE\n'org.openx.data.jsonserde.JsonSerDe'\n \nSTORED AS INPUTFORMAT\n 'org.apache.hadoop.mapred.TextInputFormat'\nOUTPUTFORMAT\n 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'\nLOCATION\n 's3://sfdl-drivethrujson-drivethru-prod/raw_incremental/websites_analytics_demo'\n``: ['CREATE DATABASE IF NOT EXISTS `sfdl_prod_drivethru`', 'DROP TABLE IF EXISTS `sfdl_prod_drivethru.websites_analytics_demo`', \"CREATE EXTERNAL TABLE IF NOT EXISTS\\n`sfdl_prod_drivethru.websites_analytics_demo`\\n(\\n`pageId`STRING,`userLat`STRING,`siteid`STRING,`testid`STRING,`type`STRING,`sessionId`STRING,`eventlocation`STRING,`unitsshown`STRING,`userlong`STRING,`pageid`STRING,`pageVersion`STRING,`sessionid`STRING,`widgetid`STRING,`event`STRING,`name`STRING,`siteId`STRING,`facilityId`STRING,`trackclick`STRING,`companyId`STRING,`timeSpent`STRING,`pageversion`STRING,`imagesViewed`STRING,`companyid`STRING,`timeStamp`STRING,`userLong`STRING,`timestamp`STRING,`categoryused`STRING,`userlat`STRING,`facilityid`STRING\\n)\\nPARTITIONED BY (dt string)\\nROW FORMAT SERDE\\n'org.openx.data.jsonserde.JsonSerDe'\\n \\nSTORED AS INPUTFORMAT\\n 'org.apache.hadoop.mapred.TextInputFormat'\\nOUTPUTFORMAT\\n 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'\\nLOCATION\\n 's3://sfdl-drivethrujson-drivethru-prod/raw_incremental/websites_analytics_demo'\\n\", 'MSCK REPAIR TABLE sfdl_prod_drivethru.websites_analytics_demo']\n: Expecting value: line 1 column 1 (char 0)" } - id: 137888858
- payload:
{ "datatype_dict": null, "file_format": "json", "is_schema_datatyped": true, "partition": { "location": "s3://sfdl-drivethrujson-drivethru-prod/raw_incremental/websites_analytics_demo/dt=2025-02-27", "name": "dt", "type": "string", "value": "2025-02-27" }, "s3": { "bucket": "sfdl-drivethrujson-drivethru-prod", "hive_path": "s3://sfdl-drivethrujson-drivethru-prod/raw_incremental/websites_analytics_demo", "key": "raw_incremental/websites_analytics_demo/dt=2025-02-27/events_09_47.json.gz" }, "schema": { "name": "sfdl_prod_drivethru.websites_analytics_demo", "type": "drivethrujson" }, "trace_context": { "parent_id": 3772762936638509876, "service_name": "drive-thru", "span_id": 13003083318741893245, "trace_id": 10525959978009739773 } } - running: False
- schema_name_id: 3471
- start_time: 2025-02-27 00:05:05
- success: False
- trace_id: 10525959978009739773
-
Scan 137830123
- duration: 0:00:17.179849
- exit_message:
{ "ddl_changed": true, "ingested_partitions": 2, "partitions_applied": false, "success": true } - id: 137830123
- payload:
{ "datatype_dict": null, "file_format": "json", "is_schema_datatyped": true, "partition": { "location": "s3://sfdl-drivethrujson-drivethru-prod/raw_incremental/websites_analytics_demo/dt=2025-02-26", "name": "dt", "type": "string", "value": "2025-02-26" }, "s3": { "bucket": "sfdl-drivethrujson-drivethru-prod", "hive_path": "s3://sfdl-drivethrujson-drivethru-prod/raw_incremental/websites_analytics_demo", "key": "raw_incremental/websites_analytics_demo/dt=2025-02-26/events_01_00.json.gz" }, "schema": { "name": "sfdl_prod_drivethru.websites_analytics_demo", "type": "drivethrujson" }, "trace_context": { "parent_id": 11027206222412167201, "service_name": "drive-thru", "span_id": 1605317348910005844, "trace_id": 9036162070658868287 } } - running: False
- schema_name_id: 3471
- start_time: 2025-02-26 01:06:51
- success: True
- trace_id: 9036162070658868287
- count: 3
- ddl:
[ "ALTER TABLE\n `sfdl_prod_drivethru.websites_analytics_demo`\nADD\n\n\n PARTITION (dt='2025-02-26') location 's3://sfdl-drivethrujson-drivethru-prod/raw_incremental/websites_analytics_demo/dt=2025-02-26'\n PARTITION (dt='2025-02-27') location 's3://sfdl-drivethrujson-drivethru-prod/raw_incremental/websites_analytics_demo/dt=2025-02-27'\n PARTITION (dt='2025-02-28') location 's3://sfdl-drivethrujson-drivethru-prod/raw_incremental/websites_analytics_demo/dt=2025-02-28'\n;" ] - name: dt
- state:
[ { "info": { "id": 1428381, "location": "s3://sfdl-drivethrujson-drivethru-prod/raw_incremental/websites_analytics_demo/dt=2025-02-26", "name": "dt", "value": "2025-02-26" }, "state": { "arrival": { "athena_discovery": true, "first_data": "2025-02-26 00:05:07", "last_data": "2025-02-26 23:05:26" }, "parquet": { "first_performed": null, "in_progress": null, "job_id": null, "last_performed": null, "queued": false } } }, { "info": { "id": 1428572, "location": "s3://sfdl-drivethrujson-drivethru-prod/raw_incremental/websites_analytics_demo/dt=2025-02-27", "name": "dt", "value": "2025-02-27" }, "state": { "arrival": { "athena_discovery": false, "first_data": "2025-02-27 00:05:06", "last_data": "2025-02-27 23:06:11" }, "parquet": { "first_performed": null, "in_progress": null, "job_id": null, "last_performed": null, "queued": false } } }, { "info": { "id": 1428774, "location": "s3://sfdl-drivethrujson-drivethru-prod/raw_incremental/websites_analytics_demo/dt=2025-02-28", "name": "dt", "value": "2025-02-28" }, "state": { "arrival": { "athena_discovery": false, "first_data": "2025-02-28 00:05:05", "last_data": "2025-02-28 13:06:02" }, "parquet": { "first_performed": null, "in_progress": null, "job_id": null, "last_performed": null, "queued": false } } } ] - type: string
- attributes:
{ "bsPopper": "string", "categoryused": "string", "companyId": "string", "companyid": "string", "event": "string", "eventlocation": "string", "facilityId": "string", "facilityid": "string", "imagesViewed": "string", "name": "string", "pageId": "string", "pageVersion": "string", "pageid": "string", "pageversion": "string", "sessionId": "string", "sessionid": "string", "siteId": "string", "siteid": "string", "testid": "string", "timeSpent": "string", "timeStamp": "string", "timestamp": "string", "trackclick": "string", "type": "string", "unitsshown": "string", "userLat": "string", "userLong": "string", "userlat": "string", "userlong": "string", "widgetId": "string", "widgetid": "string" } - created: 2025-02-28 03:05:06
- ddl:
{ "create_database": "CREATE DATABASE IF NOT EXISTS `sfdl_prod_drivethru`", "create_table": "CREATE EXTERNAL TABLE IF NOT EXISTS\n`sfdl_prod_drivethru.websites_analytics_demo`\n(\n`imagesViewed`STRING,`type`STRING,`widgetid`STRING,`userLat`STRING,`facilityid`STRING,`timeStamp`STRING,`trackclick`STRING,`name`STRING,`siteId`STRING,`pageid`STRING,`companyId`STRING,`companyid`STRING,`eventlocation`STRING,`pageversion`STRING,`event`STRING,`categoryused`STRING,`facilityId`STRING,`userLong`STRING,`timeSpent`STRING,`sessionId`STRING,`sessionid`STRING,`widgetId`STRING,`pageVersion`STRING,`timestamp`STRING,`testid`STRING,`unitsshown`STRING,`pageId`STRING,`userlat`STRING,`bsPopper`STRING,`userlong`STRING,`siteid`STRING\n)\nPARTITIONED BY (dt string)\nROW FORMAT SERDE\n'org.openx.data.jsonserde.JsonSerDe'\n \nSTORED AS INPUTFORMAT\n 'org.apache.hadoop.mapred.TextInputFormat'\nOUTPUTFORMAT\n 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'\nLOCATION\n 's3://sfdl-drivethrujson-drivethru-prod/raw_incremental/websites_analytics_demo'\n", "drop_table": "DROP TABLE IF EXISTS `sfdl_prod_drivethru.websites_analytics_demo`", "repair_table": "MSCK REPAIR TABLE sfdl_prod_drivethru.websites_analytics_demo" }
- hostname: schemamanager-5475496c57-7dpqb
- name: schema_manager
- process_start: 2026-01-09 11:27:47
- sf_env: prod
- version: 2112174301
{
"name": {
"created": "2025-02-25 23:05:13",
"disabled": false,
"format": "raw",
"id": 3471,
"is_partitioned": true,
"is_schema_datatyped": true,
"name": {
"database": "sfdl_prod_drivethru",
"table": "websites_analytics_demo"
},
"type": "drivethrujson"
},
"partition": {
"count": 3,
"ddl": [
"ALTER TABLE\n `sfdl_prod_drivethru.websites_analytics_demo`\nADD\n\n\n PARTITION (dt='2025-02-26') location 's3://sfdl-drivethrujson-drivethru-prod/raw_incremental/websites_analytics_demo/dt=2025-02-26'\n PARTITION (dt='2025-02-27') location 's3://sfdl-drivethrujson-drivethru-prod/raw_incremental/websites_analytics_demo/dt=2025-02-27'\n PARTITION (dt='2025-02-28') location 's3://sfdl-drivethrujson-drivethru-prod/raw_incremental/websites_analytics_demo/dt=2025-02-28'\n;"
],
"name": "dt",
"state": [
{
"info": {
"id": 1428381,
"location": "s3://sfdl-drivethrujson-drivethru-prod/raw_incremental/websites_analytics_demo/dt=2025-02-26",
"name": "dt",
"value": "2025-02-26"
},
"state": {
"arrival": {
"athena_discovery": true,
"first_data": "2025-02-26 00:05:07",
"last_data": "2025-02-26 23:05:26"
},
"parquet": {
"first_performed": null,
"in_progress": null,
"job_id": null,
"last_performed": null,
"queued": false
}
}
},
{
"info": {
"id": 1428572,
"location": "s3://sfdl-drivethrujson-drivethru-prod/raw_incremental/websites_analytics_demo/dt=2025-02-27",
"name": "dt",
"value": "2025-02-27"
},
"state": {
"arrival": {
"athena_discovery": false,
"first_data": "2025-02-27 00:05:06",
"last_data": "2025-02-27 23:06:11"
},
"parquet": {
"first_performed": null,
"in_progress": null,
"job_id": null,
"last_performed": null,
"queued": false
}
}
},
{
"info": {
"id": 1428774,
"location": "s3://sfdl-drivethrujson-drivethru-prod/raw_incremental/websites_analytics_demo/dt=2025-02-28",
"name": "dt",
"value": "2025-02-28"
},
"state": {
"arrival": {
"athena_discovery": false,
"first_data": "2025-02-28 00:05:05",
"last_data": "2025-02-28 13:06:02"
},
"parquet": {
"first_performed": null,
"in_progress": null,
"job_id": null,
"last_performed": null,
"queued": false
}
}
}
],
"type": "string"
},
"schema": {
"attributes": {
"bsPopper": "string",
"categoryused": "string",
"companyId": "string",
"companyid": "string",
"event": "string",
"eventlocation": "string",
"facilityId": "string",
"facilityid": "string",
"imagesViewed": "string",
"name": "string",
"pageId": "string",
"pageVersion": "string",
"pageid": "string",
"pageversion": "string",
"sessionId": "string",
"sessionid": "string",
"siteId": "string",
"siteid": "string",
"testid": "string",
"timeSpent": "string",
"timeStamp": "string",
"timestamp": "string",
"trackclick": "string",
"type": "string",
"unitsshown": "string",
"userLat": "string",
"userLong": "string",
"userlat": "string",
"userlong": "string",
"widgetId": "string",
"widgetid": "string"
},
"created": "2025-02-28 03:05:06",
"ddl": {
"create_database": "CREATE DATABASE IF NOT EXISTS `sfdl_prod_drivethru`",
"create_table": "CREATE EXTERNAL TABLE IF NOT EXISTS\n`sfdl_prod_drivethru.websites_analytics_demo`\n(\n`imagesViewed`STRING,`type`STRING,`widgetid`STRING,`userLat`STRING,`facilityid`STRING,`timeStamp`STRING,`trackclick`STRING,`name`STRING,`siteId`STRING,`pageid`STRING,`companyId`STRING,`companyid`STRING,`eventlocation`STRING,`pageversion`STRING,`event`STRING,`categoryused`STRING,`facilityId`STRING,`userLong`STRING,`timeSpent`STRING,`sessionId`STRING,`sessionid`STRING,`widgetId`STRING,`pageVersion`STRING,`timestamp`STRING,`testid`STRING,`unitsshown`STRING,`pageId`STRING,`userlat`STRING,`bsPopper`STRING,`userlong`STRING,`siteid`STRING\n)\nPARTITIONED BY (dt string)\nROW FORMAT SERDE\n'org.openx.data.jsonserde.JsonSerDe'\n \nSTORED AS INPUTFORMAT\n 'org.apache.hadoop.mapred.TextInputFormat'\nOUTPUTFORMAT\n 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'\nLOCATION\n 's3://sfdl-drivethrujson-drivethru-prod/raw_incremental/websites_analytics_demo'\n",
"drop_table": "DROP TABLE IF EXISTS `sfdl_prod_drivethru.websites_analytics_demo`",
"repair_table": "MSCK REPAIR TABLE sfdl_prod_drivethru.websites_analytics_demo"
}
},
"server": {
"hostname": "schemamanager-5475496c57-7dpqb",
"name": "schema_manager",
"process_start": "2026-01-09 11:27:47",
"sf_env": "prod",
"version": "2112174301"
}
}