Schema Info
Schema Name: sfdl_prod_segment.sfdc_identify
- created: 2017-11-02 13:19:40
- disabled: False
- format: raw
- id: 99
- is_partitioned: True
- is_schema_datatyped: False
- name:
{ "database": "sfdl_prod_segment", "table": "sfdc_identify" } - type: segment
Schema Versions: 4
-
Version 142671
- created: 2020-05-14 05:14:31
- ddl:
[ "CREATE DATABASE IF NOT EXISTS `sfdl_prod_segment`", "DROP TABLE IF EXISTS `sfdl_prod_segment.sfdc_identify`", "CREATE EXTERNAL TABLE IF NOT EXISTS\n`sfdl_prod_segment.sfdc_identify`\n(\n`channel`STRING,`context`struct<`ip`:STRING,`campaign`:struct<`content`:STRING,`source`:STRING,`term`:STRING,`medium`:STRING,`name`:STRING>,`library`:struct<`version`:STRING,`name`:STRING>,`page`:struct<`url`:STRING,`search`:STRING,`referrer`:STRING,`title`:STRING,`path`:STRING>,`userAgent`:STRING,`locale`:STRING>,`traits`struct<`segmentId`:STRING,`timeIncId`:STRING,`googleAnalyticsDaily`:STRING,`muuid_date`:STRING,`pxResult`:STRING,`request_id`:STRING,`timezoneOffset`:STRING,`bizrateId`:STRING,`timezone`:STRING>,`version`STRING,`_metadata`struct<`bundled`:array,`unbundled`:array >,`receivedAt`STRING,`_origin`STRING,`sentAt`STRING,`timestamp`STRING,`projectId`STRING,`messageId`STRING,`type`STRING,`anonymousId`STRING,`userId`STRING,`integrations`struct<`_empty_`:STRING>,`originalTimestamp`STRING\n)\nPARTITIONED BY (dt string)\nROW FORMAT SERDE\n'org.openx.data.jsonserde.JsonSerDe'\n WITH SERDEPROPERTIES ( 'mapping.ga_client_id' = 'GA Client ID' , 'mapping.GA_Client_ID' = 'GA Client ID' )\nSTORED AS INPUTFORMAT\n 'org.apache.hadoop.mapred.TextInputFormat'\nOUTPUTFORMAT\n 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'\nLOCATION\n 's3://sfdl-segment-sfdc-prod/separated_dt/identify'\n", "MSCK REPAIR TABLE sfdl_prod_segment.sfdc_identify" ] - hive_path: s3://sfdl-segment-sfdc-prod/separated_dt/identify
- id: 142671
- processed: 1
- raw_data:
{ "_metadata": { "bundled": [ "FullStory", "Google Analytics", "Segment.io" ], "unbundled": [] }, "_origin": "2020-05-14T10:13:28Z|s3://sfdl-segment-sfdc-prod/segment-logs/SgsnnfL86n/1589414400000/1589450830742.e9a2911a4a83.e4797d0.9044ae27-4171-45f0-96a5-befe2a4e1d65.gz|segment_separator:37", "anonymousId": "07532325-932d-46da-81d3-1855f6a0d71e", "channel": "client", "context": { "ip": "36.110.147.105", "library": { "name": "analytics.js", "version": "3.11.2" }, "locale": "en-US", "page": { "path": "/self-storage/news/500-premier-storage-investors-aims-for-500-million-in-deals/", "referrer": "", "search": "", "title": "New storage operator eyes $500 million in deals", "url": "https://www.sparefoot.com/self-storage/news/500-premier-storage-investors-aims-for-500-million-in-deals/" }, "userAgent": "Mozilla/5.0 (Linux; Android 4.1.2; SHV-E250S Build/JZO54K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.82 Mobile Safari/537.36" }, "integrations": {}, "messageId": "ajs-e0126bdfb5964d2d014f015c8705ae61", "originalTimestamp": "2020-05-14T10:04:51.608Z", "projectId": "SgsnnfL86n", "receivedAt": "2020-05-14T10:04:51.729Z", "sentAt": "2020-05-14T10:04:51.617Z", "timestamp": "2020-05-14T10:04:51.720Z", "traits": { "bizrateId": "15894443025270510144757559904014804", "googleAnalyticsDaily": "GA1.2.1847157563.1589450689", "muuid_date": "1589450686585", "request_id": "e4b5e6ec-4596-48f6-bac8-6468ed6046d4", "segmentId": "07532325-932d-46da-81d3-1855f6a0d71e", "timeIncId": "df4a85d3-d50a-4ca7-9e07-81c6aa8b1b29", "timezone": "asia/shanghai", "timezoneOffset": 8 }, "type": "identify", "userId": null, "version": 2 } - schema_attributes:
{ "_origin": "string", "anonymousId": "string", "channel": "string", "context.campaign.content": "string", "context.campaign.medium": "string", "context.campaign.name": "string", "context.campaign.source": "string", "context.campaign.term": "string", "context.ip": "string", "context.library.name": "string", "context.library.version": "string", "context.locale": "string", "context.page.path": "string", "context.page.referrer": "string", "context.page.search": "string", "context.page.title": "string", "context.page.url": "string", "context.userAgent": "string", "messageId": "string", "originalTimestamp": "string", "projectId": "string", "receivedAt": "string", "sentAt": "string", "timestamp": "string", "traits.bizrateId": "string", "traits.googleAnalyticsDaily": "string", "traits.muuid_date": "string", "traits.pxResult": "string", "traits.request_id": "string", "traits.segmentId": "string", "traits.timeIncId": "string", "traits.timezone": "string", "traits.timezoneOffset": "string", "type": "string", "userId": "string", "version": "string" } - schema_name_id: 99
- schema_scan_id: 36988284
- updated: 2020-05-14 05:24:07
-
Version 142668
- created: 2020-05-14 05:10:52
- ddl:
[ "CREATE DATABASE IF NOT EXISTS `sfdl_prod_segment`", "DROP TABLE IF EXISTS `sfdl_prod_segment.sfdc_identify`", "CREATE EXTERNAL TABLE IF NOT EXISTS\n`sfdl_prod_segment.sfdc_identify`\n(\n`context`struct<`page`:struct<`url`:STRING,`path`:STRING,`title`:STRING,`search`:STRING,`referrer`:STRING>,`library`:struct<`version`:STRING,`name`:STRING>,`locale`:STRING,`ip`:STRING,`userAgent`:STRING,`campaign`:struct<`content`:STRING,`source`:STRING,`term`:STRING,`medium`:STRING,`name`:STRING>>,`integrations`struct<`_empty_`:STRING>,`_metadata`struct<`bundled`:array,`unbundled`:array >,`receivedAt`STRING,`version`STRING,`traits`struct<`request_id`:STRING,`muuid_date`:STRING,`pxResult`:STRING,`timeIncId`:STRING,`timezone`:STRING,`timezoneOffset`:STRING>,`sentAt`STRING,`projectId`STRING,`userId`STRING,`type`STRING,`anonymousId`STRING,`_origin`STRING,`channel`STRING,`messageId`STRING,`originalTimestamp`STRING,`timestamp`STRING\n)\nPARTITIONED BY (dt string)\nROW FORMAT SERDE\n'org.openx.data.jsonserde.JsonSerDe'\n WITH SERDEPROPERTIES ( 'mapping.ga_client_id' = 'GA Client ID' , 'mapping.GA_Client_ID' = 'GA Client ID' )\nSTORED AS INPUTFORMAT\n 'org.apache.hadoop.mapred.TextInputFormat'\nOUTPUTFORMAT\n 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'\nLOCATION\n 's3://sfdl-segment-sfdc-prod/separated_dt/identify'\n", "MSCK REPAIR TABLE sfdl_prod_segment.sfdc_identify" ] - hive_path: s3://sfdl-segment-sfdc-prod/separated_dt/identify
- id: 142668
- processed: 1
- raw_data:
{ "_metadata": { "bundled": [ "FullStory", "Google Analytics", "Segment.io" ], "unbundled": [] }, "_origin": "2020-05-14T10:10:50Z|s3://sfdl-segment-sfdc-prod/segment-logs/SgsnnfL86n/1589414400000/1589450979710.467b17edabff.e4797d0.eb150bc1-ca33-4ec4-9e92-4aaecf43d664.gz|segment_separator:37", "anonymousId": "07532325-932d-46da-81d3-1855f6a0d71e", "channel": "client", "context": { "ip": "36.110.147.105", "library": { "name": "analytics.js", "version": "3.11.2" }, "locale": "en-US", "page": { "path": "/self-storage/news/500-premier-storage-investors-aims-for-500-million-in-deals/", "referrer": "", "search": "", "title": "New storage operator eyes $500 million in deals", "url": "https://www.sparefoot.com/self-storage/news/500-premier-storage-investors-aims-for-500-million-in-deals/" }, "userAgent": "Mozilla/5.0 (Linux; Android 4.1.2; SHV-E250S Build/JZO54K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.82 Mobile Safari/537.36" }, "integrations": {}, "messageId": "ajs-a136a1dd78e05b41716b91c0cda23ddb", "originalTimestamp": "2020-05-14T10:04:48.374Z", "projectId": "SgsnnfL86n", "receivedAt": "2020-05-14T10:04:49.696Z", "sentAt": "2020-05-14T10:04:48.378Z", "timestamp": "2020-05-14T10:04:49.692Z", "traits": { "muuid_date": "1589450686585", "request_id": "e4b5e6ec-4596-48f6-bac8-6468ed6046d4", "timeIncId": "df4a85d3-d50a-4ca7-9e07-81c6aa8b1b29", "timezone": "asia/shanghai", "timezoneOffset": 8 }, "type": "identify", "userId": null, "version": 2 } - schema_attributes:
{ "_origin": "string", "anonymousId": "string", "channel": "string", "context.campaign.content": "string", "context.campaign.medium": "string", "context.campaign.name": "string", "context.campaign.source": "string", "context.campaign.term": "string", "context.ip": "string", "context.library.name": "string", "context.library.version": "string", "context.locale": "string", "context.page.path": "string", "context.page.referrer": "string", "context.page.search": "string", "context.page.title": "string", "context.page.url": "string", "context.userAgent": "string", "messageId": "string", "originalTimestamp": "string", "projectId": "string", "receivedAt": "string", "sentAt": "string", "timestamp": "string", "traits.muuid_date": "string", "traits.pxResult": "string", "traits.request_id": "string", "traits.timeIncId": "string", "traits.timezone": "string", "traits.timezoneOffset": "string", "type": "string", "userId": "string", "version": "string" } - schema_name_id: 99
- schema_scan_id: 36988158
- updated: 2020-05-14 05:17:46
-
Version 140644
- created: 2020-05-03 15:10:14
- ddl:
[ "CREATE DATABASE IF NOT EXISTS `sfdl_prod_segment`", "DROP TABLE IF EXISTS `sfdl_prod_segment.sfdc_identify`", "CREATE EXTERNAL TABLE IF NOT EXISTS\n`sfdl_prod_segment.sfdc_identify`\n(\n`userId`STRING,`projectId`STRING,`timestamp`STRING,`context`struct<`locale`:STRING,`campaign`:struct<`content`:STRING,`source`:STRING,`term`:STRING,`medium`:STRING,`name`:STRING>,`ip`:STRING,`library`:struct<`version`:STRING,`name`:STRING>,`page`:struct<`url`:STRING,`referrer`:STRING,`path`:STRING,`search`:STRING,`title`:STRING>,`userAgent`:STRING>,`version`STRING,`_origin`STRING,`integrations`struct<`_empty_`:STRING>,`traits`struct<`pxResult`:STRING>,`anonymousId`STRING,`_metadata`struct<`bundled`:array,`unbundled`:array >,`channel`STRING,`sentAt`STRING,`receivedAt`STRING,`type`STRING,`messageId`STRING,`originalTimestamp`STRING\n)\nPARTITIONED BY (dt string)\nROW FORMAT SERDE\n'org.openx.data.jsonserde.JsonSerDe'\n WITH SERDEPROPERTIES ( 'mapping.ga_client_id' = 'GA Client ID' , 'mapping.GA_Client_ID' = 'GA Client ID' )\nSTORED AS INPUTFORMAT\n 'org.apache.hadoop.mapred.TextInputFormat'\nOUTPUTFORMAT\n 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'\nLOCATION\n 's3://sfdl-segment-sfdc-prod/separated_dt/identify'\n", "MSCK REPAIR TABLE sfdl_prod_segment.sfdc_identify" ] - hive_path: s3://sfdl-segment-sfdc-prod/separated_dt/identify
- id: 140644
- processed: 1
- raw_data:
{ "_metadata": { "bundled": [ "FullStory", "Google Analytics", "Segment.io" ], "unbundled": [] }, "_origin": "2020-05-03T20:10:09Z|s3://sfdl-segment-sfdc-prod/segment-logs/SgsnnfL86n/1588464000000/1588536500207.b9ae9497e1f7.e4797d0.d638001c-187b-4e2f-9e1f-b655a49be343.gz|segment_separator:37", "anonymousId": "51830313-b4b9-49df-8277-28bea8111887", "channel": "client", "context": { "ip": "36.110.147.91", "library": { "name": "analytics.js", "version": "3.11.2" }, "locale": "en-US", "page": { "path": "/self-storage/news/6407-sold-weekly-self-storage-acquisition-round-up-2-7-18/", "referrer": "", "search": "", "title": "Sold! Weekly Self-Storage Acquisition Round Up 2.7.18 - The SpareFoot Storage Beat", "url": "https://www.sparefoot.com/self-storage/news/6407-sold-weekly-self-storage-acquisition-round-up-2-7-18/" }, "userAgent": "Mozilla/5.0 (Linux; Android 4.1.2; SHV-E250S Build/JZO54K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.82 Mobile Safari/537.36" }, "integrations": {}, "messageId": "ajs-b31d6f4ac78ab0a0150e1e7faa86a443", "originalTimestamp": "2020-05-03T19:57:50.204Z", "projectId": "SgsnnfL86n", "receivedAt": "2020-05-03T19:57:50.315Z", "sentAt": "2020-05-03T19:57:50.207Z", "timestamp": "2020-05-03T19:57:50.312Z", "traits": { "pxResult": "1" }, "type": "identify", "userId": null, "version": 2 } - schema_attributes:
{ "_origin": "string", "anonymousId": "string", "channel": "string", "context.campaign.content": "string", "context.campaign.medium": "string", "context.campaign.name": "string", "context.campaign.source": "string", "context.campaign.term": "string", "context.ip": "string", "context.library.name": "string", "context.library.version": "string", "context.locale": "string", "context.page.path": "string", "context.page.referrer": "string", "context.page.search": "string", "context.page.title": "string", "context.page.url": "string", "context.userAgent": "string", "messageId": "string", "originalTimestamp": "string", "projectId": "string", "receivedAt": "string", "sentAt": "string", "timestamp": "string", "traits.pxResult": "string", "type": "string", "userId": "string", "version": "string" } - schema_name_id: 99
- schema_scan_id: 36414412
- updated: 2020-05-03 15:16:21
-
Version 139907
- created: 2020-04-29 21:13:10
- ddl:
[ "CREATE DATABASE IF NOT EXISTS `sfdl_prod_segment`", "DROP TABLE IF EXISTS `sfdl_prod_segment.sfdc_identify`", "CREATE EXTERNAL TABLE IF NOT EXISTS\n`sfdl_prod_segment.sfdc_identify`\n(\n`userId`STRING,`context`struct<`campaign`:struct<`content`:STRING,`source`:STRING,`term`:STRING,`medium`:STRING,`name`:STRING>,`page`:struct<`search`:STRING,`title`:STRING,`url`:STRING,`path`:STRING,`referrer`:STRING>,`userAgent`:STRING,`locale`:STRING,`library`:struct<`name`:STRING,`version`:STRING>,`ip`:STRING>,`type`STRING,`messageId`STRING,`_metadata`struct<`bundled`:array,`unbundled`:array >,`anonymousId`STRING,`channel`STRING,`version`STRING,`_origin`STRING,`timestamp`STRING,`traits`struct<`_empty_`:STRING>,`originalTimestamp`STRING,`sentAt`STRING,`projectId`STRING,`receivedAt`STRING,`integrations`struct<`_empty_`:STRING>\n)\nPARTITIONED BY (dt string)\nROW FORMAT SERDE\n'org.openx.data.jsonserde.JsonSerDe'\n WITH SERDEPROPERTIES ( 'mapping.ga_client_id' = 'GA Client ID' , 'mapping.GA_Client_ID' = 'GA Client ID' )\nSTORED AS INPUTFORMAT\n 'org.apache.hadoop.mapred.TextInputFormat'\nOUTPUTFORMAT\n 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'\nLOCATION\n 's3://sfdl-segment-sfdc-prod/separated_dt/identify'\n", "MSCK REPAIR TABLE sfdl_prod_segment.sfdc_identify" ] - hive_path: s3://sfdl-segment-sfdc-prod/separated_dt/identify
- id: 139907
- processed: 1
- raw_data:
{ "_metadata": { "bundled": [ "FullStory", "Google Analytics", "Segment.io" ], "unbundled": [] }, "_origin": "2020-04-30T02:12:55Z|s3://sfdl-segment-sfdc-prod/segment-logs/SgsnnfL86n/1588204800000/1588212486492.44850b27b671.e4797d0.41da02b5-9766-4264-9731-7c0cca0c31f8.gz|segment_separator:37", "anonymousId": "fcd8fcd4-3785-4ef3-8a23-1edb79662c7c", "channel": "client", "context": { "ip": "172.90.14.242", "library": { "name": "analytics.js", "version": "3.11.4" }, "locale": "en-US", "page": { "path": "/Sun-Valley-CA-self-storage/Trojan-Storage-of-Sun-Valley-65183.html", "referrer": "", "search": "", "title": "Trojan Storage of Sun Valley: Locally Owned | SpareFoot", "url": "https://www.sparefoot.com/Sun-Valley-CA-self-storage/Trojan-Storage-of-Sun-Valley-65183.html" }, "userAgent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.132 Safari/537.36" }, "integrations": {}, "messageId": "ajs-3bc8820b1c3f2f119a45012e5476b62f", "originalTimestamp": "2020-04-30T01:40:36.620Z", "projectId": "SgsnnfL86n", "receivedAt": "2020-04-30T01:40:04.566Z", "sentAt": "2020-04-30T01:40:36.624Z", "timestamp": "2020-04-30T01:40:04.562Z", "traits": {}, "type": "identify", "userId": "3759aef943d32f68486972771", "version": 2 } - schema_attributes:
{ "_origin": "string", "anonymousId": "string", "channel": "string", "context.campaign.content": "string", "context.campaign.medium": "string", "context.campaign.name": "string", "context.campaign.source": "string", "context.campaign.term": "string", "context.ip": "string", "context.library.name": "string", "context.library.version": "string", "context.locale": "string", "context.page.path": "string", "context.page.referrer": "string", "context.page.search": "string", "context.page.title": "string", "context.page.url": "string", "context.userAgent": "string", "messageId": "string", "originalTimestamp": "string", "projectId": "string", "receivedAt": "string", "sentAt": "string", "timestamp": "string", "type": "string", "userId": "string", "version": "string" } - schema_name_id: 99
- schema_scan_id: 36216005
- updated: 2020-04-29 21:19:14
Schema Scans: 4
Last at 2020-05-14 05:14:30
-
Scan 36988284
- duration: 0:05:22.167370
- exit_message:
{ "ddl_changed": true, "ingested_partitions": 175, "partitions_applied": false, "success": true } - id: 36988284
- payload:
{ "datatype_dict": { "originalTimestamp": "timestamp_iso8601", "receivedAt": "timestamp_iso8601", "sentAt": "timestamp_iso8601", "timestamp": "timestamp_iso8601" }, "file_format": "json", "partition": { "location": "s3://sfdl-segment-sfdc-prod/separated_dt/identify/dt=2020-05-14/", "name": "dt", "type": "string", "value": "2020-05-14" }, "s3": { "bucket": "sfdl-segment-sfdc-prod", "hive_path": "s3://sfdl-segment-sfdc-prod/separated_dt/identify", "key": "separated_dt/identify/dt=2020-05-14/1589450830742.e9a2911a4a83.e4797d0.9044ae27-4171-45f0-96a5-befe2a4e1d65.json.gz" }, "schema": { "name": "sfdl_prod_segment.sfdc_identify", "type": "segment" } } - running: False
- schema_name_id: 99
- start_time: 2020-05-14 05:14:30
- success: True
- trace_id: 13649003791618932637
-
Scan 36988158
- duration: 0:01:04.333804
- exit_message:
{ "exception": "failed to apply schema `Query failed: FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.exec.DDLTask `MSCK REPAIR TABLE sfdl_prod_segment.sfdc_identify``: ['CREATE DATABASE IF NOT EXISTS `sfdl_prod_segment`', 'DROP TABLE IF EXISTS `sfdl_prod_segment.sfdc_identify`', \"CREATE EXTERNAL TABLE IF NOT EXISTS\\n`sfdl_prod_segment.sfdc_identify`\\n(\\n`context`struct<`page`:struct<`url`:STRING,`path`:STRING,`title`:STRING,`search`:STRING,`referrer`:STRING>,`library`:struct<`version`:STRING,`name`:STRING>,`locale`:STRING,`ip`:STRING,`userAgent`:STRING,`campaign`:struct<`content`:STRING,`source`:STRING,`term`:STRING,`medium`:STRING,`name`:STRING>>,`integrations`struct<`_empty_`:STRING>,`_metadata`struct<`bundled`:array,`unbundled`:array >,`receivedAt`STRING,`version`STRING,`traits`struct<`request_id`:STRING,`muuid_date`:STRING,`pxResult`:STRING,`timeIncId`:STRING,`timezone`:STRING,`timezoneOffset`:STRING>,`sentAt`STRING,`projectId`STRING,`userId`STRING,`type`STRING,`anonymousId`STRING,`_origin`STRING,`channel`STRING,`messageId`STRING,`originalTimestamp`STRING,`timestamp`STRING\\n)\\nPARTITIONED BY (dt string)\\nROW FORMAT SERDE\\n'org.openx.data.jsonserde.JsonSerDe'\\n WITH SERDEPROPERTIES ( 'mapping.ga_client_id' = 'GA Client ID' , 'mapping.GA_Client_ID' = 'GA Client ID' )\\nSTORED AS INPUTFORMAT\\n 'org.apache.hadoop.mapred.TextInputFormat'\\nOUTPUTFORMAT\\n 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'\\nLOCATION\\n 's3://sfdl-segment-sfdc-prod/separated_dt/identify'\\n\", 'MSCK REPAIR TABLE sfdl_prod_segment.sfdc_identify']\nTraceback (most recent call last):\n File \"/schema_manager/schema_generator/table_generator.py\", line 24, in apply_latest_version\n self.runner.run_multi_query(schema_version.ddl, timeout=DDLQueueWorker.TIMEOUT)\n File \"/src/athena-runner/athena_runner/runner.py\", line 133, in run_multi_query\n result = self.run_single_query(query, timeout, database)\n File \"/src/athena-runner/athena_runner/runner.py\", line 51, in run_single_query\n return self._run_single_query(query, timeout, database)\n File \"/src/athena-runner/athena_runner/runner.py\", line 120, in _run_single_query\n raise e\n File \"/src/athena-runner/athena_runner/runner.py\", line 103, in _run_single_query\n raise AthenaRunnerQueryException('Query failed: {} `{}`'.format(info['QueryExecution']['Status']['StateChangeReason'], query))\nathena_runner.exceptions.AthenaRunnerQueryException: Query failed: FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.exec.DDLTask `MSCK REPAIR TABLE sfdl_prod_segment.sfdc_identify`\n\nDuring handling of the above exception, another exception occurred:\n\nTraceback (most recent call last):\n File \"/schema_manager/schema_generator/schema_scanner.py\", line 102, in scan\n response = service.run()\n File \"/schema_manager/schema_generator/process_hive_table.py\", line 95, in run\n response['ddl_changed'] = table_generator.run()\n File \"/schema_manager/schema_generator/table_generator.py\", line 35, in run\n self.apply_latest_version()\n File \"/schema_manager/schema_generator/table_generator.py\", line 26, in apply_latest_version\n raise Exception('failed to apply schema `{}`: {}'.format(e, schema_version.ddl))\nException: failed to apply schema `Query failed: FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.exec.DDLTask `MSCK REPAIR TABLE sfdl_prod_segment.sfdc_identify``: ['CREATE DATABASE IF NOT EXISTS `sfdl_prod_segment`', 'DROP TABLE IF EXISTS `sfdl_prod_segment.sfdc_identify`', \"CREATE EXTERNAL TABLE IF NOT EXISTS\\n`sfdl_prod_segment.sfdc_identify`\\n(\\n`context`struct<`page`:struct<`url`:STRING,`path`:STRING,`title`:STRING,`search`:STRING,`referrer`:STRING>,`library`:struct<`version`:STRING,`name`:STRING>,`locale`:STRING,`ip`:STRING,`userAgent`:STRING,`campaign`:struct<`content`:STRING,`source`:STRING,`term`:STRING,`medium`:STRING,`name`:STRING>>,`integrations`struct<`_empty_`:STRING>,`_metadata`struct<`bundled`:array ,`unbundled`:array >,`receivedAt`STRING,`version`STRING,`traits`struct<`request_id`:STRING,`muuid_date`:STRING,`pxResult`:STRING,`timeIncId`:STRING,`timezone`:STRING,`timezoneOffset`:STRING>,`sentAt`STRING,`projectId`STRING,`userId`STRING,`type`STRING,`anonymousId`STRING,`_origin`STRING,`channel`STRING,`messageId`STRING,`originalTimestamp`STRING,`timestamp`STRING\\n)\\nPARTITIONED BY (dt string)\\nROW FORMAT SERDE\\n'org.openx.data.jsonserde.JsonSerDe'\\n WITH SERDEPROPERTIES ( 'mapping.ga_client_id' = 'GA Client ID' , 'mapping.GA_Client_ID' = 'GA Client ID' )\\nSTORED AS INPUTFORMAT\\n 'org.apache.hadoop.mapred.TextInputFormat'\\nOUTPUTFORMAT\\n 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'\\nLOCATION\\n 's3://sfdl-segment-sfdc-prod/separated_dt/identify'\\n\", 'MSCK REPAIR TABLE sfdl_prod_segment.sfdc_identify']\n: Expecting value: line 1 column 1 (char 0)" } - id: 36988158
- payload:
{ "datatype_dict": { "originalTimestamp": "timestamp_iso8601", "receivedAt": "timestamp_iso8601", "sentAt": "timestamp_iso8601", "timestamp": "timestamp_iso8601" }, "file_format": "json", "partition": { "location": "s3://sfdl-segment-sfdc-prod/separated_dt/identify/dt=2020-05-14/", "name": "dt", "type": "string", "value": "2020-05-14" }, "s3": { "bucket": "sfdl-segment-sfdc-prod", "hive_path": "s3://sfdl-segment-sfdc-prod/separated_dt/identify", "key": "separated_dt/identify/dt=2020-05-14/1589450979710.467b17edabff.e4797d0.eb150bc1-ca33-4ec4-9e92-4aaecf43d664.json.gz" }, "schema": { "name": "sfdl_prod_segment.sfdc_identify", "type": "segment" } } - running: False
- schema_name_id: 99
- start_time: 2020-05-14 05:10:51
- success: False
- trace_id: 10699749574951355238
-
Scan 36414412
- duration: 0:02:04.261037
- exit_message:
{ "ddl_changed": true, "ingested_partitions": 519, "partitions_applied": false, "success": true } - id: 36414412
- payload:
{ "datatype_dict": { "originalTimestamp": "timestamp_iso8601", "receivedAt": "timestamp_iso8601", "sentAt": "timestamp_iso8601", "timestamp": "timestamp_iso8601" }, "file_format": "json", "partition": { "location": "s3://sfdl-segment-sfdc-prod/separated_dt/identify/dt=2020-05-03/", "name": "dt", "type": "string", "value": "2020-05-03" }, "s3": { "bucket": "sfdl-segment-sfdc-prod", "hive_path": "s3://sfdl-segment-sfdc-prod/separated_dt/identify", "key": "separated_dt/identify/dt=2020-05-03/1588536500207.b9ae9497e1f7.e4797d0.d638001c-187b-4e2f-9e1f-b655a49be343.json.gz" }, "schema": { "name": "sfdl_prod_segment.sfdc_identify", "type": "segment" } } - running: False
- schema_name_id: 99
- start_time: 2020-05-03 15:10:14
- success: True
- trace_id: 18106067313119164192
-
Scan 36216005
- duration: 0:02:26.451742
- exit_message:
{ "ddl_changed": true, "ingested_partitions": 518, "partitions_applied": false, "success": true } - id: 36216005
- payload:
{ "datatype_dict": { "originalTimestamp": "timestamp_iso8601", "receivedAt": "timestamp_iso8601", "sentAt": "timestamp_iso8601", "timestamp": "timestamp_iso8601" }, "file_format": "json", "partition": { "location": "s3://sfdl-segment-sfdc-prod/separated_dt/identify/dt=2020-04-30/", "name": "dt", "type": "string", "value": "2020-04-30" }, "s3": { "bucket": "sfdl-segment-sfdc-prod", "hive_path": "s3://sfdl-segment-sfdc-prod/separated_dt/identify", "key": "separated_dt/identify/dt=2020-04-30/1588212486492.44850b27b671.e4797d0.41da02b5-9766-4264-9731-7c0cca0c31f8.json.gz" }, "schema": { "name": "sfdl_prod_segment.sfdc_identify", "type": "segment" } } - running: False
- schema_name_id: 99
- start_time: 2020-04-29 21:13:10
- success: True
- trace_id: 7361690129348836688
- count: 3
- ddl:
[ "ALTER TABLE\n `sfdl_prod_segment.sfdc_identify`\nADD\n\n\n PARTITION (dt='2023-06-04') location 's3://sfdl-segment-sfdc-prod/separated_dt/identify/dt=2023-06-04/'\n PARTITION (dt='2023-06-06') location 's3://sfdl-segment-sfdc-prod/separated_dt/identify/dt=2023-06-06/'\n PARTITION (dt='2023-06-08') location 's3://sfdl-segment-sfdc-prod/separated_dt/identify/dt=2023-06-08/'\n;" ] - name: dt
- state:
[ { "info": { "id": 927197, "location": "s3://sfdl-segment-sfdc-prod/separated_dt/identify/dt=2023-06-04/", "name": "dt", "value": "2023-06-04" }, "state": { "arrival": { "athena_discovery": true, "first_data": "2023-06-03 23:18:49", "last_data": "2023-06-03 23:18:49" }, "parquet": { "first_performed": null, "in_progress": null, "job_id": null, "last_performed": null, "queued": true } } }, { "info": { "id": 927682, "location": "s3://sfdl-segment-sfdc-prod/separated_dt/identify/dt=2023-06-06/", "name": "dt", "value": "2023-06-06" }, "state": { "arrival": { "athena_discovery": true, "first_data": "2023-06-06 10:56:35", "last_data": "2023-06-06 11:04:33" }, "parquet": { "first_performed": null, "in_progress": null, "job_id": null, "last_performed": null, "queued": true } } }, { "info": { "id": 928019, "location": "s3://sfdl-segment-sfdc-prod/separated_dt/identify/dt=2023-06-08/", "name": "dt", "value": "2023-06-08" }, "state": { "arrival": { "athena_discovery": true, "first_data": "2023-06-08 06:27:48", "last_data": "2023-06-08 06:27:48" }, "parquet": { "first_performed": null, "in_progress": null, "job_id": null, "last_performed": null, "queued": true } } } ] - type: string
- attributes:
{ "_origin": "string", "anonymousId": "string", "channel": "string", "context.campaign.content": "string", "context.campaign.medium": "string", "context.campaign.name": "string", "context.campaign.source": "string", "context.campaign.term": "string", "context.ip": "string", "context.library.name": "string", "context.library.version": "string", "context.locale": "string", "context.page.path": "string", "context.page.referrer": "string", "context.page.search": "string", "context.page.title": "string", "context.page.url": "string", "context.userAgent": "string", "messageId": "string", "originalTimestamp": "string", "projectId": "string", "receivedAt": "string", "sentAt": "string", "timestamp": "string", "traits.bizrateId": "string", "traits.googleAnalyticsDaily": "string", "traits.muuid_date": "string", "traits.pxResult": "string", "traits.request_id": "string", "traits.segmentId": "string", "traits.timeIncId": "string", "traits.timezone": "string", "traits.timezoneOffset": "string", "type": "string", "userId": "string", "version": "string" } - created: 2020-05-14 05:14:31
- ddl:
{ "create_database": "CREATE DATABASE IF NOT EXISTS `sfdl_prod_segment`", "create_table": "CREATE EXTERNAL TABLE IF NOT EXISTS\n`sfdl_prod_segment.sfdc_identify`\n(\n`channel`STRING,`context`struct<`ip`:STRING,`campaign`:struct<`content`:STRING,`source`:STRING,`term`:STRING,`medium`:STRING,`name`:STRING>,`library`:struct<`version`:STRING,`name`:STRING>,`page`:struct<`url`:STRING,`search`:STRING,`referrer`:STRING,`title`:STRING,`path`:STRING>,`userAgent`:STRING,`locale`:STRING>,`traits`struct<`segmentId`:STRING,`timeIncId`:STRING,`googleAnalyticsDaily`:STRING,`muuid_date`:STRING,`pxResult`:STRING,`request_id`:STRING,`timezoneOffset`:STRING,`bizrateId`:STRING,`timezone`:STRING>,`version`STRING,`_metadata`struct<`bundled`:array,`unbundled`:array >,`receivedAt`STRING,`_origin`STRING,`sentAt`STRING,`timestamp`STRING,`projectId`STRING,`messageId`STRING,`type`STRING,`anonymousId`STRING,`userId`STRING,`integrations`struct<`_empty_`:STRING>,`originalTimestamp`STRING\n)\nPARTITIONED BY (dt string)\nROW FORMAT SERDE\n'org.openx.data.jsonserde.JsonSerDe'\n WITH SERDEPROPERTIES ( 'mapping.ga_client_id' = 'GA Client ID' , 'mapping.GA_Client_ID' = 'GA Client ID' )\nSTORED AS INPUTFORMAT\n 'org.apache.hadoop.mapred.TextInputFormat'\nOUTPUTFORMAT\n 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'\nLOCATION\n 's3://sfdl-segment-sfdc-prod/separated_dt/identify'\n", "drop_table": "DROP TABLE IF EXISTS `sfdl_prod_segment.sfdc_identify`", "repair_table": "MSCK REPAIR TABLE sfdl_prod_segment.sfdc_identify" }
- hostname: schemamanager-5475496c57-7dpqb
- name: schema_manager
- process_start: 2026-01-09 11:27:47
- sf_env: prod
- version: 2112174301
{
"name": {
"created": "2017-11-02 13:19:40",
"disabled": false,
"format": "raw",
"id": 99,
"is_partitioned": true,
"is_schema_datatyped": false,
"name": {
"database": "sfdl_prod_segment",
"table": "sfdc_identify"
},
"type": "segment"
},
"partition": {
"count": 3,
"ddl": [
"ALTER TABLE\n `sfdl_prod_segment.sfdc_identify`\nADD\n\n\n PARTITION (dt='2023-06-04') location 's3://sfdl-segment-sfdc-prod/separated_dt/identify/dt=2023-06-04/'\n PARTITION (dt='2023-06-06') location 's3://sfdl-segment-sfdc-prod/separated_dt/identify/dt=2023-06-06/'\n PARTITION (dt='2023-06-08') location 's3://sfdl-segment-sfdc-prod/separated_dt/identify/dt=2023-06-08/'\n;"
],
"name": "dt",
"state": [
{
"info": {
"id": 927197,
"location": "s3://sfdl-segment-sfdc-prod/separated_dt/identify/dt=2023-06-04/",
"name": "dt",
"value": "2023-06-04"
},
"state": {
"arrival": {
"athena_discovery": true,
"first_data": "2023-06-03 23:18:49",
"last_data": "2023-06-03 23:18:49"
},
"parquet": {
"first_performed": null,
"in_progress": null,
"job_id": null,
"last_performed": null,
"queued": true
}
}
},
{
"info": {
"id": 927682,
"location": "s3://sfdl-segment-sfdc-prod/separated_dt/identify/dt=2023-06-06/",
"name": "dt",
"value": "2023-06-06"
},
"state": {
"arrival": {
"athena_discovery": true,
"first_data": "2023-06-06 10:56:35",
"last_data": "2023-06-06 11:04:33"
},
"parquet": {
"first_performed": null,
"in_progress": null,
"job_id": null,
"last_performed": null,
"queued": true
}
}
},
{
"info": {
"id": 928019,
"location": "s3://sfdl-segment-sfdc-prod/separated_dt/identify/dt=2023-06-08/",
"name": "dt",
"value": "2023-06-08"
},
"state": {
"arrival": {
"athena_discovery": true,
"first_data": "2023-06-08 06:27:48",
"last_data": "2023-06-08 06:27:48"
},
"parquet": {
"first_performed": null,
"in_progress": null,
"job_id": null,
"last_performed": null,
"queued": true
}
}
}
],
"type": "string"
},
"schema": {
"attributes": {
"_origin": "string",
"anonymousId": "string",
"channel": "string",
"context.campaign.content": "string",
"context.campaign.medium": "string",
"context.campaign.name": "string",
"context.campaign.source": "string",
"context.campaign.term": "string",
"context.ip": "string",
"context.library.name": "string",
"context.library.version": "string",
"context.locale": "string",
"context.page.path": "string",
"context.page.referrer": "string",
"context.page.search": "string",
"context.page.title": "string",
"context.page.url": "string",
"context.userAgent": "string",
"messageId": "string",
"originalTimestamp": "string",
"projectId": "string",
"receivedAt": "string",
"sentAt": "string",
"timestamp": "string",
"traits.bizrateId": "string",
"traits.googleAnalyticsDaily": "string",
"traits.muuid_date": "string",
"traits.pxResult": "string",
"traits.request_id": "string",
"traits.segmentId": "string",
"traits.timeIncId": "string",
"traits.timezone": "string",
"traits.timezoneOffset": "string",
"type": "string",
"userId": "string",
"version": "string"
},
"created": "2020-05-14 05:14:31",
"ddl": {
"create_database": "CREATE DATABASE IF NOT EXISTS `sfdl_prod_segment`",
"create_table": "CREATE EXTERNAL TABLE IF NOT EXISTS\n`sfdl_prod_segment.sfdc_identify`\n(\n`channel`STRING,`context`struct<`ip`:STRING,`campaign`:struct<`content`:STRING,`source`:STRING,`term`:STRING,`medium`:STRING,`name`:STRING>,`library`:struct<`version`:STRING,`name`:STRING>,`page`:struct<`url`:STRING,`search`:STRING,`referrer`:STRING,`title`:STRING,`path`:STRING>,`userAgent`:STRING,`locale`:STRING>,`traits`struct<`segmentId`:STRING,`timeIncId`:STRING,`googleAnalyticsDaily`:STRING,`muuid_date`:STRING,`pxResult`:STRING,`request_id`:STRING,`timezoneOffset`:STRING,`bizrateId`:STRING,`timezone`:STRING>,`version`STRING,`_metadata`struct<`bundled`:array,`unbundled`:array>,`receivedAt`STRING,`_origin`STRING,`sentAt`STRING,`timestamp`STRING,`projectId`STRING,`messageId`STRING,`type`STRING,`anonymousId`STRING,`userId`STRING,`integrations`struct<`_empty_`:STRING>,`originalTimestamp`STRING\n)\nPARTITIONED BY (dt string)\nROW FORMAT SERDE\n'org.openx.data.jsonserde.JsonSerDe'\n WITH SERDEPROPERTIES ( 'mapping.ga_client_id' = 'GA Client ID' , 'mapping.GA_Client_ID' = 'GA Client ID' )\nSTORED AS INPUTFORMAT\n 'org.apache.hadoop.mapred.TextInputFormat'\nOUTPUTFORMAT\n 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'\nLOCATION\n 's3://sfdl-segment-sfdc-prod/separated_dt/identify'\n",
"drop_table": "DROP TABLE IF EXISTS `sfdl_prod_segment.sfdc_identify`",
"repair_table": "MSCK REPAIR TABLE sfdl_prod_segment.sfdc_identify"
}
},
"server": {
"hostname": "schemamanager-5475496c57-7dpqb",
"name": "schema_manager",
"process_start": "2026-01-09 11:27:47",
"sf_env": "prod",
"version": "2112174301"
}
}