Schema Info

Schema Name: sfdl_prod_segment.sfdc_identify
  • created: 2017-11-02 13:19:40
  • disabled: False
  • format: raw
  • id: 99
  • is_partitioned: True
  • is_schema_datatyped: False
  • name:
    {
        "database": "sfdl_prod_segment",
        "table": "sfdc_identify"
    }
  • type: segment
Schema Versions: 4
    • created: 2020-05-14 05:14:31
    • ddl:
      [
          "CREATE DATABASE IF NOT EXISTS `sfdl_prod_segment`",
          "DROP TABLE IF EXISTS `sfdl_prod_segment.sfdc_identify`",
          "CREATE EXTERNAL TABLE IF NOT EXISTS\n`sfdl_prod_segment.sfdc_identify`\n(\n`channel`STRING,`context`struct<`ip`:STRING,`campaign`:struct<`content`:STRING,`source`:STRING,`term`:STRING,`medium`:STRING,`name`:STRING>,`library`:struct<`version`:STRING,`name`:STRING>,`page`:struct<`url`:STRING,`search`:STRING,`referrer`:STRING,`title`:STRING,`path`:STRING>,`userAgent`:STRING,`locale`:STRING>,`traits`struct<`segmentId`:STRING,`timeIncId`:STRING,`googleAnalyticsDaily`:STRING,`muuid_date`:STRING,`pxResult`:STRING,`request_id`:STRING,`timezoneOffset`:STRING,`bizrateId`:STRING,`timezone`:STRING>,`version`STRING,`_metadata`struct<`bundled`:array,`unbundled`:array>,`receivedAt`STRING,`_origin`STRING,`sentAt`STRING,`timestamp`STRING,`projectId`STRING,`messageId`STRING,`type`STRING,`anonymousId`STRING,`userId`STRING,`integrations`struct<`_empty_`:STRING>,`originalTimestamp`STRING\n)\nPARTITIONED BY (dt string)\nROW FORMAT SERDE\n'org.openx.data.jsonserde.JsonSerDe'\n WITH SERDEPROPERTIES ( 'mapping.ga_client_id' = 'GA Client ID' , 'mapping.GA_Client_ID' = 'GA Client ID' )\nSTORED AS INPUTFORMAT\n  'org.apache.hadoop.mapred.TextInputFormat'\nOUTPUTFORMAT\n  'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'\nLOCATION\n  's3://sfdl-segment-sfdc-prod/separated_dt/identify'\n",
          "MSCK REPAIR TABLE sfdl_prod_segment.sfdc_identify"
      ]
    • hive_path: s3://sfdl-segment-sfdc-prod/separated_dt/identify
    • id: 142671
    • processed: 1
    • raw_data:
      {
          "_metadata": {
              "bundled": [
                  "FullStory",
                  "Google Analytics",
                  "Segment.io"
              ],
              "unbundled": []
          },
          "_origin": "2020-05-14T10:13:28Z|s3://sfdl-segment-sfdc-prod/segment-logs/SgsnnfL86n/1589414400000/1589450830742.e9a2911a4a83.e4797d0.9044ae27-4171-45f0-96a5-befe2a4e1d65.gz|segment_separator:37",
          "anonymousId": "07532325-932d-46da-81d3-1855f6a0d71e",
          "channel": "client",
          "context": {
              "ip": "36.110.147.105",
              "library": {
                  "name": "analytics.js",
                  "version": "3.11.2"
              },
              "locale": "en-US",
              "page": {
                  "path": "/self-storage/news/500-premier-storage-investors-aims-for-500-million-in-deals/",
                  "referrer": "",
                  "search": "",
                  "title": "New storage operator eyes $500 million in deals",
                  "url": "https://www.sparefoot.com/self-storage/news/500-premier-storage-investors-aims-for-500-million-in-deals/"
              },
              "userAgent": "Mozilla/5.0 (Linux; Android 4.1.2; SHV-E250S Build/JZO54K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.82 Mobile Safari/537.36"
          },
          "integrations": {},
          "messageId": "ajs-e0126bdfb5964d2d014f015c8705ae61",
          "originalTimestamp": "2020-05-14T10:04:51.608Z",
          "projectId": "SgsnnfL86n",
          "receivedAt": "2020-05-14T10:04:51.729Z",
          "sentAt": "2020-05-14T10:04:51.617Z",
          "timestamp": "2020-05-14T10:04:51.720Z",
          "traits": {
              "bizrateId": "15894443025270510144757559904014804",
              "googleAnalyticsDaily": "GA1.2.1847157563.1589450689",
              "muuid_date": "1589450686585",
              "request_id": "e4b5e6ec-4596-48f6-bac8-6468ed6046d4",
              "segmentId": "07532325-932d-46da-81d3-1855f6a0d71e",
              "timeIncId": "df4a85d3-d50a-4ca7-9e07-81c6aa8b1b29",
              "timezone": "asia/shanghai",
              "timezoneOffset": 8
          },
          "type": "identify",
          "userId": null,
          "version": 2
      }
    • schema_attributes:
      {
          "_origin": "string",
          "anonymousId": "string",
          "channel": "string",
          "context.campaign.content": "string",
          "context.campaign.medium": "string",
          "context.campaign.name": "string",
          "context.campaign.source": "string",
          "context.campaign.term": "string",
          "context.ip": "string",
          "context.library.name": "string",
          "context.library.version": "string",
          "context.locale": "string",
          "context.page.path": "string",
          "context.page.referrer": "string",
          "context.page.search": "string",
          "context.page.title": "string",
          "context.page.url": "string",
          "context.userAgent": "string",
          "messageId": "string",
          "originalTimestamp": "string",
          "projectId": "string",
          "receivedAt": "string",
          "sentAt": "string",
          "timestamp": "string",
          "traits.bizrateId": "string",
          "traits.googleAnalyticsDaily": "string",
          "traits.muuid_date": "string",
          "traits.pxResult": "string",
          "traits.request_id": "string",
          "traits.segmentId": "string",
          "traits.timeIncId": "string",
          "traits.timezone": "string",
          "traits.timezoneOffset": "string",
          "type": "string",
          "userId": "string",
          "version": "string"
      }
    • schema_name_id: 99
    • schema_scan_id: 36988284
    • updated: 2020-05-14 05:24:07
    • created: 2020-05-14 05:10:52
    • ddl:
      [
          "CREATE DATABASE IF NOT EXISTS `sfdl_prod_segment`",
          "DROP TABLE IF EXISTS `sfdl_prod_segment.sfdc_identify`",
          "CREATE EXTERNAL TABLE IF NOT EXISTS\n`sfdl_prod_segment.sfdc_identify`\n(\n`context`struct<`page`:struct<`url`:STRING,`path`:STRING,`title`:STRING,`search`:STRING,`referrer`:STRING>,`library`:struct<`version`:STRING,`name`:STRING>,`locale`:STRING,`ip`:STRING,`userAgent`:STRING,`campaign`:struct<`content`:STRING,`source`:STRING,`term`:STRING,`medium`:STRING,`name`:STRING>>,`integrations`struct<`_empty_`:STRING>,`_metadata`struct<`bundled`:array,`unbundled`:array>,`receivedAt`STRING,`version`STRING,`traits`struct<`request_id`:STRING,`muuid_date`:STRING,`pxResult`:STRING,`timeIncId`:STRING,`timezone`:STRING,`timezoneOffset`:STRING>,`sentAt`STRING,`projectId`STRING,`userId`STRING,`type`STRING,`anonymousId`STRING,`_origin`STRING,`channel`STRING,`messageId`STRING,`originalTimestamp`STRING,`timestamp`STRING\n)\nPARTITIONED BY (dt string)\nROW FORMAT SERDE\n'org.openx.data.jsonserde.JsonSerDe'\n WITH SERDEPROPERTIES ( 'mapping.ga_client_id' = 'GA Client ID' , 'mapping.GA_Client_ID' = 'GA Client ID' )\nSTORED AS INPUTFORMAT\n  'org.apache.hadoop.mapred.TextInputFormat'\nOUTPUTFORMAT\n  'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'\nLOCATION\n  's3://sfdl-segment-sfdc-prod/separated_dt/identify'\n",
          "MSCK REPAIR TABLE sfdl_prod_segment.sfdc_identify"
      ]
    • hive_path: s3://sfdl-segment-sfdc-prod/separated_dt/identify
    • id: 142668
    • processed: 1
    • raw_data:
      {
          "_metadata": {
              "bundled": [
                  "FullStory",
                  "Google Analytics",
                  "Segment.io"
              ],
              "unbundled": []
          },
          "_origin": "2020-05-14T10:10:50Z|s3://sfdl-segment-sfdc-prod/segment-logs/SgsnnfL86n/1589414400000/1589450979710.467b17edabff.e4797d0.eb150bc1-ca33-4ec4-9e92-4aaecf43d664.gz|segment_separator:37",
          "anonymousId": "07532325-932d-46da-81d3-1855f6a0d71e",
          "channel": "client",
          "context": {
              "ip": "36.110.147.105",
              "library": {
                  "name": "analytics.js",
                  "version": "3.11.2"
              },
              "locale": "en-US",
              "page": {
                  "path": "/self-storage/news/500-premier-storage-investors-aims-for-500-million-in-deals/",
                  "referrer": "",
                  "search": "",
                  "title": "New storage operator eyes $500 million in deals",
                  "url": "https://www.sparefoot.com/self-storage/news/500-premier-storage-investors-aims-for-500-million-in-deals/"
              },
              "userAgent": "Mozilla/5.0 (Linux; Android 4.1.2; SHV-E250S Build/JZO54K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.82 Mobile Safari/537.36"
          },
          "integrations": {},
          "messageId": "ajs-a136a1dd78e05b41716b91c0cda23ddb",
          "originalTimestamp": "2020-05-14T10:04:48.374Z",
          "projectId": "SgsnnfL86n",
          "receivedAt": "2020-05-14T10:04:49.696Z",
          "sentAt": "2020-05-14T10:04:48.378Z",
          "timestamp": "2020-05-14T10:04:49.692Z",
          "traits": {
              "muuid_date": "1589450686585",
              "request_id": "e4b5e6ec-4596-48f6-bac8-6468ed6046d4",
              "timeIncId": "df4a85d3-d50a-4ca7-9e07-81c6aa8b1b29",
              "timezone": "asia/shanghai",
              "timezoneOffset": 8
          },
          "type": "identify",
          "userId": null,
          "version": 2
      }
    • schema_attributes:
      {
          "_origin": "string",
          "anonymousId": "string",
          "channel": "string",
          "context.campaign.content": "string",
          "context.campaign.medium": "string",
          "context.campaign.name": "string",
          "context.campaign.source": "string",
          "context.campaign.term": "string",
          "context.ip": "string",
          "context.library.name": "string",
          "context.library.version": "string",
          "context.locale": "string",
          "context.page.path": "string",
          "context.page.referrer": "string",
          "context.page.search": "string",
          "context.page.title": "string",
          "context.page.url": "string",
          "context.userAgent": "string",
          "messageId": "string",
          "originalTimestamp": "string",
          "projectId": "string",
          "receivedAt": "string",
          "sentAt": "string",
          "timestamp": "string",
          "traits.muuid_date": "string",
          "traits.pxResult": "string",
          "traits.request_id": "string",
          "traits.timeIncId": "string",
          "traits.timezone": "string",
          "traits.timezoneOffset": "string",
          "type": "string",
          "userId": "string",
          "version": "string"
      }
    • schema_name_id: 99
    • schema_scan_id: 36988158
    • updated: 2020-05-14 05:17:46
    • created: 2020-05-03 15:10:14
    • ddl:
      [
          "CREATE DATABASE IF NOT EXISTS `sfdl_prod_segment`",
          "DROP TABLE IF EXISTS `sfdl_prod_segment.sfdc_identify`",
          "CREATE EXTERNAL TABLE IF NOT EXISTS\n`sfdl_prod_segment.sfdc_identify`\n(\n`userId`STRING,`projectId`STRING,`timestamp`STRING,`context`struct<`locale`:STRING,`campaign`:struct<`content`:STRING,`source`:STRING,`term`:STRING,`medium`:STRING,`name`:STRING>,`ip`:STRING,`library`:struct<`version`:STRING,`name`:STRING>,`page`:struct<`url`:STRING,`referrer`:STRING,`path`:STRING,`search`:STRING,`title`:STRING>,`userAgent`:STRING>,`version`STRING,`_origin`STRING,`integrations`struct<`_empty_`:STRING>,`traits`struct<`pxResult`:STRING>,`anonymousId`STRING,`_metadata`struct<`bundled`:array,`unbundled`:array>,`channel`STRING,`sentAt`STRING,`receivedAt`STRING,`type`STRING,`messageId`STRING,`originalTimestamp`STRING\n)\nPARTITIONED BY (dt string)\nROW FORMAT SERDE\n'org.openx.data.jsonserde.JsonSerDe'\n WITH SERDEPROPERTIES ( 'mapping.ga_client_id' = 'GA Client ID' , 'mapping.GA_Client_ID' = 'GA Client ID' )\nSTORED AS INPUTFORMAT\n  'org.apache.hadoop.mapred.TextInputFormat'\nOUTPUTFORMAT\n  'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'\nLOCATION\n  's3://sfdl-segment-sfdc-prod/separated_dt/identify'\n",
          "MSCK REPAIR TABLE sfdl_prod_segment.sfdc_identify"
      ]
    • hive_path: s3://sfdl-segment-sfdc-prod/separated_dt/identify
    • id: 140644
    • processed: 1
    • raw_data:
      {
          "_metadata": {
              "bundled": [
                  "FullStory",
                  "Google Analytics",
                  "Segment.io"
              ],
              "unbundled": []
          },
          "_origin": "2020-05-03T20:10:09Z|s3://sfdl-segment-sfdc-prod/segment-logs/SgsnnfL86n/1588464000000/1588536500207.b9ae9497e1f7.e4797d0.d638001c-187b-4e2f-9e1f-b655a49be343.gz|segment_separator:37",
          "anonymousId": "51830313-b4b9-49df-8277-28bea8111887",
          "channel": "client",
          "context": {
              "ip": "36.110.147.91",
              "library": {
                  "name": "analytics.js",
                  "version": "3.11.2"
              },
              "locale": "en-US",
              "page": {
                  "path": "/self-storage/news/6407-sold-weekly-self-storage-acquisition-round-up-2-7-18/",
                  "referrer": "",
                  "search": "",
                  "title": "Sold! Weekly Self-Storage Acquisition Round Up 2.7.18 - The SpareFoot Storage Beat",
                  "url": "https://www.sparefoot.com/self-storage/news/6407-sold-weekly-self-storage-acquisition-round-up-2-7-18/"
              },
              "userAgent": "Mozilla/5.0 (Linux; Android 4.1.2; SHV-E250S Build/JZO54K) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.82 Mobile Safari/537.36"
          },
          "integrations": {},
          "messageId": "ajs-b31d6f4ac78ab0a0150e1e7faa86a443",
          "originalTimestamp": "2020-05-03T19:57:50.204Z",
          "projectId": "SgsnnfL86n",
          "receivedAt": "2020-05-03T19:57:50.315Z",
          "sentAt": "2020-05-03T19:57:50.207Z",
          "timestamp": "2020-05-03T19:57:50.312Z",
          "traits": {
              "pxResult": "1"
          },
          "type": "identify",
          "userId": null,
          "version": 2
      }
    • schema_attributes:
      {
          "_origin": "string",
          "anonymousId": "string",
          "channel": "string",
          "context.campaign.content": "string",
          "context.campaign.medium": "string",
          "context.campaign.name": "string",
          "context.campaign.source": "string",
          "context.campaign.term": "string",
          "context.ip": "string",
          "context.library.name": "string",
          "context.library.version": "string",
          "context.locale": "string",
          "context.page.path": "string",
          "context.page.referrer": "string",
          "context.page.search": "string",
          "context.page.title": "string",
          "context.page.url": "string",
          "context.userAgent": "string",
          "messageId": "string",
          "originalTimestamp": "string",
          "projectId": "string",
          "receivedAt": "string",
          "sentAt": "string",
          "timestamp": "string",
          "traits.pxResult": "string",
          "type": "string",
          "userId": "string",
          "version": "string"
      }
    • schema_name_id: 99
    • schema_scan_id: 36414412
    • updated: 2020-05-03 15:16:21
    • created: 2020-04-29 21:13:10
    • ddl:
      [
          "CREATE DATABASE IF NOT EXISTS `sfdl_prod_segment`",
          "DROP TABLE IF EXISTS `sfdl_prod_segment.sfdc_identify`",
          "CREATE EXTERNAL TABLE IF NOT EXISTS\n`sfdl_prod_segment.sfdc_identify`\n(\n`userId`STRING,`context`struct<`campaign`:struct<`content`:STRING,`source`:STRING,`term`:STRING,`medium`:STRING,`name`:STRING>,`page`:struct<`search`:STRING,`title`:STRING,`url`:STRING,`path`:STRING,`referrer`:STRING>,`userAgent`:STRING,`locale`:STRING,`library`:struct<`name`:STRING,`version`:STRING>,`ip`:STRING>,`type`STRING,`messageId`STRING,`_metadata`struct<`bundled`:array,`unbundled`:array>,`anonymousId`STRING,`channel`STRING,`version`STRING,`_origin`STRING,`timestamp`STRING,`traits`struct<`_empty_`:STRING>,`originalTimestamp`STRING,`sentAt`STRING,`projectId`STRING,`receivedAt`STRING,`integrations`struct<`_empty_`:STRING>\n)\nPARTITIONED BY (dt string)\nROW FORMAT SERDE\n'org.openx.data.jsonserde.JsonSerDe'\n WITH SERDEPROPERTIES ( 'mapping.ga_client_id' = 'GA Client ID' , 'mapping.GA_Client_ID' = 'GA Client ID' )\nSTORED AS INPUTFORMAT\n  'org.apache.hadoop.mapred.TextInputFormat'\nOUTPUTFORMAT\n  'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'\nLOCATION\n  's3://sfdl-segment-sfdc-prod/separated_dt/identify'\n",
          "MSCK REPAIR TABLE sfdl_prod_segment.sfdc_identify"
      ]
    • hive_path: s3://sfdl-segment-sfdc-prod/separated_dt/identify
    • id: 139907
    • processed: 1
    • raw_data:
      {
          "_metadata": {
              "bundled": [
                  "FullStory",
                  "Google Analytics",
                  "Segment.io"
              ],
              "unbundled": []
          },
          "_origin": "2020-04-30T02:12:55Z|s3://sfdl-segment-sfdc-prod/segment-logs/SgsnnfL86n/1588204800000/1588212486492.44850b27b671.e4797d0.41da02b5-9766-4264-9731-7c0cca0c31f8.gz|segment_separator:37",
          "anonymousId": "fcd8fcd4-3785-4ef3-8a23-1edb79662c7c",
          "channel": "client",
          "context": {
              "ip": "172.90.14.242",
              "library": {
                  "name": "analytics.js",
                  "version": "3.11.4"
              },
              "locale": "en-US",
              "page": {
                  "path": "/Sun-Valley-CA-self-storage/Trojan-Storage-of-Sun-Valley-65183.html",
                  "referrer": "",
                  "search": "",
                  "title": "Trojan Storage of Sun Valley: Locally Owned | SpareFoot",
                  "url": "https://www.sparefoot.com/Sun-Valley-CA-self-storage/Trojan-Storage-of-Sun-Valley-65183.html"
              },
              "userAgent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.132 Safari/537.36"
          },
          "integrations": {},
          "messageId": "ajs-3bc8820b1c3f2f119a45012e5476b62f",
          "originalTimestamp": "2020-04-30T01:40:36.620Z",
          "projectId": "SgsnnfL86n",
          "receivedAt": "2020-04-30T01:40:04.566Z",
          "sentAt": "2020-04-30T01:40:36.624Z",
          "timestamp": "2020-04-30T01:40:04.562Z",
          "traits": {},
          "type": "identify",
          "userId": "3759aef943d32f68486972771",
          "version": 2
      }
    • schema_attributes:
      {
          "_origin": "string",
          "anonymousId": "string",
          "channel": "string",
          "context.campaign.content": "string",
          "context.campaign.medium": "string",
          "context.campaign.name": "string",
          "context.campaign.source": "string",
          "context.campaign.term": "string",
          "context.ip": "string",
          "context.library.name": "string",
          "context.library.version": "string",
          "context.locale": "string",
          "context.page.path": "string",
          "context.page.referrer": "string",
          "context.page.search": "string",
          "context.page.title": "string",
          "context.page.url": "string",
          "context.userAgent": "string",
          "messageId": "string",
          "originalTimestamp": "string",
          "projectId": "string",
          "receivedAt": "string",
          "sentAt": "string",
          "timestamp": "string",
          "type": "string",
          "userId": "string",
          "version": "string"
      }
    • schema_name_id: 99
    • schema_scan_id: 36216005
    • updated: 2020-04-29 21:19:14
Schema Scans: 4
Last at 2020-05-14 05:14:30
    • duration: 0:05:22.167370
    • exit_message:
      {
          "ddl_changed": true,
          "ingested_partitions": 175,
          "partitions_applied": false,
          "success": true
      }
    • id: 36988284
    • payload:
      {
          "datatype_dict": {
              "originalTimestamp": "timestamp_iso8601",
              "receivedAt": "timestamp_iso8601",
              "sentAt": "timestamp_iso8601",
              "timestamp": "timestamp_iso8601"
          },
          "file_format": "json",
          "partition": {
              "location": "s3://sfdl-segment-sfdc-prod/separated_dt/identify/dt=2020-05-14/",
              "name": "dt",
              "type": "string",
              "value": "2020-05-14"
          },
          "s3": {
              "bucket": "sfdl-segment-sfdc-prod",
              "hive_path": "s3://sfdl-segment-sfdc-prod/separated_dt/identify",
              "key": "separated_dt/identify/dt=2020-05-14/1589450830742.e9a2911a4a83.e4797d0.9044ae27-4171-45f0-96a5-befe2a4e1d65.json.gz"
          },
          "schema": {
              "name": "sfdl_prod_segment.sfdc_identify",
              "type": "segment"
          }
      }
    • running: False
    • schema_name_id: 99
    • start_time: 2020-05-14 05:14:30
    • success: True
    • trace_id: 13649003791618932637
    • duration: 0:01:04.333804
    • exit_message:
      {
          "exception": "failed to apply schema `Query failed: FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.exec.DDLTask `MSCK REPAIR TABLE sfdl_prod_segment.sfdc_identify``: ['CREATE DATABASE IF NOT EXISTS `sfdl_prod_segment`', 'DROP TABLE IF EXISTS `sfdl_prod_segment.sfdc_identify`', \"CREATE EXTERNAL TABLE IF NOT EXISTS\\n`sfdl_prod_segment.sfdc_identify`\\n(\\n`context`struct<`page`:struct<`url`:STRING,`path`:STRING,`title`:STRING,`search`:STRING,`referrer`:STRING>,`library`:struct<`version`:STRING,`name`:STRING>,`locale`:STRING,`ip`:STRING,`userAgent`:STRING,`campaign`:struct<`content`:STRING,`source`:STRING,`term`:STRING,`medium`:STRING,`name`:STRING>>,`integrations`struct<`_empty_`:STRING>,`_metadata`struct<`bundled`:array,`unbundled`:array>,`receivedAt`STRING,`version`STRING,`traits`struct<`request_id`:STRING,`muuid_date`:STRING,`pxResult`:STRING,`timeIncId`:STRING,`timezone`:STRING,`timezoneOffset`:STRING>,`sentAt`STRING,`projectId`STRING,`userId`STRING,`type`STRING,`anonymousId`STRING,`_origin`STRING,`channel`STRING,`messageId`STRING,`originalTimestamp`STRING,`timestamp`STRING\\n)\\nPARTITIONED BY (dt string)\\nROW FORMAT SERDE\\n'org.openx.data.jsonserde.JsonSerDe'\\n WITH SERDEPROPERTIES ( 'mapping.ga_client_id' = 'GA Client ID' , 'mapping.GA_Client_ID' = 'GA Client ID' )\\nSTORED AS INPUTFORMAT\\n  'org.apache.hadoop.mapred.TextInputFormat'\\nOUTPUTFORMAT\\n  'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'\\nLOCATION\\n  's3://sfdl-segment-sfdc-prod/separated_dt/identify'\\n\", 'MSCK REPAIR TABLE sfdl_prod_segment.sfdc_identify']\nTraceback (most recent call last):\n  File \"/schema_manager/schema_generator/table_generator.py\", line 24, in apply_latest_version\n    self.runner.run_multi_query(schema_version.ddl, timeout=DDLQueueWorker.TIMEOUT)\n  File \"/src/athena-runner/athena_runner/runner.py\", line 133, in run_multi_query\n    result = self.run_single_query(query, timeout, database)\n  File \"/src/athena-runner/athena_runner/runner.py\", line 51, in run_single_query\n    return self._run_single_query(query, timeout, database)\n  File \"/src/athena-runner/athena_runner/runner.py\", line 120, in _run_single_query\n    raise e\n  File \"/src/athena-runner/athena_runner/runner.py\", line 103, in _run_single_query\n    raise AthenaRunnerQueryException('Query failed: {} `{}`'.format(info['QueryExecution']['Status']['StateChangeReason'], query))\nathena_runner.exceptions.AthenaRunnerQueryException: Query failed: FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.exec.DDLTask `MSCK REPAIR TABLE sfdl_prod_segment.sfdc_identify`\n\nDuring handling of the above exception, another exception occurred:\n\nTraceback (most recent call last):\n  File \"/schema_manager/schema_generator/schema_scanner.py\", line 102, in scan\n    response = service.run()\n  File \"/schema_manager/schema_generator/process_hive_table.py\", line 95, in run\n    response['ddl_changed'] = table_generator.run()\n  File \"/schema_manager/schema_generator/table_generator.py\", line 35, in run\n    self.apply_latest_version()\n  File \"/schema_manager/schema_generator/table_generator.py\", line 26, in apply_latest_version\n    raise Exception('failed to apply schema `{}`: {}'.format(e, schema_version.ddl))\nException: failed to apply schema `Query failed: FAILED: Execution Error, return code 1 from org.apache.hadoop.hive.ql.exec.DDLTask `MSCK REPAIR TABLE sfdl_prod_segment.sfdc_identify``: ['CREATE DATABASE IF NOT EXISTS `sfdl_prod_segment`', 'DROP TABLE IF EXISTS `sfdl_prod_segment.sfdc_identify`', \"CREATE EXTERNAL TABLE IF NOT EXISTS\\n`sfdl_prod_segment.sfdc_identify`\\n(\\n`context`struct<`page`:struct<`url`:STRING,`path`:STRING,`title`:STRING,`search`:STRING,`referrer`:STRING>,`library`:struct<`version`:STRING,`name`:STRING>,`locale`:STRING,`ip`:STRING,`userAgent`:STRING,`campaign`:struct<`content`:STRING,`source`:STRING,`term`:STRING,`medium`:STRING,`name`:STRING>>,`integrations`struct<`_empty_`:STRING>,`_metadata`struct<`bundled`:array,`unbundled`:array>,`receivedAt`STRING,`version`STRING,`traits`struct<`request_id`:STRING,`muuid_date`:STRING,`pxResult`:STRING,`timeIncId`:STRING,`timezone`:STRING,`timezoneOffset`:STRING>,`sentAt`STRING,`projectId`STRING,`userId`STRING,`type`STRING,`anonymousId`STRING,`_origin`STRING,`channel`STRING,`messageId`STRING,`originalTimestamp`STRING,`timestamp`STRING\\n)\\nPARTITIONED BY (dt string)\\nROW FORMAT SERDE\\n'org.openx.data.jsonserde.JsonSerDe'\\n WITH SERDEPROPERTIES ( 'mapping.ga_client_id' = 'GA Client ID' , 'mapping.GA_Client_ID' = 'GA Client ID' )\\nSTORED AS INPUTFORMAT\\n  'org.apache.hadoop.mapred.TextInputFormat'\\nOUTPUTFORMAT\\n  'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'\\nLOCATION\\n  's3://sfdl-segment-sfdc-prod/separated_dt/identify'\\n\", 'MSCK REPAIR TABLE sfdl_prod_segment.sfdc_identify']\n: Expecting value: line 1 column 1 (char 0)"
      }
    • id: 36988158
    • payload:
      {
          "datatype_dict": {
              "originalTimestamp": "timestamp_iso8601",
              "receivedAt": "timestamp_iso8601",
              "sentAt": "timestamp_iso8601",
              "timestamp": "timestamp_iso8601"
          },
          "file_format": "json",
          "partition": {
              "location": "s3://sfdl-segment-sfdc-prod/separated_dt/identify/dt=2020-05-14/",
              "name": "dt",
              "type": "string",
              "value": "2020-05-14"
          },
          "s3": {
              "bucket": "sfdl-segment-sfdc-prod",
              "hive_path": "s3://sfdl-segment-sfdc-prod/separated_dt/identify",
              "key": "separated_dt/identify/dt=2020-05-14/1589450979710.467b17edabff.e4797d0.eb150bc1-ca33-4ec4-9e92-4aaecf43d664.json.gz"
          },
          "schema": {
              "name": "sfdl_prod_segment.sfdc_identify",
              "type": "segment"
          }
      }
    • running: False
    • schema_name_id: 99
    • start_time: 2020-05-14 05:10:51
    • success: False
    • trace_id: 10699749574951355238
    • duration: 0:02:04.261037
    • exit_message:
      {
          "ddl_changed": true,
          "ingested_partitions": 519,
          "partitions_applied": false,
          "success": true
      }
    • id: 36414412
    • payload:
      {
          "datatype_dict": {
              "originalTimestamp": "timestamp_iso8601",
              "receivedAt": "timestamp_iso8601",
              "sentAt": "timestamp_iso8601",
              "timestamp": "timestamp_iso8601"
          },
          "file_format": "json",
          "partition": {
              "location": "s3://sfdl-segment-sfdc-prod/separated_dt/identify/dt=2020-05-03/",
              "name": "dt",
              "type": "string",
              "value": "2020-05-03"
          },
          "s3": {
              "bucket": "sfdl-segment-sfdc-prod",
              "hive_path": "s3://sfdl-segment-sfdc-prod/separated_dt/identify",
              "key": "separated_dt/identify/dt=2020-05-03/1588536500207.b9ae9497e1f7.e4797d0.d638001c-187b-4e2f-9e1f-b655a49be343.json.gz"
          },
          "schema": {
              "name": "sfdl_prod_segment.sfdc_identify",
              "type": "segment"
          }
      }
    • running: False
    • schema_name_id: 99
    • start_time: 2020-05-03 15:10:14
    • success: True
    • trace_id: 18106067313119164192
    • duration: 0:02:26.451742
    • exit_message:
      {
          "ddl_changed": true,
          "ingested_partitions": 518,
          "partitions_applied": false,
          "success": true
      }
    • id: 36216005
    • payload:
      {
          "datatype_dict": {
              "originalTimestamp": "timestamp_iso8601",
              "receivedAt": "timestamp_iso8601",
              "sentAt": "timestamp_iso8601",
              "timestamp": "timestamp_iso8601"
          },
          "file_format": "json",
          "partition": {
              "location": "s3://sfdl-segment-sfdc-prod/separated_dt/identify/dt=2020-04-30/",
              "name": "dt",
              "type": "string",
              "value": "2020-04-30"
          },
          "s3": {
              "bucket": "sfdl-segment-sfdc-prod",
              "hive_path": "s3://sfdl-segment-sfdc-prod/separated_dt/identify",
              "key": "separated_dt/identify/dt=2020-04-30/1588212486492.44850b27b671.e4797d0.41da02b5-9766-4264-9731-7c0cca0c31f8.json.gz"
          },
          "schema": {
              "name": "sfdl_prod_segment.sfdc_identify",
              "type": "segment"
          }
      }
    • running: False
    • schema_name_id: 99
    • start_time: 2020-04-29 21:13:10
    • success: True
    • trace_id: 7361690129348836688
Partitions:
  • count: 3
  • ddl:
    [
        "ALTER TABLE\n    `sfdl_prod_segment.sfdc_identify`\nADD\n\n\n PARTITION (dt='2023-06-04') location 's3://sfdl-segment-sfdc-prod/separated_dt/identify/dt=2023-06-04/'\n PARTITION (dt='2023-06-06') location 's3://sfdl-segment-sfdc-prod/separated_dt/identify/dt=2023-06-06/'\n PARTITION (dt='2023-06-08') location 's3://sfdl-segment-sfdc-prod/separated_dt/identify/dt=2023-06-08/'\n;"
    ]
  • name: dt
  • state:
    [
        {
            "info": {
                "id": 927197,
                "location": "s3://sfdl-segment-sfdc-prod/separated_dt/identify/dt=2023-06-04/",
                "name": "dt",
                "value": "2023-06-04"
            },
            "state": {
                "arrival": {
                    "athena_discovery": true,
                    "first_data": "2023-06-03 23:18:49",
                    "last_data": "2023-06-03 23:18:49"
                },
                "parquet": {
                    "first_performed": null,
                    "in_progress": null,
                    "job_id": null,
                    "last_performed": null,
                    "queued": true
                }
            }
        },
        {
            "info": {
                "id": 927682,
                "location": "s3://sfdl-segment-sfdc-prod/separated_dt/identify/dt=2023-06-06/",
                "name": "dt",
                "value": "2023-06-06"
            },
            "state": {
                "arrival": {
                    "athena_discovery": true,
                    "first_data": "2023-06-06 10:56:35",
                    "last_data": "2023-06-06 11:04:33"
                },
                "parquet": {
                    "first_performed": null,
                    "in_progress": null,
                    "job_id": null,
                    "last_performed": null,
                    "queued": true
                }
            }
        },
        {
            "info": {
                "id": 928019,
                "location": "s3://sfdl-segment-sfdc-prod/separated_dt/identify/dt=2023-06-08/",
                "name": "dt",
                "value": "2023-06-08"
            },
            "state": {
                "arrival": {
                    "athena_discovery": true,
                    "first_data": "2023-06-08 06:27:48",
                    "last_data": "2023-06-08 06:27:48"
                },
                "parquet": {
                    "first_performed": null,
                    "in_progress": null,
                    "job_id": null,
                    "last_performed": null,
                    "queued": true
                }
            }
        }
    ]
  • type: string
Schema:
  • attributes:
    {
        "_origin": "string",
        "anonymousId": "string",
        "channel": "string",
        "context.campaign.content": "string",
        "context.campaign.medium": "string",
        "context.campaign.name": "string",
        "context.campaign.source": "string",
        "context.campaign.term": "string",
        "context.ip": "string",
        "context.library.name": "string",
        "context.library.version": "string",
        "context.locale": "string",
        "context.page.path": "string",
        "context.page.referrer": "string",
        "context.page.search": "string",
        "context.page.title": "string",
        "context.page.url": "string",
        "context.userAgent": "string",
        "messageId": "string",
        "originalTimestamp": "string",
        "projectId": "string",
        "receivedAt": "string",
        "sentAt": "string",
        "timestamp": "string",
        "traits.bizrateId": "string",
        "traits.googleAnalyticsDaily": "string",
        "traits.muuid_date": "string",
        "traits.pxResult": "string",
        "traits.request_id": "string",
        "traits.segmentId": "string",
        "traits.timeIncId": "string",
        "traits.timezone": "string",
        "traits.timezoneOffset": "string",
        "type": "string",
        "userId": "string",
        "version": "string"
    }
  • created: 2020-05-14 05:14:31
  • ddl:
    {
        "create_database": "CREATE DATABASE IF NOT EXISTS `sfdl_prod_segment`",
        "create_table": "CREATE EXTERNAL TABLE IF NOT EXISTS\n`sfdl_prod_segment.sfdc_identify`\n(\n`channel`STRING,`context`struct<`ip`:STRING,`campaign`:struct<`content`:STRING,`source`:STRING,`term`:STRING,`medium`:STRING,`name`:STRING>,`library`:struct<`version`:STRING,`name`:STRING>,`page`:struct<`url`:STRING,`search`:STRING,`referrer`:STRING,`title`:STRING,`path`:STRING>,`userAgent`:STRING,`locale`:STRING>,`traits`struct<`segmentId`:STRING,`timeIncId`:STRING,`googleAnalyticsDaily`:STRING,`muuid_date`:STRING,`pxResult`:STRING,`request_id`:STRING,`timezoneOffset`:STRING,`bizrateId`:STRING,`timezone`:STRING>,`version`STRING,`_metadata`struct<`bundled`:array,`unbundled`:array>,`receivedAt`STRING,`_origin`STRING,`sentAt`STRING,`timestamp`STRING,`projectId`STRING,`messageId`STRING,`type`STRING,`anonymousId`STRING,`userId`STRING,`integrations`struct<`_empty_`:STRING>,`originalTimestamp`STRING\n)\nPARTITIONED BY (dt string)\nROW FORMAT SERDE\n'org.openx.data.jsonserde.JsonSerDe'\n WITH SERDEPROPERTIES ( 'mapping.ga_client_id' = 'GA Client ID' , 'mapping.GA_Client_ID' = 'GA Client ID' )\nSTORED AS INPUTFORMAT\n  'org.apache.hadoop.mapred.TextInputFormat'\nOUTPUTFORMAT\n  'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'\nLOCATION\n  's3://sfdl-segment-sfdc-prod/separated_dt/identify'\n",
        "drop_table": "DROP TABLE IF EXISTS `sfdl_prod_segment.sfdc_identify`",
        "repair_table": "MSCK REPAIR TABLE sfdl_prod_segment.sfdc_identify"
    }
Server:
  • hostname: schemamanager-5475496c57-7dpqb
  • name: schema_manager
  • process_start: 2026-01-09 11:27:47
  • sf_env: prod
  • version: 2112174301
Raw:
{
    "name": {
        "created": "2017-11-02 13:19:40",
        "disabled": false,
        "format": "raw",
        "id": 99,
        "is_partitioned": true,
        "is_schema_datatyped": false,
        "name": {
            "database": "sfdl_prod_segment",
            "table": "sfdc_identify"
        },
        "type": "segment"
    },
    "partition": {
        "count": 3,
        "ddl": [
            "ALTER TABLE\n    `sfdl_prod_segment.sfdc_identify`\nADD\n\n\n PARTITION (dt='2023-06-04') location 's3://sfdl-segment-sfdc-prod/separated_dt/identify/dt=2023-06-04/'\n PARTITION (dt='2023-06-06') location 's3://sfdl-segment-sfdc-prod/separated_dt/identify/dt=2023-06-06/'\n PARTITION (dt='2023-06-08') location 's3://sfdl-segment-sfdc-prod/separated_dt/identify/dt=2023-06-08/'\n;"
        ],
        "name": "dt",
        "state": [
            {
                "info": {
                    "id": 927197,
                    "location": "s3://sfdl-segment-sfdc-prod/separated_dt/identify/dt=2023-06-04/",
                    "name": "dt",
                    "value": "2023-06-04"
                },
                "state": {
                    "arrival": {
                        "athena_discovery": true,
                        "first_data": "2023-06-03 23:18:49",
                        "last_data": "2023-06-03 23:18:49"
                    },
                    "parquet": {
                        "first_performed": null,
                        "in_progress": null,
                        "job_id": null,
                        "last_performed": null,
                        "queued": true
                    }
                }
            },
            {
                "info": {
                    "id": 927682,
                    "location": "s3://sfdl-segment-sfdc-prod/separated_dt/identify/dt=2023-06-06/",
                    "name": "dt",
                    "value": "2023-06-06"
                },
                "state": {
                    "arrival": {
                        "athena_discovery": true,
                        "first_data": "2023-06-06 10:56:35",
                        "last_data": "2023-06-06 11:04:33"
                    },
                    "parquet": {
                        "first_performed": null,
                        "in_progress": null,
                        "job_id": null,
                        "last_performed": null,
                        "queued": true
                    }
                }
            },
            {
                "info": {
                    "id": 928019,
                    "location": "s3://sfdl-segment-sfdc-prod/separated_dt/identify/dt=2023-06-08/",
                    "name": "dt",
                    "value": "2023-06-08"
                },
                "state": {
                    "arrival": {
                        "athena_discovery": true,
                        "first_data": "2023-06-08 06:27:48",
                        "last_data": "2023-06-08 06:27:48"
                    },
                    "parquet": {
                        "first_performed": null,
                        "in_progress": null,
                        "job_id": null,
                        "last_performed": null,
                        "queued": true
                    }
                }
            }
        ],
        "type": "string"
    },
    "schema": {
        "attributes": {
            "_origin": "string",
            "anonymousId": "string",
            "channel": "string",
            "context.campaign.content": "string",
            "context.campaign.medium": "string",
            "context.campaign.name": "string",
            "context.campaign.source": "string",
            "context.campaign.term": "string",
            "context.ip": "string",
            "context.library.name": "string",
            "context.library.version": "string",
            "context.locale": "string",
            "context.page.path": "string",
            "context.page.referrer": "string",
            "context.page.search": "string",
            "context.page.title": "string",
            "context.page.url": "string",
            "context.userAgent": "string",
            "messageId": "string",
            "originalTimestamp": "string",
            "projectId": "string",
            "receivedAt": "string",
            "sentAt": "string",
            "timestamp": "string",
            "traits.bizrateId": "string",
            "traits.googleAnalyticsDaily": "string",
            "traits.muuid_date": "string",
            "traits.pxResult": "string",
            "traits.request_id": "string",
            "traits.segmentId": "string",
            "traits.timeIncId": "string",
            "traits.timezone": "string",
            "traits.timezoneOffset": "string",
            "type": "string",
            "userId": "string",
            "version": "string"
        },
        "created": "2020-05-14 05:14:31",
        "ddl": {
            "create_database": "CREATE DATABASE IF NOT EXISTS `sfdl_prod_segment`",
            "create_table": "CREATE EXTERNAL TABLE IF NOT EXISTS\n`sfdl_prod_segment.sfdc_identify`\n(\n`channel`STRING,`context`struct<`ip`:STRING,`campaign`:struct<`content`:STRING,`source`:STRING,`term`:STRING,`medium`:STRING,`name`:STRING>,`library`:struct<`version`:STRING,`name`:STRING>,`page`:struct<`url`:STRING,`search`:STRING,`referrer`:STRING,`title`:STRING,`path`:STRING>,`userAgent`:STRING,`locale`:STRING>,`traits`struct<`segmentId`:STRING,`timeIncId`:STRING,`googleAnalyticsDaily`:STRING,`muuid_date`:STRING,`pxResult`:STRING,`request_id`:STRING,`timezoneOffset`:STRING,`bizrateId`:STRING,`timezone`:STRING>,`version`STRING,`_metadata`struct<`bundled`:array,`unbundled`:array>,`receivedAt`STRING,`_origin`STRING,`sentAt`STRING,`timestamp`STRING,`projectId`STRING,`messageId`STRING,`type`STRING,`anonymousId`STRING,`userId`STRING,`integrations`struct<`_empty_`:STRING>,`originalTimestamp`STRING\n)\nPARTITIONED BY (dt string)\nROW FORMAT SERDE\n'org.openx.data.jsonserde.JsonSerDe'\n WITH SERDEPROPERTIES ( 'mapping.ga_client_id' = 'GA Client ID' , 'mapping.GA_Client_ID' = 'GA Client ID' )\nSTORED AS INPUTFORMAT\n  'org.apache.hadoop.mapred.TextInputFormat'\nOUTPUTFORMAT\n  'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'\nLOCATION\n  's3://sfdl-segment-sfdc-prod/separated_dt/identify'\n",
            "drop_table": "DROP TABLE IF EXISTS `sfdl_prod_segment.sfdc_identify`",
            "repair_table": "MSCK REPAIR TABLE sfdl_prod_segment.sfdc_identify"
        }
    },
    "server": {
        "hostname": "schemamanager-5475496c57-7dpqb",
        "name": "schema_manager",
        "process_start": "2026-01-09 11:27:47",
        "sf_env": "prod",
        "version": "2112174301"
    }
}