[MongoDB] Time Series Collection

DataBase 2023. 7. 11. 18:05

MongoDB v5.0부터 TimeSeries collection 사용가능.

time series collection을 생성할 경우,
MongoDB가 query 효율을 높이고 disk i/o를 최소화하는 등 time series data를 다루는데 적합하도록 내부적으로 clustered index를 포함한 최적화 된 저장포맷으로 만들어 줌.

Time Series Collection 생성은 아래와 같이.

db.createCollection(
    'weather',
    {
        timeseries: {
            // 필수값. 시간을 지정하는 필드명. BSON date 타입만 가능.
            timeField: 'timestamp', 
            // time series의 unique label로 사용할 필드명. 
            // _id 또는 timeseries.timeField에 지정한 이름은 불가.
            metaField: 'sensorId',  
            // 지정시 내부적으로 데이터를 저장할 때 시간단위별로 최적화하여 퍼포먼스 향상. 
            // seconds / minutes / hours 중 하나. 기본값은 seconds
            granularity: 'hours'
        },
        expireAfterSeconds: 86400 // TTL 지정 가능.      
    }
)

생성할 때 외에는 그냥 일반 collection과 동일하게 사용 가능.

// 생성
[direct: mongos] db> db.createCollection(
    'weather',
    {
        timeseries: {
            timeField: 't',
            metaField: 'sensorId'
        },
        expireAfterSeconds: 86400
    }
)
 
 
 
// 확인
[direct: mongos] db> show collections
weatcher                 [time-series]
system.buckets.weather
system.profile
system.views
 
// DB의 timeseries collection 리스트(+옵션) 출력
[direct: mongos] db> db.runCommand({
    listCollections: 1,
    filter: { type: 'timeseries' }
})
{
  cursor: {
    id: Long("0"),
    ns: 'db.$cmd.listCollections',
    firstBatch: [
      {
        name: 'weather',
        type: 'timeseries',
        options: {
          expireAfterSeconds: Long("86400"),
          timeseries: {
            timeField: 't',
            metaField: 'sensorId',
            granularity: 'seconds',
            bucketMaxSpanSeconds: 3600
          }
        },
        info: { readOnly: false }
      }
    ]
  },
  ok: 1,
  '$clusterTime': {
    clusterTime: Timestamp({ t: 1689058766, i: 1 }),
    signature: {
      hash: Binary(Buffer.from("3acfae0935d3589d5cf8e09b745e0cafaf229771", "hex"), 0),
      keyId: Long("7231062885789597709")
    }
  },
  operationTime: Timestamp({ t: 1689058763, i: 1 })
}
 
 
 
 
// insert
[direct: mongos] db> const data = [];
// best practice에 따르면 metadata별로 정렬된 batch가 더 효율적이라고 함
// 필드순서도 동일하게 하는게 좋고, insert하는 client 수를 늘이는 것도 좋다고 함
// 압축효율을 위해 만약 빈 array나 object가 있을 경우 필드를 아예 생략하는 것도 좋고
['s0', 's1', 's2', 's3'].forEach((sensorId)=>{
    for (let i=0; i<86400; i++) {
        let t = new Date('2023-07-11T00:00:00');
        t.setSeconds(t.getSeconds() + i);
        data.push({
            t: t,
            sensorId: sensorId,
            temp: Math.floor(Math.random() * 11) + 10
        });
    }
});
 
 
db.weather.insertMany(data);
// 뭔가 엄청나게 출력될 것이다...
 
 
 
 
[direct: mongos] db> db.weather.countDocuments()
345600
 
 
// query 1 - find
[direct: mongos] db> db.weather.find({t:{$gte:ISODate('2023-07-11T01:00:00')}, sensorId:'s1'})
.sort({t:1}).limit(5)
[
  {
    t: ISODate("2023-07-11T01:00:00.000Z"),
    sensorId: 's1',
    _id: ObjectId("64ad01f9644715f2ed2f3cdb"),
    temp: 10
  },
  {
    t: ISODate("2023-07-11T01:00:01.000Z"),
    sensorId: 's1',
    _id: ObjectId("64ad01f9644715f2ed2f3cdf"),
    temp: 19
  },
  {
    t: ISODate("2023-07-11T01:00:02.000Z"),
    sensorId: 's1',
    _id: ObjectId("64ad01f9644715f2ed2f3ce3"),
    temp: 19
  },
  {
    t: ISODate("2023-07-11T01:00:03.000Z"),
    sensorId: 's1',
    _id: ObjectId("64ad01f9644715f2ed2f3ce7"),
    temp: 13
  },
  {
    t: ISODate("2023-07-11T01:00:04.000Z"),
    sensorId: 's1',
    _id: ObjectId("64ad01f9644715f2ed2f3ceb"),
    temp: 12
  }
]
 
// query 2 - aggregate
db.weather.aggregate([
    {
        $match: {
            t: {
                $gte: ISODate('2023-07-11T00:00:00.000Z'),
                $lt: ISODate('2023-07-12T00:00:00.000Z')
            }
        }
    },
    {
        $group: {
            _id: {
                y: { $year: '$t' },
                m: { $month: '$t' },
                d: { $dayOfMonth: '$t' },
                h: { $hour: '$t' }
            },
            avgTemp: { $avg: '$temp' }
        }
    },
    {
        $sort: {
            '_id.y': 1,
            '_id.m': 1,
            '_id.d': 1,
            '_id.h': 1
        }
    }
])
 
[
  { _id: { y: 2023, m: 7, d: 11, h: 0 }, avgTemp: 14.959930555555555 },
  { _id: { y: 2023, m: 7, d: 11, h: 1 }, avgTemp: 15.016805555555555 },
  { _id: { y: 2023, m: 7, d: 11, h: 2 }, avgTemp: 14.979930555555555 },
  { _id: { y: 2023, m: 7, d: 11, h: 3 }, avgTemp: 14.983055555555556 },
  { _id: { y: 2023, m: 7, d: 11, h: 4 }, avgTemp: 14.954027777777778 },
  { _id: { y: 2023, m: 7, d: 11, h: 5 }, avgTemp: 14.999444444444444 },
  { _id: { y: 2023, m: 7, d: 11, h: 6 }, avgTemp: 15.020486111111111 },
  { _id: { y: 2023, m: 7, d: 11, h: 7 }, avgTemp: 14.993402777777778 },
  { _id: { y: 2023, m: 7, d: 11, h: 8 }, avgTemp: 15.039513888888889 },
  { _id: { y: 2023, m: 7, d: 11, h: 9 }, avgTemp: 15.038611111111111 },
  { _id: { y: 2023, m: 7, d: 11, h: 10 }, avgTemp: 15.01013888888889 },
  { _id: { y: 2023, m: 7, d: 11, h: 11 }, avgTemp: 14.971319444444445 },
  { _id: { y: 2023, m: 7, d: 11, h: 12 }, avgTemp: 15.035763888888889 },
  { _id: { y: 2023, m: 7, d: 11, h: 13 }, avgTemp: 14.980208333333334 },
  { _id: { y: 2023, m: 7, d: 11, h: 14 }, avgTemp: 14.975 }
]

그리 대용량으로 테스트해보진 못했으나 동일한 데이터를 {t:1, sensorId:1} 로 compound index만들 collection에 때려넣고,
마지막 aggregate로 explain() 해보니까 대충 어떤 느낌인지 알 것 같음.
상황에 따라 200ms ~ 2000ms 이상까지 차이가 남을 확인.
데이터가 많을 경우를 가정했을 때 이 정도면 유의미한 차이인 듯.

// 이건 time series collection의 executionStats
executionStats: {
  executionSuccess: true,
  nReturned: 220,
  executionTimeMillis: 411,
  totalKeysExamined: 0,
  totalDocsExamined: 233,
  executionStages: {
    stage: 'CLUSTERED_IXSCAN',
    filter: {
      '$and': [
        {
          _id: { '$lt': ObjectId("64aded000000000000000000") }
        },
        {
          _id: { '$gte': ObjectId("64ac8d700000000000000000") }
        },
        {
          'control.max.t': {
            '$_internalExprGte': ISODate("2023-07-11T00:00:00.000Z")
          }
        },
        {
          'control.min.t': {
            '$_internalExprGte': ISODate("2023-07-10T23:00:00.000Z")
          }
        },
        {
          'control.max.t': {
            '$_internalExprLt': ISODate("2023-07-12T01:00:00.000Z")
          }
        },
        {
          'control.min.t': {
            '$_internalExprLt': ISODate("2023-07-12T00:00:00.000Z")
          }
        }
      ]
    },
    nReturned: 220,
    executionTimeMillisEstimate: 0,
    works: 235,
    advanced: 220,
    needTime: 14,
    needYield: 0,
    saveState: 1,
    restoreState: 1,
    isEOF: 1,
    direction: 'forward',
    minRecord: ObjectId("64ac8d700000000000000000"),
    maxRecord: ObjectId("64aded000000000000000000"),
    docsExamined: 233
  }
}
 
 
// 이건 일반 collection의 executionStats
executionStats: {
  executionSuccess: true,
  nReturned: 216000,
  executionTimeMillis: 2325,
  totalKeysExamined: 216000,
  totalDocsExamined: 216000,
  executionStages: {
    stage: 'PROJECTION_SIMPLE',
    nReturned: 216000,
    executionTimeMillisEstimate: 608,
    works: 216001,
    advanced: 216000,
    needTime: 0,
    needYield: 0,
    saveState: 232,
    restoreState: 232,
    isEOF: 1,
    transformBy: { t: 1, temp: 1, _id: 0 },
    inputStage: {
      stage: 'FETCH',
      nReturned: 216000,
      executionTimeMillisEstimate: 571,
      works: 216001,
      advanced: 216000,
      needTime: 0,
      needYield: 0,
      saveState: 232,
      restoreState: 232,
      isEOF: 1,
      docsExamined: 216000,
      alreadyHasObj: 0,
      inputStage: {
        stage: 'IXSCAN',
        nReturned: 216000,
        executionTimeMillisEstimate: 311,
        works: 216001,
        advanced: 216000,
        needTime: 0,
        needYield: 0,
        saveState: 232,
        restoreState: 232,
        isEOF: 1,
        keyPattern: { t: 1, sensorId: 1 },
        indexName: 't_1_sensorId_1',
        isMultiKey: false,
        multiKeyPaths: { t: [], sensorId: [] },
        isUnique: false,
        isSparse: false,
        isPartial: false,
        indexVersion: 2,
        direction: 'forward',
        indexBounds: {
          t: [
            '[new Date(1689033600000), new Date(1689120000000))'
          ],
          sensorId: [ '[MinKey, MaxKey]' ]
        },
        keysExamined: 216000,
        seeks: 1,
        dupsTested: 0,
        dupsDropped: 0
      }
    }
  }
}

time series collection의 경우 sharding은 가능한데 reshard는 안되므로 주의할 것.
그 외 불가능한 것들도 있으니, 생성옵션 등 자잘한 것들은 공식문서 참조.

https://www.mongodb.com/docs/manual/core/timeseries-collections/

저작자표시

'DataBase' 카테고리의 다른 글

[MongoDB] Schema Validation (스키마 유효성 검사) (0)	2023.07.19
[MongoDB] collection에 해당하는 chunk 리스트 가져오기 (get chunks list from config.chunks) (0)	2023.07.13
[MongoDB] ChangeStream (0)	2023.07.05
[MongoDB] shard key 변경 (refineCollectionShardKey, reshard) (0)	2023.06.30
[MongoDB] 트랜잭션 (Transaction) (0)	2023.06.01

Posted by bloodguy

Bloodguy
Twitter : @nicehide