diff --git a/.github/actions/genprotos/action.yml b/.github/actions/genprotos/action.yml index 6d277c361f..2ac2b79286 100644 --- a/.github/actions/genprotos/action.yml +++ b/.github/actions/genprotos/action.yml @@ -15,7 +15,7 @@ runs: key: ${{ runner.os }}-build-genprotos-${{ hashFiles('buf.gen.yaml', './protos/peers.proto', './protos/flow.proto', './protos/route.proto') }} - if: steps.cache.outputs.cache-hit != 'true' - uses: bufbuild/buf-action@dfda68eacb65895184c76b9ae522b977636a2c47 # v1 + uses: bufbuild/buf-action@c231a1aa9281e5db706c970f468f0744a37561fd # v1 with: setup_only: true github_token: ${{ github.token }} diff --git a/.github/workflows/cleanup.yml b/.github/workflows/cleanup.yml index 228b55708e..6ba8c68d53 100644 --- a/.github/workflows/cleanup.yml +++ b/.github/workflows/cleanup.yml @@ -14,7 +14,7 @@ jobs: - uses: actions/setup-go@d35c59abb061a4a6fb18e82ac0862c26744d6ab5 # v5 with: - go-version: '1.24.3' + go-version: '1.24.4' cache-dependency-path: e2e_cleanup/go.sum - name: download go modules diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml index f77a2c8950..7149c59b72 100644 --- a/.github/workflows/codeql-analysis.yml +++ b/.github/workflows/codeql-analysis.yml @@ -47,12 +47,12 @@ jobs: # Initializes the CodeQL tools for scanning. - name: Initialize CodeQL - uses: github/codeql-action/init@fca7ace96b7d713c7035871441bd52efbe39e27e # v3 + uses: github/codeql-action/init@ce28f5bb42b7a9f2c824e633a3f6ee835bab6858 # v3 with: languages: ${{ matrix.language }} build-mode: ${{ matrix.build-mode }} - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@fca7ace96b7d713c7035871441bd52efbe39e27e # v3 + uses: github/codeql-action/analyze@ce28f5bb42b7a9f2c824e633a3f6ee835bab6858 # v3 with: category: "/language:${{matrix.language}}" diff --git a/.github/workflows/flow.yml b/.github/workflows/flow.yml index 2c2e41607d..683ab638bf 100644 --- a/.github/workflows/flow.yml +++ b/.github/workflows/flow.yml @@ -28,6 +28,11 @@ jobs: POSTGRES_PASSWORD: postgres POSTGRES_DB: postgres POSTGRES_INITDB_ARGS: --locale=C.UTF-8 + options: >- + --health-cmd pg_isready + --health-interval 10s + --health-timeout 5s + --health-retries 5 mysql: image: ${{ startsWith(matrix.db-version.mysql, 'mysql') && 'mysql:oracle@sha256:0596fa224cdf3b3355ce3ddbfd7ce77be27ec9e51841dfc5d2e1c8b81eea69d2' || '' }} ports: @@ -35,12 +40,12 @@ jobs: env: MYSQL_ROOT_PASSWORD: cipass redpanda: - image: redpandadata/redpanda@sha256:a138e257877c494fa6854502273ee8db07637395c0a6da076da5babb0735b181 + image: redpandadata/redpanda@sha256:95182a95a80f187a99cfe9fa03aaf18da0a3809a6741a229572f7ec33e795c13 ports: - 9092:9092 - 9644:9644 elasticsearch: - image: elasticsearch:9.0.1@sha256:26abc2782ee790b76df4e0806ea492967d14bcddb8c58e0eb3cc8593b363ad6c + image: elasticsearch:9.0.2@sha256:9681044f2e264ee9abde6594b7503537cfc98e44dd89d26b9533bfaab9c8c42b ports: - 9200:9200 env: @@ -48,7 +53,7 @@ jobs: xpack.security.enabled: false xpack.security.enrollment.enabled: false minio: - image: bitnami/minio:2025.5.24@sha256:69703ab18751142bf5487dae90fc5654cc426263e258610b0512cee00030369e + image: bitnami/minio:2025.5.24@sha256:451fe6858cb770cc9d0e77ba811ce287420f781c7c1b806a386f6896471a349c ports: - 9999:9999 env: @@ -58,7 +63,7 @@ jobs: AWS_EC2_METADATA_DISABLED: true MINIO_DEFAULT_BUCKETS: peerdb otelcol: - image: otel/opentelemetry-collector-contrib:0.127.0@sha256:e94cfd92357aa21f4101dda3c0c01f90e6f24115ba91b263c4d09fed7911ae68 + image: 
otel/opentelemetry-collector-contrib:0.128.0@sha256:1ab0baba0ee3695d823c46653d8a6e8894896e668ce8bd7ebe002e948d827bc7 ports: - 4317:4317 @@ -70,15 +75,13 @@ jobs: - uses: actions/setup-go@d35c59abb061a4a6fb18e82ac0862c26744d6ab5 # v5 with: - go-version: '1.24.3' + go-version: '1.24.4' cache-dependency-path: flow/go.sum - name: install lib-geos run: | # No need to update man pages on package install sudo apt-get remove --purge man-db - - sudo apt-get update sudo apt-get install libgeos-dev - run: go mod download @@ -126,9 +129,35 @@ jobs: if: matrix.db-version.mysql == 'maria' run: docker run -d --rm --name mariadb -p 3306:3306 -e MARIADB_ROOT_PASSWORD=cipass mariadb:lts --log-bin=maria - - name: create hstore extension, increase logical replication limits, and setup catalog database + - name: Mongo + run: | + docker run -d --rm --name mongo -p 27017:27017 mongo:8.0.10 --replSet rs0 --bind_ip_all + until docker exec mongo mongosh --eval 'db.runCommand({ ping: 1 })' &> /dev/null; do + echo "Waiting for MongoDB to be ready..." + sleep 2 + done + docker exec mongo mongosh --eval 'rs.initiate({ + _id: "rs0", + members: [ + { _id: 0, host: "localhost:27017" } + ] + })' + + - name: MinIO TLS + run: > + mkdir -p certs && + openssl genrsa -out certs/cert.key 2048 && + openssl req -new -key certs/cert.key -out certs/cert.csr -subj /CN=minio.local && + openssl x509 -req -days 3650 -in certs/cert.csr -signkey certs/cert.key -out certs/cert.crt && + chown -R 1001 certs && + docker run -d --rm --name miniotls -p 9998:9998 -v "$PWD/certs:/certs" -e MINIO_SCHEME=https bitnami/minio:latest + + - name: create postgres extensions, increase logical replication limits, and setup catalog database run: > - docker exec "${{ job.services.catalog.id }}" psql -U postgres -c "CREATE EXTENSION hstore;" + docker exec "${{ job.services.catalog.id }}" apk add --no-cache build-base git && + docker exec "${{ job.services.catalog.id }}" git clone --branch v0.8.0 https://github.com/pgvector/pgvector.git /tmp/pgvector && + docker exec "${{ job.services.catalog.id }}" sh -c 'cd /tmp/pgvector && make with_llvm=no && make with_llvm=no install' && + docker exec "${{ job.services.catalog.id }}" psql -U postgres -c "CREATE EXTENSION hstore;CREATE EXTENSION vector;" -c "ALTER SYSTEM SET wal_level=logical;" -c "ALTER SYSTEM SET max_replication_slots=192;" -c "ALTER SYSTEM SET max_wal_senders=256;" @@ -184,6 +213,7 @@ jobs: AWS_ACCESS_KEY_ID: minio AWS_SECRET_ACCESS_KEY: miniosecret AWS_REGION: us-east-1 + AWS_ENDPOINT_URL_S3_TLS: https://localhost:9998 PEERDB_CLICKHOUSE_AWS_CREDENTIALS_AWS_ACCESS_KEY_ID: minio PEERDB_CLICKHOUSE_AWS_CREDENTIALS_AWS_SECRET_ACCESS_KEY: miniosecret PEERDB_CLICKHOUSE_AWS_CREDENTIALS_AWS_REGION: us-east-1 @@ -212,6 +242,8 @@ jobs: ELASTICSEARCH_TEST_ADDRESS: http://localhost:9200 CI_PG_VERSION: ${{ matrix.db-version.pg }} CI_MYSQL_VERSION: ${{ matrix.db-version.mysql }} + CI_MONGO_VERSION: 8.0.10 + CI_MONGO_URI: mongodb://localhost:27017/?replicaSet=rs0 ENABLE_OTEL_METRICS: ${{ (matrix.db-version.pg == '16' || matrix.db-version.mysql == 'mysql-pos') && 'true' || 'false' }} OTEL_EXPORTER_OTLP_METRICS_ENDPOINT: http://localhost:4317 OTEL_EXPORTER_OTLP_METRICS_PROTOCOL: grpc diff --git a/.github/workflows/golang-lint.yml b/.github/workflows/golang-lint.yml index 2672895350..65e6f5ad80 100644 --- a/.github/workflows/golang-lint.yml +++ b/.github/workflows/golang-lint.yml @@ -21,15 +21,20 @@ jobs: - name: install lib-geos run: | - sudo apt-get update sudo apt-get install libgeos-dev - uses: 
actions/setup-go@d35c59abb061a4a6fb18e82ac0862c26744d6ab5 # v5 with: - go-version: '1.24.3' + go-version: '1.24.4' cache: false - - name: golangci-lint + - name: golangci-lint flow uses: golangci/golangci-lint-action@4afd733a84b1f43292c63897423277bb7f4313a9 # v8 with: version: v2.1.6 working-directory: ./flow args: --timeout=10m + - name: golangci-lint e2e_cleanup + uses: golangci/golangci-lint-action@4afd733a84b1f43292c63897423277bb7f4313a9 # v8 + with: + version: v2.1.6 + working-directory: ./e2e_cleanup + args: --timeout=10m diff --git a/docker-compose-dev.yml b/docker-compose-dev.yml index f138c2e9dd..99ef5384b2 100644 --- a/docker-compose-dev.yml +++ b/docker-compose-dev.yml @@ -39,7 +39,7 @@ x-flow-worker-env: &flow-worker-env services: catalog: container_name: catalog - image: postgres:17-alpine@sha256:bcb90dc18910057ff49ce2ea157d8a0d534964090d39af959df41083f18c3318 + image: postgres:17-alpine@sha256:fbe21607052bb5c298674f2fd8cf044a63aa3ddf50b81627f894f91f40f50bcb command: -c config_file=/etc/postgresql.conf ports: - 9901:5432 @@ -110,7 +110,7 @@ services: - TEMPORAL_ADDRESS=temporal:7233 - TEMPORAL_CORS_ORIGINS=http://localhost:3000 - TEMPORAL_CSRF_COOKIE_INSECURE=true - image: temporalio/ui:v2.37.4@sha256:757dfa399aa923edb76fe1b1054c146cab83cf7aa0cf5182873c42d503cf8b2b + image: temporalio/ui:v2.38.3@sha256:0e9c3bd6a79c4d8ffac9aed098f22862a1c0f4a73f7fc47e3a1031a413d7d967 ports: - 8085:8080 diff --git a/docker-compose.yml b/docker-compose.yml index 71725d9d48..c0134ce672 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -32,7 +32,7 @@ x-flow-worker-env: &flow-worker-env services: catalog: container_name: catalog - image: postgres:17-alpine@sha256:bcb90dc18910057ff49ce2ea157d8a0d534964090d39af959df41083f18c3318 + image: postgres:17-alpine@sha256:fbe21607052bb5c298674f2fd8cf044a63aa3ddf50b81627f894f91f40f50bcb command: -c config_file=/etc/postgresql.conf restart: unless-stopped ports: @@ -106,13 +106,13 @@ services: - TEMPORAL_ADDRESS=temporal:7233 - TEMPORAL_CORS_ORIGINS=http://localhost:3000 - TEMPORAL_CSRF_COOKIE_INSECURE=true - image: temporalio/ui:v2.37.4@sha256:757dfa399aa923edb76fe1b1054c146cab83cf7aa0cf5182873c42d503cf8b2b + image: temporalio/ui:v2.38.3@sha256:0e9c3bd6a79c4d8ffac9aed098f22862a1c0f4a73f7fc47e3a1031a413d7d967 ports: - 8085:8080 flow-api: container_name: flow_api - image: ghcr.io/peerdb-io/flow-api:stable-v0.30.0 + image: ghcr.io/peerdb-io/flow-api:stable-v0.30.4 restart: unless-stopped ports: - 8112:8112 @@ -128,7 +128,7 @@ services: flow-snapshot-worker: container_name: flow-snapshot-worker - image: ghcr.io/peerdb-io/flow-snapshot-worker:stable-v0.30.0 + image: ghcr.io/peerdb-io/flow-snapshot-worker:stable-v0.30.4 restart: unless-stopped environment: <<: [*catalog-config, *flow-worker-env, *minio-config] @@ -138,7 +138,7 @@ services: flow-worker: container_name: flow-worker - image: ghcr.io/peerdb-io/flow-worker:stable-v0.30.0 + image: ghcr.io/peerdb-io/flow-worker:stable-v0.30.4 restart: unless-stopped environment: <<: [*catalog-config, *flow-worker-env, *minio-config] @@ -151,7 +151,7 @@ services: peerdb: container_name: peerdb-server stop_signal: SIGINT - image: ghcr.io/peerdb-io/peerdb-server:stable-v0.30.0 + image: ghcr.io/peerdb-io/peerdb-server:stable-v0.30.4 restart: unless-stopped environment: <<: *catalog-config @@ -167,7 +167,7 @@ services: peerdb-ui: container_name: peerdb-ui - image: ghcr.io/peerdb-io/peerdb-ui:stable-v0.30.0 + image: ghcr.io/peerdb-io/peerdb-ui:stable-v0.30.4 restart: unless-stopped ports: - 3000:3000 diff --git 
a/e2e_cleanup/go.mod b/e2e_cleanup/go.mod index 24fd1c909c..c764854024 100644 --- a/e2e_cleanup/go.mod +++ b/e2e_cleanup/go.mod @@ -7,12 +7,12 @@ require ( cloud.google.com/go/pubsub v1.49.0 github.com/snowflakedb/gosnowflake v1.14.1 github.com/youmark/pkcs8 v0.0.0-20240726163527-a2c0da244d78 - google.golang.org/api v0.235.0 + google.golang.org/api v0.238.0 ) require ( cloud.google.com/go v0.121.2 // indirect - cloud.google.com/go/auth v0.16.1 // indirect + cloud.google.com/go/auth v0.16.2 // indirect cloud.google.com/go/auth/oauth2adapt v0.2.8 // indirect cloud.google.com/go/compute/metadata v0.7.0 // indirect cloud.google.com/go/iam v1.5.2 // indirect @@ -23,28 +23,28 @@ require ( github.com/Azure/azure-sdk-for-go/sdk/storage/azblob v1.6.1 // indirect github.com/BurntSushi/toml v1.5.0 // indirect github.com/andybalholm/brotli v1.1.1 // indirect - github.com/apache/arrow-go/v18 v18.3.0 // indirect + github.com/apache/arrow-go/v18 v18.3.1 // indirect github.com/apache/arrow/go/v15 v15.0.2 // indirect github.com/apache/thrift v0.22.0 // indirect - github.com/aws/aws-sdk-go-v2 v1.36.3 // indirect - github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.6.10 // indirect - github.com/aws/aws-sdk-go-v2/config v1.29.14 // indirect - github.com/aws/aws-sdk-go-v2/credentials v1.17.67 // indirect - github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.16.30 // indirect - github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.17.77 // indirect - github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.34 // indirect - github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.34 // indirect + github.com/aws/aws-sdk-go-v2 v1.36.5 // indirect + github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.6.11 // indirect + github.com/aws/aws-sdk-go-v2/config v1.29.17 // indirect + github.com/aws/aws-sdk-go-v2/credentials v1.17.70 // indirect + github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.16.32 // indirect + github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.17.81 // indirect + github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.36 // indirect + github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.36 // indirect github.com/aws/aws-sdk-go-v2/internal/ini v1.8.3 // indirect - github.com/aws/aws-sdk-go-v2/internal/v4a v1.3.34 // indirect - github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.12.3 // indirect - github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.7.2 // indirect - github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.12.15 // indirect - github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.18.15 // indirect - github.com/aws/aws-sdk-go-v2/service/s3 v1.80.0 // indirect - github.com/aws/aws-sdk-go-v2/service/sso v1.25.3 // indirect - github.com/aws/aws-sdk-go-v2/service/ssooidc v1.30.1 // indirect - github.com/aws/aws-sdk-go-v2/service/sts v1.33.19 // indirect - github.com/aws/smithy-go v1.22.3 // indirect + github.com/aws/aws-sdk-go-v2/internal/v4a v1.3.36 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.12.4 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.7.4 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.12.17 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.18.17 // indirect + github.com/aws/aws-sdk-go-v2/service/s3 v1.81.0 // indirect + github.com/aws/aws-sdk-go-v2/service/sso v1.25.5 // indirect + github.com/aws/aws-sdk-go-v2/service/ssooidc v1.30.3 // indirect + github.com/aws/aws-sdk-go-v2/service/sts v1.34.0 // indirect + github.com/aws/smithy-go 
v1.22.4 // indirect github.com/danieljoos/wincred v1.2.2 // indirect github.com/dvsekhvalnov/jose2go v1.8.0 // indirect github.com/felixge/httpsnoop v1.0.4 // indirect @@ -79,21 +79,21 @@ require ( go.opentelemetry.io/otel v1.36.0 // indirect go.opentelemetry.io/otel/metric v1.36.0 // indirect go.opentelemetry.io/otel/trace v1.36.0 // indirect - golang.org/x/crypto v0.38.0 // indirect - golang.org/x/exp v0.0.0-20250531010427-b6e5de432a8b // indirect - golang.org/x/mod v0.24.0 // indirect - golang.org/x/net v0.40.0 // indirect + golang.org/x/crypto v0.39.0 // indirect + golang.org/x/exp v0.0.0-20250620022241-b7579e27df2b // indirect + golang.org/x/mod v0.25.0 // indirect + golang.org/x/net v0.41.0 // indirect golang.org/x/oauth2 v0.30.0 // indirect - golang.org/x/sync v0.14.0 // indirect + golang.org/x/sync v0.15.0 // indirect golang.org/x/sys v0.33.0 // indirect golang.org/x/term v0.32.0 // indirect - golang.org/x/text v0.25.0 // indirect - golang.org/x/time v0.11.0 // indirect - golang.org/x/tools v0.33.0 // indirect + golang.org/x/text v0.26.0 // indirect + golang.org/x/time v0.12.0 // indirect + golang.org/x/tools v0.34.0 // indirect golang.org/x/xerrors v0.0.0-20240903120638-7835f813f4da // indirect - google.golang.org/genproto v0.0.0-20250528174236-200df99c418a // indirect - google.golang.org/genproto/googleapis/api v0.0.0-20250528174236-200df99c418a // indirect - google.golang.org/genproto/googleapis/rpc v0.0.0-20250528174236-200df99c418a // indirect - google.golang.org/grpc v1.72.2 // indirect + google.golang.org/genproto v0.0.0-20250603155806-513f23925822 // indirect + google.golang.org/genproto/googleapis/api v0.0.0-20250603155806-513f23925822 // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20250603155806-513f23925822 // indirect + google.golang.org/grpc v1.73.0 // indirect google.golang.org/protobuf v1.36.6 // indirect ) diff --git a/e2e_cleanup/go.sum b/e2e_cleanup/go.sum index fda862d938..75a2f37336 100644 --- a/e2e_cleanup/go.sum +++ b/e2e_cleanup/go.sum @@ -1,10 +1,10 @@ -cel.dev/expr v0.20.0 h1:OunBvVCfvpWlt4dN7zg3FM6TDkzOePe1+foGJ9AXeeI= -cel.dev/expr v0.20.0/go.mod h1:MrpN08Q+lEBs+bGYdLxxHkZoUSsCp0nSKTs0nTymJgw= +cel.dev/expr v0.23.0 h1:wUb94w6OYQS4uXraxo9U+wUAs9jT47Xvl4iPgAwM2ss= +cel.dev/expr v0.23.0/go.mod h1:hLPLo1W4QUmuYdA72RBX06QTs6MXw941piREPl3Yfiw= cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= cloud.google.com/go v0.121.2 h1:v2qQpN6Dx9x2NmwrqlesOt3Ys4ol5/lFZ6Mg1B7OJCg= cloud.google.com/go v0.121.2/go.mod h1:nRFlrHq39MNVWu+zESP2PosMWA0ryJw8KUBZ2iZpxbw= -cloud.google.com/go/auth v0.16.1 h1:XrXauHMd30LhQYVRHLGvJiYeczweKQXZxsTbV9TiguU= -cloud.google.com/go/auth v0.16.1/go.mod h1:1howDHJ5IETh/LwYs3ZxvlkXF48aSqqJUM+5o02dNOI= +cloud.google.com/go/auth v0.16.2 h1:QvBAGFPLrDeoiNjyfVunhQ10HKNYuOwZ5noee0M5df4= +cloud.google.com/go/auth v0.16.2/go.mod h1:sRBas2Y1fB1vZTdurouM0AzuYQBMZinrUYL8EufhtEA= cloud.google.com/go/auth/oauth2adapt v0.2.8 h1:keo8NaayQZ6wimpNSmW5OPc283g65QNIiLpZnkHRbnc= cloud.google.com/go/auth/oauth2adapt v0.2.8/go.mod h1:XQ9y31RkqZCcwJWNSx2Xvric3RrU88hAYYbjDWYDL+c= cloud.google.com/go/bigquery v1.69.0 h1:rZvHnjSUs5sHK3F9awiuFk2PeOaB8suqNuim21GbaTc= @@ -52,57 +52,57 @@ github.com/GoogleCloudPlatform/opentelemetry-operations-go/internal/resourcemapp github.com/GoogleCloudPlatform/opentelemetry-operations-go/internal/resourcemapping v0.51.0/go.mod h1:otE2jQekW/PqXk1Awf5lmfokJx4uwuqcj1ab5SpGeW0= github.com/andybalholm/brotli v1.1.1 h1:PR2pgnyFznKEugtsUo0xLdDop5SKXd5Qf5ysW+7XdTA= github.com/andybalholm/brotli 
v1.1.1/go.mod h1:05ib4cKhjx3OQYUY22hTVd34Bc8upXjOLL2rKwwZBoA= -github.com/apache/arrow-go/v18 v18.3.0 h1:Xq4A6dZj9Nu33sqZibzn012LNnewkTUlfKVUFD/RX/I= -github.com/apache/arrow-go/v18 v18.3.0/go.mod h1:eEM1DnUTHhgGAjf/ChvOAQbUQ+EPohtDrArffvUjPg8= +github.com/apache/arrow-go/v18 v18.3.1 h1:oYZT8FqONiK74JhlH3WKVv+2NKYoyZ7C2ioD4Dj3ixk= +github.com/apache/arrow-go/v18 v18.3.1/go.mod h1:12QBya5JZT6PnBihi5NJTzbACrDGXYkrgjujz3MRQXU= github.com/apache/arrow/go/v15 v15.0.2 h1:60IliRbiyTWCWjERBCkO1W4Qun9svcYoZrSLcyOsMLE= github.com/apache/arrow/go/v15 v15.0.2/go.mod h1:DGXsR3ajT524njufqf95822i+KTh+yea1jass9YXgjA= github.com/apache/thrift v0.22.0 h1:r7mTJdj51TMDe6RtcmNdQxgn9XcyfGDOzegMDRg47uc= github.com/apache/thrift v0.22.0/go.mod h1:1e7J/O1Ae6ZQMTYdy9xa3w9k+XHWPfRvdPyJeynQ+/g= -github.com/aws/aws-sdk-go-v2 v1.36.3 h1:mJoei2CxPutQVxaATCzDUjcZEjVRdpsiiXi2o38yqWM= -github.com/aws/aws-sdk-go-v2 v1.36.3/go.mod h1:LLXuLpgzEbD766Z5ECcRmi8AzSwfZItDtmABVkRLGzg= -github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.6.10 h1:zAybnyUQXIZ5mok5Jqwlf58/TFE7uvd3IAsa1aF9cXs= -github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.6.10/go.mod h1:qqvMj6gHLR/EXWZw4ZbqlPbQUyenf4h82UQUlKc+l14= -github.com/aws/aws-sdk-go-v2/config v1.29.14 h1:f+eEi/2cKCg9pqKBoAIwRGzVb70MRKqWX4dg1BDcSJM= -github.com/aws/aws-sdk-go-v2/config v1.29.14/go.mod h1:wVPHWcIFv3WO89w0rE10gzf17ZYy+UVS1Geq8Iei34g= -github.com/aws/aws-sdk-go-v2/credentials v1.17.67 h1:9KxtdcIA/5xPNQyZRgUSpYOE6j9Bc4+D7nZua0KGYOM= -github.com/aws/aws-sdk-go-v2/credentials v1.17.67/go.mod h1:p3C44m+cfnbv763s52gCqrjaqyPikj9Sg47kUVaNZQQ= -github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.16.30 h1:x793wxmUWVDhshP8WW2mlnXuFrO4cOd3HLBroh1paFw= -github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.16.30/go.mod h1:Jpne2tDnYiFascUEs2AWHJL9Yp7A5ZVy3TNyxaAjD6M= -github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.17.77 h1:xaRN9fags7iJznsMEjtcEuON1hGfCZ0y5MVfEMKtrx8= -github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.17.77/go.mod h1:lolsiGkT47AZ3DWqtxgEQM/wVMpayi7YWNjl3wHSRx8= -github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.34 h1:ZK5jHhnrioRkUNOc+hOgQKlUL5JeC3S6JgLxtQ+Rm0Q= -github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.34/go.mod h1:p4VfIceZokChbA9FzMbRGz5OV+lekcVtHlPKEO0gSZY= -github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.34 h1:SZwFm17ZUNNg5Np0ioo/gq8Mn6u9w19Mri8DnJ15Jf0= -github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.34/go.mod h1:dFZsC0BLo346mvKQLWmoJxT+Sjp+qcVR1tRVHQGOH9Q= +github.com/aws/aws-sdk-go-v2 v1.36.5 h1:0OF9RiEMEdDdZEMqF9MRjevyxAQcf6gY+E7vwBILFj0= +github.com/aws/aws-sdk-go-v2 v1.36.5/go.mod h1:EYrzvCCN9CMUTa5+6lf6MM4tq3Zjp8UhSGR/cBsjai0= +github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.6.11 h1:12SpdwU8Djs+YGklkinSSlcrPyj3H4VifVsKf78KbwA= +github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.6.11/go.mod h1:dd+Lkp6YmMryke+qxW/VnKyhMBDTYP41Q2Bb+6gNZgY= +github.com/aws/aws-sdk-go-v2/config v1.29.17 h1:jSuiQ5jEe4SAMH6lLRMY9OVC+TqJLP5655pBGjmnjr0= +github.com/aws/aws-sdk-go-v2/config v1.29.17/go.mod h1:9P4wwACpbeXs9Pm9w1QTh6BwWwJjwYvJ1iCt5QbCXh8= +github.com/aws/aws-sdk-go-v2/credentials v1.17.70 h1:ONnH5CM16RTXRkS8Z1qg7/s2eDOhHhaXVd72mmyv4/0= +github.com/aws/aws-sdk-go-v2/credentials v1.17.70/go.mod h1:M+lWhhmomVGgtuPOhO85u4pEa3SmssPTdcYpP/5J/xc= +github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.16.32 h1:KAXP9JSHO1vKGCr5f4O6WmlVKLFFXgWYAGoJosorxzU= +github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.16.32/go.mod h1:h4Sg6FQdexC1yYG9RDnOvLbW1a/P986++/Y/a+GyEM8= +github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.17.81 
h1:E5ff1vZlAudg24j5lF6F6/gBpln2LjWxGdQDBSLfVe4= +github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.17.81/go.mod h1:hHBLCuhHI4Aokvs5vdVoCDBzmFy86yxs5J7LEPQwQEM= +github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.36 h1:SsytQyTMHMDPspp+spo7XwXTP44aJZZAC7fBV2C5+5s= +github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.36/go.mod h1:Q1lnJArKRXkenyog6+Y+zr7WDpk4e6XlR6gs20bbeNo= +github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.36 h1:i2vNHQiXUvKhs3quBR6aqlgJaiaexz/aNvdCktW/kAM= +github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.36/go.mod h1:UdyGa7Q91id/sdyHPwth+043HhmP6yP9MBHgbZM0xo8= github.com/aws/aws-sdk-go-v2/internal/ini v1.8.3 h1:bIqFDwgGXXN1Kpp99pDOdKMTTb5d2KyU5X/BZxjOkRo= github.com/aws/aws-sdk-go-v2/internal/ini v1.8.3/go.mod h1:H5O/EsxDWyU+LP/V8i5sm8cxoZgc2fdNR9bxlOFrQTo= -github.com/aws/aws-sdk-go-v2/internal/v4a v1.3.34 h1:ZNTqv4nIdE/DiBfUUfXcLZ/Spcuz+RjeziUtNJackkM= -github.com/aws/aws-sdk-go-v2/internal/v4a v1.3.34/go.mod h1:zf7Vcd1ViW7cPqYWEHLHJkS50X0JS2IKz9Cgaj6ugrs= -github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.12.3 h1:eAh2A4b5IzM/lum78bZ590jy36+d/aFLgKF/4Vd1xPE= -github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.12.3/go.mod h1:0yKJC/kb8sAnmlYa6Zs3QVYqaC8ug2AbnNChv5Ox3uA= -github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.7.2 h1:BCG7DCXEXpNCcpwCxg1oi9pkJWH2+eZzTn9MY56MbVw= -github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.7.2/go.mod h1:iu6FSzgt+M2/x3Dk8zhycdIcHjEFb36IS8HVUVFoMg0= -github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.12.15 h1:dM9/92u2F1JbDaGooxTq18wmmFzbJRfXfVfy96/1CXM= -github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.12.15/go.mod h1:SwFBy2vjtA0vZbjjaFtfN045boopadnoVPhu4Fv66vY= -github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.18.15 h1:moLQUoVq91LiqT1nbvzDukyqAlCv89ZmwaHw/ZFlFZg= -github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.18.15/go.mod h1:ZH34PJUc8ApjBIfgQCFvkWcUDBtl/WTD+uiYHjd8igA= -github.com/aws/aws-sdk-go-v2/service/s3 v1.80.0 h1:fV4XIU5sn/x8gjRouoJpDVHj+ExJaUk4prYF+eb6qTs= -github.com/aws/aws-sdk-go-v2/service/s3 v1.80.0/go.mod h1:qbn305Je/IofWBJ4bJz/Q7pDEtnnoInw/dGt71v6rHE= -github.com/aws/aws-sdk-go-v2/service/sso v1.25.3 h1:1Gw+9ajCV1jogloEv1RRnvfRFia2cL6c9cuKV2Ps+G8= -github.com/aws/aws-sdk-go-v2/service/sso v1.25.3/go.mod h1:qs4a9T5EMLl/Cajiw2TcbNt2UNo/Hqlyp+GiuG4CFDI= -github.com/aws/aws-sdk-go-v2/service/ssooidc v1.30.1 h1:hXmVKytPfTy5axZ+fYbR5d0cFmC3JvwLm5kM83luako= -github.com/aws/aws-sdk-go-v2/service/ssooidc v1.30.1/go.mod h1:MlYRNmYu/fGPoxBQVvBYr9nyr948aY/WLUvwBMBJubs= -github.com/aws/aws-sdk-go-v2/service/sts v1.33.19 h1:1XuUZ8mYJw9B6lzAkXhqHlJd/XvaX32evhproijJEZY= -github.com/aws/aws-sdk-go-v2/service/sts v1.33.19/go.mod h1:cQnB8CUnxbMU82JvlqjKR2HBOm3fe9pWorWBza6MBJ4= -github.com/aws/smithy-go v1.22.3 h1:Z//5NuZCSW6R4PhQ93hShNbyBbn8BWCmCVCt+Q8Io5k= -github.com/aws/smithy-go v1.22.3/go.mod h1:t1ufH5HMublsJYulve2RKmHDC15xu1f26kHCp/HgceI= +github.com/aws/aws-sdk-go-v2/internal/v4a v1.3.36 h1:GMYy2EOWfzdP3wfVAGXBNKY5vK4K8vMET4sYOYltmqs= +github.com/aws/aws-sdk-go-v2/internal/v4a v1.3.36/go.mod h1:gDhdAV6wL3PmPqBhiPbnlS447GoWs8HTTOYef9/9Inw= +github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.12.4 h1:CXV68E2dNqhuynZJPB80bhPQwAKqBWVer887figW6Jc= +github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.12.4/go.mod h1:/xFi9KtvBXP97ppCz1TAEvU1Uf66qvid89rbem3wCzQ= +github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.7.4 h1:nAP2GYbfh8dd2zGZqFRSMlq+/F6cMPBUuCsGAMkN074= 
+github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.7.4/go.mod h1:LT10DsiGjLWh4GbjInf9LQejkYEhBgBCjLG5+lvk4EE= +github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.12.17 h1:t0E6FzREdtCsiLIoLCWsYliNsRBgyGD/MCK571qk4MI= +github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.12.17/go.mod h1:ygpklyoaypuyDvOM5ujWGrYWpAK3h7ugnmKCU/76Ys4= +github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.18.17 h1:qcLWgdhq45sDM9na4cvXax9dyLitn8EYBRl8Ak4XtG4= +github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.18.17/go.mod h1:M+jkjBFZ2J6DJrjMv2+vkBbuht6kxJYtJiwoVgX4p4U= +github.com/aws/aws-sdk-go-v2/service/s3 v1.81.0 h1:1GmCadhKR3J2sMVKs2bAYq9VnwYeCqfRyZzD4RASGlA= +github.com/aws/aws-sdk-go-v2/service/s3 v1.81.0/go.mod h1:kUklwasNoCn5YpyAqC/97r6dzTA1SRKJfKq16SXeoDU= +github.com/aws/aws-sdk-go-v2/service/sso v1.25.5 h1:AIRJ3lfb2w/1/8wOOSqYb9fUKGwQbtysJ2H1MofRUPg= +github.com/aws/aws-sdk-go-v2/service/sso v1.25.5/go.mod h1:b7SiVprpU+iGazDUqvRSLf5XmCdn+JtT1on7uNL6Ipc= +github.com/aws/aws-sdk-go-v2/service/ssooidc v1.30.3 h1:BpOxT3yhLwSJ77qIY3DoHAQjZsc4HEGfMCE4NGy3uFg= +github.com/aws/aws-sdk-go-v2/service/ssooidc v1.30.3/go.mod h1:vq/GQR1gOFLquZMSrxUK/cpvKCNVYibNyJ1m7JrU88E= +github.com/aws/aws-sdk-go-v2/service/sts v1.34.0 h1:NFOJ/NXEGV4Rq//71Hs1jC/NvPs1ezajK+yQmkwnPV0= +github.com/aws/aws-sdk-go-v2/service/sts v1.34.0/go.mod h1:7ph2tGpfQvwzgistp2+zga9f+bCjlQJPkPUmMgDSD7w= +github.com/aws/smithy-go v1.22.4 h1:uqXzVZNuNexwc/xrh6Tb56u89WDlJY6HS+KC0S4QSjw= +github.com/aws/smithy-go v1.22.4/go.mod h1:t1ufH5HMublsJYulve2RKmHDC15xu1f26kHCp/HgceI= github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc= -github.com/cncf/xds/go v0.0.0-20250121191232-2f005788dc42 h1:Om6kYQYDUk5wWbT0t0q6pvyM49i9XZAv9dDrkDA7gjk= -github.com/cncf/xds/go v0.0.0-20250121191232-2f005788dc42/go.mod h1:W+zGtBO5Y1IgJhy4+A9GOqVhqLpfZi+vwmdNXUehLA8= +github.com/cncf/xds/go v0.0.0-20250326154945-ae57f3c0d45f h1:C5bqEmzEPLsHm9Mv73lSE9e9bKV23aB1vxOsmZrkl3k= +github.com/cncf/xds/go v0.0.0-20250326154945-ae57f3c0d45f/go.mod h1:W+zGtBO5Y1IgJhy4+A9GOqVhqLpfZi+vwmdNXUehLA8= github.com/danieljoos/wincred v1.2.2 h1:774zMFJrqaeYCK2W57BgAem/MLi6mtSE47MB6BOJ0i0= github.com/danieljoos/wincred v1.2.2/go.mod h1:w7w4Utbrz8lqeMbDAK0lkNJUv5sAOkFi7nd/ogr0Uh8= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= @@ -123,8 +123,8 @@ github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2 github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U= github.com/gabriel-vasile/mimetype v1.4.9 h1:5k+WDwEsD9eTLL8Tz3L0VnmVh9QxGjRmjBvAG7U/oYY= github.com/gabriel-vasile/mimetype v1.4.9/go.mod h1:WnSQhFKJuBlRyLiKohA/2DtIlPFAbguNaG7QCHcyGok= -github.com/go-jose/go-jose/v4 v4.0.4 h1:VsjPI33J0SB9vQM6PLmNjoHqMQNGPiZ0rHL7Ni7Q6/E= -github.com/go-jose/go-jose/v4 v4.0.4/go.mod h1:NKb5HO1EZccyMpiZNbdUw/14tiXNyUJh188dfnMCAfc= +github.com/go-jose/go-jose/v4 v4.0.5 h1:M6T8+mKZl/+fNNuFHvGIzDz7BTLQPIounk/b9dw3AaE= +github.com/go-jose/go-jose/v4 v4.0.5/go.mod h1:s3P1lRrkT8igV8D9OjyL4WRyHvjB6a4JSllnOrmmBOA= github.com/go-logr/logr 
v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI= github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= @@ -260,32 +260,32 @@ go.opentelemetry.io/otel/trace v1.36.0 h1:ahxWNuqZjpdiFAyrIoQ4GIiAIhxAunQR6MUoKr go.opentelemetry.io/otel/trace v1.36.0/go.mod h1:gQ+OnDZzrybY4k4seLzPAWNwVBBVlF2szhehOBB/tGA= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= -golang.org/x/crypto v0.38.0 h1:jt+WWG8IZlBnVbomuhg2Mdq0+BBQaHbtqHEFEigjUV8= -golang.org/x/crypto v0.38.0/go.mod h1:MvrbAqul58NNYPKnOra203SB9vpuZW0e+RRZV+Ggqjw= +golang.org/x/crypto v0.39.0 h1:SHs+kF4LP+f+p14esP5jAoDpHU8Gu/v9lFRK6IT5imM= +golang.org/x/crypto v0.39.0/go.mod h1:L+Xg3Wf6HoL4Bn4238Z6ft6KfEpN0tJGo53AAPC632U= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= -golang.org/x/exp v0.0.0-20250531010427-b6e5de432a8b h1:QoALfVG9rhQ/M7vYDScfPdWjGL9dlsVVM5VGh7aKoAA= -golang.org/x/exp v0.0.0-20250531010427-b6e5de432a8b/go.mod h1:U6Lno4MTRCDY+Ba7aCcauB9T60gsv5s4ralQzP72ZoQ= +golang.org/x/exp v0.0.0-20250620022241-b7579e27df2b h1:M2rDM6z3Fhozi9O7NWsxAkg/yqS/lQJ6PmkyIV3YP+o= +golang.org/x/exp v0.0.0-20250620022241-b7579e27df2b/go.mod h1:3//PLf8L/X+8b4vuAfHzxeRUl04Adcb341+IGKfnqS8= golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU= golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= -golang.org/x/mod v0.24.0 h1:ZfthKaKaT4NrhGVZHO1/WDTwGES4De8KtWO0SIbNJMU= -golang.org/x/mod v0.24.0/go.mod h1:IXM97Txy2VM4PJ3gI61r1YEk/gAj6zAHN3AdZt6S9Ww= +golang.org/x/mod v0.25.0 h1:n7a+ZbQKQA/Ysbyb0/6IbB1H/X41mKgbhfv7AfG/44w= +golang.org/x/mod v0.25.0/go.mod h1:IXM97Txy2VM4PJ3gI61r1YEk/gAj6zAHN3AdZt6S9Ww= golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20201110031124-69a78807bb2b/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= -golang.org/x/net v0.40.0 h1:79Xs7wF06Gbdcg4kdCCIQArK11Z1hr5POQ6+fIYHNuY= -golang.org/x/net v0.40.0/go.mod h1:y0hY0exeL2Pku80/zKK7tpntoX23cqL3Oa6njdgRtds= +golang.org/x/net v0.41.0 h1:vBTly1HeNPEn3wtREYfy4GZ/NECgw2Cnl+nK6Nz3uvw= +golang.org/x/net v0.41.0/go.mod h1:B/K4NNqkfmg07DQYrbwvSluqCJOOXwUjeb/5lOisjbA= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.30.0 h1:dnDm7JmhM45NNpd8FDDeLhK6FwqbOf4MLCM9zb1BOHI= golang.org/x/oauth2 v0.30.0/go.mod h1:B++QgG3ZKulg6sRPGD/mqlHQs5rB3Ml9erfeDY7xKlU= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod 
h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.14.0 h1:woo0S4Yywslg6hp4eUFjTVOyKt0RookbpAHG4c1HmhQ= -golang.org/x/sync v0.14.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA= +golang.org/x/sync v0.15.0 h1:KWH3jNZsfyT6xfAfKiz6MRNmd46ByHDYaZ7KSkCtdW8= +golang.org/x/sync v0.15.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA= golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -298,42 +298,42 @@ golang.org/x/term v0.32.0 h1:DR4lr0TjUs3epypdhTOkMmuF5CDFJ/8pOnbzMZPQ7bg= golang.org/x/term v0.32.0/go.mod h1:uZG1FhGx848Sqfsq4/DlJr3xGGsYMu/L5GW4abiaEPQ= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= -golang.org/x/text v0.25.0 h1:qVyWApTSYLk/drJRO5mDlNYskwQznZmkpV2c8q9zls4= -golang.org/x/text v0.25.0/go.mod h1:WEdwpYrmk1qmdHvhkSTNPm3app7v4rsT8F2UD6+VHIA= -golang.org/x/time v0.11.0 h1:/bpjEDfN9tkoN/ryeYHnv5hcMlc8ncjMcM4XBk5NWV0= -golang.org/x/time v0.11.0/go.mod h1:CDIdPxbZBQxdj6cxyCIdrNogrJKMJ7pr37NYpMcMDSg= +golang.org/x/text v0.26.0 h1:P42AVeLghgTYr4+xUnTRKDMqpar+PtX7KWuNQL21L8M= +golang.org/x/text v0.26.0/go.mod h1:QK15LZJUUQVJxhz7wXgxSy/CJaTFjd0G+YLonydOVQA= +golang.org/x/time v0.12.0 h1:ScB/8o8olJvc+CQPWrK3fPZNfh7qgwCrY0zJmoEQLSE= +golang.org/x/time v0.12.0/go.mod h1:CDIdPxbZBQxdj6cxyCIdrNogrJKMJ7pr37NYpMcMDSg= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY= golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= -golang.org/x/tools v0.33.0 h1:4qz2S3zmRxbGIhDIAgjxvFutSvH5EfnsYrRBj0UI0bc= -golang.org/x/tools v0.33.0/go.mod h1:CIJMaWEY88juyUfo7UbgPqbC8rU2OqfAV1h2Qp0oMYI= +golang.org/x/tools v0.34.0 h1:qIpSLOxeCYGg9TrcJokLBG4KFA6d795g0xkBkiESGlo= +golang.org/x/tools v0.34.0/go.mod h1:pAP9OwEaY1CAW3HOmg3hLZC5Z0CCmzjAF2UQMSqNARg= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20240903120638-7835f813f4da h1:noIWHXmPHxILtqtCOPIhSt0ABwskkZKjD3bXGnZGpNY= golang.org/x/xerrors v0.0.0-20240903120638-7835f813f4da/go.mod h1:NDW/Ps6MPRej6fsCIbMTohpP40sJ/P/vI1MoTEGwX90= gonum.org/v1/gonum v0.16.0 h1:5+ul4Swaf3ESvrOnidPp4GZbzf0mxVQpDCYUQE7OJfk= gonum.org/v1/gonum v0.16.0/go.mod h1:fef3am4MQ93R2HHpKnLk4/Tbh/s0+wqD5nfa6Pnwy4E= -google.golang.org/api v0.235.0 h1:C3MkpQSRxS1Jy6AkzTGKKrpSCOd2WOGrezZ+icKSkKo= -google.golang.org/api v0.235.0/go.mod h1:QpeJkemzkFKe5VCE/PMv7GsUfn9ZF+u+q1Q7w6ckxTg= +google.golang.org/api v0.238.0 h1:+EldkglWIg/pWjkq97sd+XxH7PxakNYoe/rkSTbnvOs= +google.golang.org/api v0.238.0/go.mod h1:cOVEm2TpdAGHL2z+UwyS+kmlGr3bVWQQ6sYEqkKje50= google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= google.golang.org/appengine 
v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc= google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo= -google.golang.org/genproto v0.0.0-20250528174236-200df99c418a h1:KXuwdBmgjb4T3l4ZzXhP6HxxFKXD9FcK5/8qfJI4WwU= -google.golang.org/genproto v0.0.0-20250528174236-200df99c418a/go.mod h1:Nlk93rrS2X7rV8hiC2gh2A/AJspZhElz9Oh2KGsjLEY= -google.golang.org/genproto/googleapis/api v0.0.0-20250528174236-200df99c418a h1:SGktgSolFCo75dnHJF2yMvnns6jCmHFJ0vE4Vn2JKvQ= -google.golang.org/genproto/googleapis/api v0.0.0-20250528174236-200df99c418a/go.mod h1:a77HrdMjoeKbnd2jmgcWdaS++ZLZAEq3orIOAEIKiVw= -google.golang.org/genproto/googleapis/rpc v0.0.0-20250528174236-200df99c418a h1:v2PbRU4K3llS09c7zodFpNePeamkAwG3mPrAery9VeE= -google.golang.org/genproto/googleapis/rpc v0.0.0-20250528174236-200df99c418a/go.mod h1:qQ0YXyHHx3XkvlzUtpXDkS29lDSafHMZBAZDc03LQ3A= +google.golang.org/genproto v0.0.0-20250603155806-513f23925822 h1:rHWScKit0gvAPuOnu87KpaYtjK5zBMLcULh7gxkCXu4= +google.golang.org/genproto v0.0.0-20250603155806-513f23925822/go.mod h1:HubltRL7rMh0LfnQPkMH4NPDFEWp0jw3vixw7jEM53s= +google.golang.org/genproto/googleapis/api v0.0.0-20250603155806-513f23925822 h1:oWVWY3NzT7KJppx2UKhKmzPq4SRe0LdCijVRwvGeikY= +google.golang.org/genproto/googleapis/api v0.0.0-20250603155806-513f23925822/go.mod h1:h3c4v36UTKzUiuaOKQ6gr3S+0hovBtUrXzTG/i3+XEc= +google.golang.org/genproto/googleapis/rpc v0.0.0-20250603155806-513f23925822 h1:fc6jSaCT0vBduLYZHYrBBNY4dsWuvgyff9noRNDdBeE= +google.golang.org/genproto/googleapis/rpc v0.0.0-20250603155806-513f23925822/go.mod h1:qQ0YXyHHx3XkvlzUtpXDkS29lDSafHMZBAZDc03LQ3A= google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg= google.golang.org/grpc v1.25.1/go.mod h1:c3i+UQWmh7LiEpx4sFZnkU36qjEYZ0imhYfXVyQciAY= google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= google.golang.org/grpc v1.33.2/go.mod h1:JMHMWHQWaTccqQQlmk3MJZS+GWXOdAesneDmEnv2fbc= -google.golang.org/grpc v1.72.2 h1:TdbGzwb82ty4OusHWepvFWGLgIbNo1/SUynEN0ssqv8= -google.golang.org/grpc v1.72.2/go.mod h1:wH5Aktxcg25y1I3w7H69nHfXdOG3UiadoBtjh3izSDM= +google.golang.org/grpc v1.73.0 h1:VIWSmpI2MegBtTuFt5/JWy2oXxtjJ/e89Z70ImfD2ok= +google.golang.org/grpc v1.73.0/go.mod h1:50sbHOUqWoCQGI8V2HQLJM0B+LMlIUjNSZmow7EVBQc= google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0= google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM= diff --git a/e2e_cleanup/main.go b/e2e_cleanup/main.go index 15102d9966..a1241bee71 100644 --- a/e2e_cleanup/main.go +++ b/e2e_cleanup/main.go @@ -22,6 +22,12 @@ import ( "google.golang.org/api/option" ) +func CheckedClose(closer io.Closer) { + if err := closer.Close(); err != nil { + panic(err) + } +} + // from flow/shared/crypto.go func DecodePKCS8PrivateKey(rawKey []byte, password *string) (*rsa.PrivateKey, error) { PEMBlock, _ := pem.Decode(rawKey) @@ -49,7 +55,7 @@ func ParseJsonKeyVal[T any](path string) (T, error) { if 
err != nil { return result, fmt.Errorf("failed to open file: %w", err) } - defer f.Close() + defer CheckedClose(f) jsonContent, err := io.ReadAll(f) if err != nil { @@ -91,7 +97,7 @@ func CleanupBQ(ctx context.Context) { if err != nil { panic(err) } - defer client.Close() + defer CheckedClose(client) datasets := client.Datasets(ctx) datasetPrefix := config["dataset_id"] @@ -120,7 +126,7 @@ func CleanupBQ(ctx context.Context) { if err != nil { panic(err) } - defer psclient.Close() + defer CheckedClose(psclient) topics := psclient.Topics(ctx) for { @@ -189,7 +195,7 @@ func CleanupSF(ctx context.Context) { if err != nil { panic(err) } - defer database.Close() + defer CheckedClose(database) _, err = database.ExecContext(ctx, `DECLARE c CURSOR FOR SELECT database_name FROM INFORMATION_SCHEMA.DATABASES WHERE database_name ILIKE 'E2E_TEST_%' AND created < timeadd('hour', -2, CURRENT_DATE); diff --git a/flow/activities/flowable.go b/flow/activities/flowable.go index c9b6f5c816..ae8016a184 100644 --- a/flow/activities/flowable.go +++ b/flow/activities/flowable.go @@ -69,7 +69,7 @@ func (a *FlowableActivity) CheckConnection( config *protos.SetupInput, ) error { ctx = context.WithValue(ctx, shared.FlowNameKey, config.FlowName) - conn, err := connectors.GetByNameAs[connectors.CDCSyncConnector](ctx, config.Env, a.CatalogPool, config.PeerName) + conn, err := connectors.GetByNameAs[connectors.Connector](ctx, config.Env, a.CatalogPool, config.PeerName) if err != nil { if errors.Is(err, errors.ErrUnsupported) { return nil @@ -177,7 +177,7 @@ func (a *FlowableActivity) SetupTableSchema( } defer connectors.CloseConnector(ctx, srcConn) - tableNameSchemaMapping, err := srcConn.GetTableSchema(ctx, config.Env, config.System, config.TableMappings) + tableNameSchemaMapping, err := srcConn.GetTableSchema(ctx, config.Env, config.Version, config.System, config.TableMappings) if err != nil { return a.Alerter.LogFlowError(ctx, config.FlowName, fmt.Errorf("failed to get GetTableSchemaConnector: %w", err)) } @@ -248,7 +248,9 @@ func (a *FlowableActivity) CreateNormalizedTable( numTablesToSetup.Store(int32(len(tableNameSchemaMapping))) tableExistsMapping := make(map[string]bool, len(tableNameSchemaMapping)) - for tableIdentifier, tableSchema := range tableNameSchemaMapping { + for _, tableMapping := range config.TableMappings { + tableIdentifier := tableMapping.DestinationTableIdentifier + tableSchema := tableNameSchemaMapping[tableIdentifier] existing, err := conn.SetupNormalizedTable( ctx, tx, diff --git a/flow/activities/flowable_core.go b/flow/activities/flowable_core.go index 7614ed2b0e..1405f82cac 100644 --- a/flow/activities/flowable_core.go +++ b/flow/activities/flowable_core.go @@ -99,6 +99,7 @@ func (a *FlowableActivity) applySchemaDeltas( FlowName: config.FlowJobName, System: config.System, Env: config.Env, + Version: config.Version, }); err != nil { return a.Alerter.LogFlowError(ctx, config.FlowJobName, fmt.Errorf("failed to execute schema update at source: %w", err)) } @@ -195,6 +196,7 @@ func syncCore[TPull connectors.CDCPullConnectorCore, TSync connectors.CDCSyncCon OverrideReplicationSlotName: config.ReplicationSlotName, RecordStream: recordBatchPull, Env: config.Env, + InternalVersion: config.Version, }) }) @@ -264,10 +266,15 @@ func syncCore[TPull connectors.CDCPullConnectorCore, TSync connectors.CDCSyncCon StagingPath: config.CdcStagingPath, Script: config.Script, TableNameSchemaMapping: tableNameSchemaMapping, + Env: config.Env, + Version: config.Version, }) if err != nil { return 
a.Alerter.LogFlowError(ctx, flowName, fmt.Errorf("failed to push records: %w", err)) } + for _, warning := range res.Warnings { + a.Alerter.LogFlowWarning(ctx, flowName, warning) + } logger.Info("finished pulling records for batch", slog.Int64("SyncBatchID", syncBatchID)) return nil @@ -399,7 +406,7 @@ func replicateQRepPartition[TRead any, TWrite StreamCloser, TSync connectors.QRe *protos.QRepPartition, TWrite, ) (int64, int64, error), - syncRecords func(TSync, context.Context, *protos.QRepConfig, *protos.QRepPartition, TRead) (int64, error), + syncRecords func(TSync, context.Context, *protos.QRepConfig, *protos.QRepPartition, TRead) (int64, shared.QRepWarnings, error), ) error { ctx = context.WithValue(ctx, shared.FlowNameKey, config.FlowJobName) logger := log.With(internal.LoggerFromCtx(ctx), slog.String(string(shared.FlowNameKey), config.FlowJobName)) @@ -427,7 +434,7 @@ func replicateQRepPartition[TRead any, TWrite StreamCloser, TSync connectors.QRe return a.Alerter.LogFlowError(ctx, config.FlowJobName, fmt.Errorf("failed to update start time for partition: %w", err)) } - logger.Info("replicating partition " + partition.PartitionId) + logger.Info("replicating partition", slog.String("partitionId", partition.PartitionId)) var rowsSynced int64 errGroup, errCtx := errgroup.WithContext(ctx) @@ -455,11 +462,15 @@ func replicateQRepPartition[TRead any, TWrite StreamCloser, TSync connectors.QRe }) errGroup.Go(func() error { + var warnings shared.QRepWarnings var err error - rowsSynced, err = syncRecords(dstConn, errCtx, config, partition, outstream) + rowsSynced, warnings, err = syncRecords(dstConn, errCtx, config, partition, outstream) if err != nil { return a.Alerter.LogFlowError(ctx, config.FlowJobName, fmt.Errorf("failed to sync records: %w", err)) } + for _, warning := range warnings { + a.Alerter.LogFlowWarning(ctx, config.FlowJobName, warning) + } return context.Canceled }) @@ -492,7 +503,7 @@ func replicateXminPartition[TRead any, TWrite any, TSync connectors.QRepSyncConn *protos.QRepPartition, TWrite, ) (int64, int64, int64, error), - syncRecords func(TSync, context.Context, *protos.QRepConfig, *protos.QRepPartition, TRead) (int64, error), + syncRecords func(TSync, context.Context, *protos.QRepConfig, *protos.QRepPartition, TRead) (int64, shared.QRepWarnings, error), ) (int64, error) { ctx = context.WithValue(ctx, shared.FlowNameKey, config.FlowJobName) logger := internal.LoggerFromCtx(ctx) @@ -561,10 +572,14 @@ func replicateXminPartition[TRead any, TWrite any, TSync connectors.QRepSyncConn } defer connectors.CloseConnector(ctx, dstConn) - rowsSynced, err = syncRecords(dstConn, ctx, config, partition, outstream) + var warnings shared.QRepWarnings + rowsSynced, warnings, err = syncRecords(dstConn, ctx, config, partition, outstream) if err != nil { return a.Alerter.LogFlowError(ctx, config.FlowJobName, fmt.Errorf("failed to sync records: %w", err)) } + for _, warning := range warnings { + a.Alerter.LogFlowWarning(ctx, config.FlowJobName, warning) + } return context.Canceled }) @@ -642,6 +657,7 @@ func (a *FlowableActivity) startNormalize( SoftDeleteColName: config.SoftDeleteColName, SyncedAtColName: config.SyncedAtColName, SyncBatchID: batchID, + Version: config.Version, }) if err != nil { return a.Alerter.LogFlowError(ctx, config.FlowJobName, diff --git a/flow/alerting/alerting.go b/flow/alerting/alerting.go index 5abd09b9a7..b4578b9753 100644 --- a/flow/alerting/alerting.go +++ b/flow/alerting/alerting.go @@ -433,17 +433,17 @@ func (a *Alerter) LogNonFlowEvent(ctx context.Context, 
eventType telemetry.Event a.sendTelemetryMessage(ctx, logger, string(eventType)+":"+key, message, level) } -// LogFlowError pushes the error to the errors table and emits a metric as well as a telemetry message -func (a *Alerter) LogFlowError(ctx context.Context, flowName string, inErr error) error { - errorWithStack := fmt.Sprintf("%+v", inErr) +// logFlowErrorInternal pushes the error to the errors table and emits a metric as well as a telemetry message +func (a *Alerter) logFlowErrorInternal(ctx context.Context, flowName, errorType string, inErr error, loggerFunc func(string, ...any)) { logger := internal.LoggerFromCtx(ctx) - logger.Error(inErr.Error(), slog.Any("stack", errorWithStack)) + inErrWithStack := fmt.Sprintf("%+v", inErr) + loggerFunc(inErr.Error(), slog.String("stack", inErrWithStack)) if _, err := a.CatalogPool.Exec( ctx, "INSERT INTO peerdb_stats.flow_errors(flow_name,error_message,error_type) VALUES($1,$2,$3)", - flowName, errorWithStack, "error", + flowName, inErrWithStack, errorType, ); err != nil { logger.Error("failed to insert flow error", slog.Any("error", err)) - return inErr + return } var tags []string @@ -482,8 +482,14 @@ func (a *Alerter) LogFlowError(ctx context.Context, flowName string, inErr error tags = append(tags, "errorClass:"+errorClass.String(), "errorAction:"+errorClass.ErrorAction().String()) if !internal.PeerDBTelemetryErrorActionBasedAlertingEnabled() || errorClass.ErrorAction() == NotifyTelemetry { - a.sendTelemetryMessage(ctx, logger, flowName, errorWithStack, telemetry.ERROR, tags...) - } + // Warnings alert us just like errors until there's a customer warning system + a.sendTelemetryMessage(ctx, logger, flowName, inErrWithStack, telemetry.ERROR, tags...) + } + loggerFunc(fmt.Sprintf("Emitting classified error '%s'", inErr.Error()), + slog.Any("error", inErr), + slog.Any("errorClass", errorClass), + slog.Any("errorInfo", errInfo), + slog.Any("stack", inErrWithStack)) errorAttributeSet := metric.WithAttributeSet(attribute.NewSet( attribute.Stringer(otel_metrics.ErrorClassKey, errorClass), attribute.Stringer(otel_metrics.ErrorActionKey, errorClass.ErrorAction()), @@ -492,10 +498,19 @@ func (a *Alerter) LogFlowError(ctx context.Context, flowName string, inErr error )) a.otelManager.Metrics.ErrorsEmittedCounter.Add(ctx, 1, errorAttributeSet) a.otelManager.Metrics.ErrorEmittedGauge.Record(ctx, 1, errorAttributeSet) +} +func (a *Alerter) LogFlowError(ctx context.Context, flowName string, inErr error) error { + logger := internal.LoggerFromCtx(ctx) + a.logFlowErrorInternal(ctx, flowName, "error", inErr, logger.Error) return inErr } +func (a *Alerter) LogFlowWarning(ctx context.Context, flowName string, inErr error) { + logger := internal.LoggerFromCtx(ctx) + a.logFlowErrorInternal(ctx, flowName, "warn", inErr, logger.Warn) +} + func (a *Alerter) LogFlowEvent(ctx context.Context, flowName string, info string) { logger := internal.LoggerFromCtx(ctx) logger.Info(info) diff --git a/flow/alerting/classifier.go b/flow/alerting/classifier.go index 0dc74dea9f..80200f17bd 100644 --- a/flow/alerting/classifier.go +++ b/flow/alerting/classifier.go @@ -61,6 +61,10 @@ func (e ErrorSource) String() string { return string(e) } +func AvroConverterTableColumnErrorSource(destinationTable, destinationColumn string) ErrorSource { + return ErrorSource(fmt.Sprintf("avroConverter:column:%s.%s", destinationTable, destinationColumn)) +} + type ErrorInfo struct { Source ErrorSource Code string @@ -108,10 +112,6 @@ var ( ErrorNotifyTerminate = ErrorClass{ Class: 
"NOTIFY_TERMINATE", action: NotifyUser, } - ErrorNotifyConnectTimeout = ErrorClass{ - // TODO(this is mostly done via NOTIFY_CONNECTIVITY, will remove later if not needed) - Class: "NOTIFY_CONNECT_TIMEOUT", action: NotifyUser, - } ErrorInternal = ErrorClass{ Class: "INTERNAL", action: NotifyTelemetry, } @@ -134,6 +134,9 @@ var ( ErrorInternalClickHouse = ErrorClass{ Class: "INTERNAL_CLICKHOUSE", action: NotifyTelemetry, } + ErrorLossyConversion = ErrorClass{ + Class: "WARNING_LOSSY_CONVERSION", action: NotifyTelemetry, + } ErrorOther = ErrorClass{ // These are unclassified and should not be exposed Class: "OTHER", action: NotifyTelemetry, @@ -232,7 +235,9 @@ func GetErrorClass(ctx context.Context, err error) (ErrorClass, ErrorInfo) { pgerrcode.InvalidPassword, pgerrcode.InsufficientPrivilege, pgerrcode.UndefinedTable, - pgerrcode.CannotConnectNow: + pgerrcode.CannotConnectNow, + pgerrcode.ConfigurationLimitExceeded, + pgerrcode.DiskFull: return ErrorNotifyConnectivity, pgErrorInfo case pgerrcode.UndefinedObject: @@ -251,7 +256,11 @@ func GetErrorClass(ctx context.Context, err error) (ErrorClass, ErrorInfo) { (strings.HasPrefix(pgErr.Message, "could not stat file ") && strings.HasSuffix(pgErr.Message, "Stale file handle")) || // Below error is transient and Aurora Specific - (strings.HasPrefix(pgErr.Message, "Internal error encountered during logical decoding")) { + (strings.HasPrefix(pgErr.Message, "Internal error encountered during logical decoding")) || + //nolint:lll + // Handle missing record during logical decoding + // https://github.com/postgres/postgres/blob/a0c7b765372d949cec54960dafcaadbc04b3204e/src/backend/access/transam/xlogreader.c#L921 + strings.HasPrefix(pgErr.Message, "could not find record while sending logically-decoded data") { return ErrorRetryRecoverable, pgErrorInfo } @@ -264,10 +273,11 @@ func GetErrorClass(ctx context.Context, err error) (ErrorClass, ErrorInfo) { return ErrorOther, pgErrorInfo case pgerrcode.ObjectNotInPrerequisiteState: - // same underlying error but 2 different messages + // same underlying error but 3 different messages // based on PG version, newer ones have second error if strings.Contains(pgErr.Message, "cannot read from logical replication slot") || - strings.Contains(pgErr.Message, "can no longer get changes from replication slot") { + strings.Contains(pgErr.Message, "can no longer get changes from replication slot") || + strings.Contains(pgErr.Message, "could not import the requested snapshot") { return ErrorNotifySlotInvalid, pgErrorInfo } @@ -275,6 +285,10 @@ func GetErrorClass(ctx context.Context, err error) (ErrorClass, ErrorInfo) { if strings.Contains(pgErr.Message, "invalid snapshot identifier") { return ErrorNotifyInvalidSnapshotIdentifier, pgErrorInfo } + case pgerrcode.SerializationFailure, pgerrcode.DeadlockDetected: + if strings.Contains(pgErr.Message, "canceling statement due to conflict with recovery") { + return ErrorNotifyConnectivity, pgErrorInfo + } case pgerrcode.TooManyConnections, // Maybe we can return something else? 
pgerrcode.ConnectionException, @@ -416,7 +430,7 @@ func GetErrorClass(ctx context.Context, err error) (ErrorClass, ErrorInfo) { } else if isClickHouseMvError(chException) { return ErrorNotifyMVOrView, chErrorInfo } - case chproto.ErrQueryWasCancelled: + case chproto.ErrQueryWasCancelled, chproto.ErrPocoException: return ErrorRetryRecoverable, chErrorInfo default: if isClickHouseMvError(chException) { @@ -489,6 +503,22 @@ func GetErrorClass(ctx context.Context, err error) (ErrorClass, ErrorInfo) { } } + var numericOutOfRangeError *exceptions.NumericOutOfRangeError + if errors.As(err, &numericOutOfRangeError) { + return ErrorLossyConversion, ErrorInfo{ + Source: AvroConverterTableColumnErrorSource(numericOutOfRangeError.DestinationTable, numericOutOfRangeError.DestinationColumn), + Code: "NUMERIC_OUT_OF_RANGE", + } + } + + var numericTruncatedError *exceptions.NumericTruncatedError + if errors.As(err, &numericTruncatedError) { + return ErrorLossyConversion, ErrorInfo{ + Source: AvroConverterTableColumnErrorSource(numericTruncatedError.DestinationTable, numericTruncatedError.DestinationColumn), + Code: "NUMERIC_TRUNCATED", + } + } + return ErrorOther, ErrorInfo{ Source: ErrorSourceOther, Code: "UNKNOWN", diff --git a/flow/alerting/classifier_test.go b/flow/alerting/classifier_test.go index 697b701578..2e5f889ff5 100644 --- a/flow/alerting/classifier_test.go +++ b/flow/alerting/classifier_test.go @@ -313,3 +313,38 @@ func TestPeerCreateTimeoutErrorShouldBeConnectivity(t *testing.T) { Code: "CONTEXT_DEADLINE_EXCEEDED", }, errInfo, "Unexpected error info") } + +func TestPostgresCouldNotFindRecordWalErrorShouldBeRecoverable(t *testing.T) { + // Simulate a "could not find record while sending logically-decoded data" error + err := &exceptions.PostgresWalError{ + Msg: &pgproto3.ErrorResponse{ + Severity: "ERROR", + Code: pgerrcode.InternalError, + Message: "could not find record while sending logically-decoded data: missing contrecord at 6410/14023FF0", + }, + } + errorClass, errInfo := GetErrorClass(t.Context(), fmt.Errorf("error in WAL: %w", err)) + assert.Equal(t, ErrorRetryRecoverable, errorClass, "Unexpected error class") + assert.Equal(t, ErrorInfo{ + Source: ErrorSourcePostgres, + Code: pgerrcode.InternalError, + }, errInfo, "Unexpected error info") +} + +func TestPostgresConnectionRefusedErrorShouldBeConnectivity(t *testing.T) { + config, err := pgx.ParseConfig("postgres://localhost:1001/db") + require.NoError(t, err) + _, err = pgx.ConnectConfig(t.Context(), config) + require.Error(t, err, "Expected connection refused error") + t.Logf("Error: %v", err) + for _, e := range []error{err, exceptions.NewPeerCreateError(err)} { + t.Run(fmt.Sprintf("Testing error: %T", e), func(t *testing.T) { + errorClass, errInfo := GetErrorClass(t.Context(), e) + assert.Equal(t, ErrorNotifyConnectivity, errorClass, "Unexpected error class") + assert.Equal(t, ErrorInfo{ + Source: ErrorSourcePostgres, + Code: "UNKNOWN", + }, errInfo, "Unexpected error info") + }) + } +} diff --git a/flow/cmd/api.go b/flow/cmd/api.go index a6c72a59bc..ef9c0ec62d 100644 --- a/flow/cmd/api.go +++ b/flow/cmd/api.go @@ -2,7 +2,6 @@ package cmd import ( "context" - "crypto/tls" "fmt" "log" "log/slog" @@ -311,25 +310,3 @@ func APIMain(ctx context.Context, args *APIServerParams) error { return nil } - -func setupTemporalClient(ctx context.Context, clientOptions client.Options) (client.Client, error) { - if internal.PeerDBTemporalEnableCertAuth() { - slog.Info("Using temporal certificate/key for authentication") - - certs, err := 
parseTemporalCertAndKey(ctx) - if err != nil { - return nil, fmt.Errorf("unable to base64 decode certificate and key: %w", err) - } - - connOptions := client.ConnectionOptions{ - TLS: &tls.Config{ - Certificates: certs, - MinVersion: tls.VersionTLS13, - }, - } - clientOptions.ConnectionOptions = connOptions - } - - tc, err := client.Dial(clientOptions) - return tc, err -} diff --git a/flow/cmd/cert.go b/flow/cmd/cert.go index 01ab3c3ac1..b11a56d253 100644 --- a/flow/cmd/cert.go +++ b/flow/cmd/cert.go @@ -4,11 +4,14 @@ import ( "context" "crypto/tls" "fmt" + "log/slog" + + "go.temporal.io/sdk/client" "github.com/PeerDB-io/peerdb/flow/internal" ) -func parseTemporalCertAndKey(ctx context.Context) ([]tls.Certificate, error) { +func parseTemporalCertAndKeyFromEnvironment(ctx context.Context) ([]tls.Certificate, error) { certBytes, err := internal.PeerDBTemporalClientCert(ctx) if err != nil { return nil, fmt.Errorf("unable to get temporal certificate: %w", err) @@ -26,3 +29,40 @@ func parseTemporalCertAndKey(ctx context.Context) ([]tls.Certificate, error) { return []tls.Certificate{keyPair}, nil } + +func setupTemporalClient(ctx context.Context, clientOptions client.Options) (client.Client, error) { + if certPath := internal.PeerDBTemporalClientCertPath(); certPath != "" { + slog.Info("Using temporal certificate/key from paths for authentication") + keyPath := internal.PeerDBTemporalClientKeyPath() + + clientOptions.ConnectionOptions = client.ConnectionOptions{ + TLS: &tls.Config{ + GetClientCertificate: func(*tls.CertificateRequestInfo) (*tls.Certificate, error) { + keyPairValue, err := tls.LoadX509KeyPair(certPath, keyPath) + if err != nil { + return nil, fmt.Errorf("unable to obtain temporal key pair: %w", err) + } + return &keyPairValue, nil + }, + MinVersion: tls.VersionTLS13, + }, + } + } else if internal.PeerDBTemporalEnableCertAuth() { + slog.Info("Using temporal certificate/key from environment for authentication") + + certs, err := parseTemporalCertAndKeyFromEnvironment(ctx) + if err != nil { + return nil, fmt.Errorf("unable to base64 decode certificate and key: %w", err) + } + + clientOptions.ConnectionOptions = client.ConnectionOptions{ + TLS: &tls.Config{ + Certificates: certs, + MinVersion: tls.VersionTLS13, + }, + } + } + + tc, err := client.Dial(clientOptions) + return tc, err +} diff --git a/flow/cmd/handler.go b/flow/cmd/handler.go index 33211fdcfb..5db1d56749 100644 --- a/flow/cmd/handler.go +++ b/flow/cmd/handler.go @@ -128,6 +128,7 @@ func (h *FlowRequestHandler) CreateCDCFlow( ctx context.Context, req *protos.CreateCDCFlowRequest, ) (*protos.CreateCDCFlowResponse, error) { cfg := req.ConnectionConfigs + cfg.Version = shared.InternalVersion_Latest // For resync, we validate the mirror before dropping it and getting to this step. 
// There is no point validating again here if it's a resync - the mirror is dropped already @@ -176,6 +177,8 @@ func (h *FlowRequestHandler) CreateQRepFlow( ctx context.Context, req *protos.CreateQRepFlowRequest, ) (*protos.CreateQRepFlowResponse, error) { cfg := req.QrepConfig + cfg.Version = shared.InternalVersion_Latest + workflowID := fmt.Sprintf("%s-qrepflow-%s", cfg.FlowJobName, uuid.New()) workflowOptions := client.StartWorkflowOptions{ ID: workflowID, diff --git a/flow/cmd/peer_data.go b/flow/cmd/peer_data.go index ee114b3240..02bea196b5 100644 --- a/flow/cmd/peer_data.go +++ b/flow/cmd/peer_data.go @@ -4,6 +4,7 @@ import ( "context" "database/sql" "errors" + "fmt" "log/slog" "time" @@ -17,6 +18,7 @@ import ( connpostgres "github.com/PeerDB-io/peerdb/flow/connectors/postgres" "github.com/PeerDB-io/peerdb/flow/generated/protos" "github.com/PeerDB-io/peerdb/flow/internal" + "github.com/PeerDB-io/peerdb/flow/shared" ) func redactProto(message proto.Message) { @@ -93,8 +95,9 @@ func (h *FlowRequestHandler) ListPeers( ) (*protos.ListPeersResponse, error) { query := "SELECT name, type FROM peers" if internal.PeerDBOnlyClickHouseAllowed() { - // only postgres, mysql, and clickhouse - query += " WHERE type IN (3, 7, 8)" + // only postgres, mysql, mongo,and clickhouse + query += fmt.Sprintf(" WHERE type IN (%d,%d,%d,%d)", + protos.DBType_POSTGRES, protos.DBType_MYSQL, protos.DBType_MONGO, protos.DBType_CLICKHOUSE) } rows, err := h.pool.Query(ctx, query) if err != nil { @@ -113,10 +116,11 @@ func (h *FlowRequestHandler) ListPeers( sourceItems := make([]*protos.PeerListItem, 0, len(peers)) destinationItems := make([]*protos.PeerListItem, 0, len(peers)) for _, peer := range peers { - if peer.Type == protos.DBType_POSTGRES || peer.Type == protos.DBType_MYSQL { + if peer.Type == protos.DBType_POSTGRES || peer.Type == protos.DBType_MYSQL || peer.Type == protos.DBType_MONGO { sourceItems = append(sourceItems, peer) } - if peer.Type != protos.DBType_MYSQL && (!internal.PeerDBOnlyClickHouseAllowed() || peer.Type == protos.DBType_CLICKHOUSE) { + if peer.Type != protos.DBType_MYSQL && + peer.Type != protos.DBType_MONGO && (!internal.PeerDBOnlyClickHouseAllowed() || peer.Type == protos.DBType_CLICKHOUSE) { destinationItems = append(destinationItems, peer) } } @@ -174,7 +178,7 @@ func (h *FlowRequestHandler) GetColumns( return nil, err } defer connectors.CloseConnector(ctx, conn) - return conn.GetColumns(ctx, req.SchemaName, req.TableName) + return conn.GetColumns(ctx, shared.InternalVersion_Latest, req.SchemaName, req.TableName) } func (h *FlowRequestHandler) GetColumnsTypeConversion( diff --git a/flow/cmd/snapshot_worker.go b/flow/cmd/snapshot_worker.go index c2ca331616..d6ca3bd31f 100644 --- a/flow/cmd/snapshot_worker.go +++ b/flow/cmd/snapshot_worker.go @@ -2,7 +2,6 @@ package cmd import ( "context" - "crypto/tls" "fmt" "log/slog" "os" @@ -37,6 +36,11 @@ func SnapshotWorkerMain(ctx context.Context, opts *SnapshotWorkerOptions) (*Work }, } + conn, err := internal.GetCatalogConnectionPoolFromEnv(ctx) + if err != nil { + return nil, fmt.Errorf("unable to create catalog connection pool: %w", err) + } + metricsProvider, metricsErr := otel_metrics.SetupTemporalMetricsProvider( ctx, otel_metrics.FlowSnapshotWorkerServiceName, opts.EnableOtelMetrics) if metricsErr != nil { @@ -46,28 +50,7 @@ func SnapshotWorkerMain(ctx context.Context, opts *SnapshotWorkerOptions) (*Work Meter: metricsProvider.Meter("temporal-sdk-go"), }) - if internal.PeerDBTemporalEnableCertAuth() { - slog.Info("Using temporal 
certificate/key for authentication") - certs, err := parseTemporalCertAndKey(ctx) - if err != nil { - return nil, fmt.Errorf("unable to process certificate and key: %w", err) - } - - connOptions := client.ConnectionOptions{ - TLS: &tls.Config{ - Certificates: certs, - MinVersion: tls.VersionTLS13, - }, - } - clientOptions.ConnectionOptions = connOptions - } - - conn, err := internal.GetCatalogConnectionPoolFromEnv(ctx) - if err != nil { - return nil, fmt.Errorf("unable to create catalog connection pool: %w", err) - } - - c, err := client.Dial(clientOptions) + c, err := setupTemporalClient(ctx, clientOptions) if err != nil { return nil, fmt.Errorf("unable to create Temporal client: %w", err) } diff --git a/flow/cmd/validate_mirror.go b/flow/cmd/validate_mirror.go index a58f32d18f..147f3e150c 100644 --- a/flow/cmd/validate_mirror.go +++ b/flow/cmd/validate_mirror.go @@ -16,10 +16,7 @@ import ( "github.com/PeerDB-io/peerdb/flow/shared/telemetry" ) -var ( - CustomColumnTypeRegex = regexp.MustCompile(`^$|^[a-zA-Z][a-zA-Z0-9(),]*$`) - CustomColumnNameRegex = regexp.MustCompile(`^$|^[a-zA-Z_][a-zA-Z0-9_]*$`) -) +var CustomColumnTypeRegex = regexp.MustCompile(`^$|^[a-zA-Z][a-zA-Z0-9(),]*$`) func (h *FlowRequestHandler) ValidateCDCMirror( ctx context.Context, req *protos.CreateCDCFlowRequest, @@ -59,9 +56,6 @@ func (h *FlowRequestHandler) ValidateCDCMirror( if !CustomColumnTypeRegex.MatchString(col.DestinationType) { return nil, fmt.Errorf("invalid custom column type %s", col.DestinationType) } - if !CustomColumnNameRegex.MatchString(col.DestinationName) { - return nil, fmt.Errorf("invalid custom column name %s", col.DestinationName) - } } } @@ -102,7 +96,8 @@ func (h *FlowRequestHandler) ValidateCDCMirror( } defer connectors.CloseConnector(ctx, dstConn) - res, err := srcConn.GetTableSchema(ctx, nil, req.ConnectionConfigs.System, req.ConnectionConfigs.TableMappings) + res, err := srcConn.GetTableSchema(ctx, req.ConnectionConfigs.Env, req.ConnectionConfigs.Version, + req.ConnectionConfigs.System, req.ConnectionConfigs.TableMappings) if err != nil { return nil, fmt.Errorf("failed to get source table schema: %w", err) } diff --git a/flow/cmd/worker.go b/flow/cmd/worker.go index d5febc17c7..f370360c2f 100644 --- a/flow/cmd/worker.go +++ b/flow/cmd/worker.go @@ -2,7 +2,6 @@ package cmd import ( "context" - "crypto/tls" "fmt" "log" "log/slog" @@ -81,6 +80,11 @@ func WorkerSetup(ctx context.Context, opts *WorkerSetupOptions) (*WorkerSetupRes setupPprof(opts) } + conn, err := internal.GetCatalogConnectionPoolFromEnv(ctx) + if err != nil { + return nil, fmt.Errorf("unable to create catalog connection pool: %w", err) + } + clientOptions := client.Options{ HostPort: opts.TemporalHostPort, Namespace: opts.TemporalNamespace, @@ -100,26 +104,7 @@ func WorkerSetup(ctx context.Context, opts *WorkerSetupOptions) (*WorkerSetupRes Meter: metricsProvider.Meter("temporal-sdk-go"), }) - if internal.PeerDBTemporalEnableCertAuth() { - slog.Info("Using temporal certificate/key for authentication") - certs, err := parseTemporalCertAndKey(ctx) - if err != nil { - return nil, fmt.Errorf("unable to process certificate and key: %w", err) - } - clientOptions.ConnectionOptions = client.ConnectionOptions{ - TLS: &tls.Config{ - Certificates: certs, - MinVersion: tls.VersionTLS13, - }, - } - } - - conn, err := internal.GetCatalogConnectionPoolFromEnv(ctx) - if err != nil { - return nil, fmt.Errorf("unable to create catalog connection pool: %w", err) - } - - c, err := client.Dial(clientOptions) + c, err := setupTemporalClient(ctx, 
clientOptions) if err != nil { return nil, fmt.Errorf("unable to create Temporal client: %w", err) } diff --git a/flow/connectors/bigquery/bigquery.go b/flow/connectors/bigquery/bigquery.go index f1aaeb1ac1..033eff097c 100644 --- a/flow/connectors/bigquery/bigquery.go +++ b/flow/connectors/bigquery/bigquery.go @@ -365,8 +365,10 @@ func (c *BigQueryConnector) syncRecordsViaAvro( syncBatchID int64, ) (*model.SyncResponse, error) { tableNameRowsMapping := utils.InitialiseTableRowsMap(req.TableMappings) - streamReq := model.NewRecordsToStreamRequest(req.Records.GetRecords(), tableNameRowsMapping, syncBatchID) - stream, err := utils.RecordsToRawTableStream(streamReq) + streamReq := model.NewRecordsToStreamRequest( + req.Records.GetRecords(), tableNameRowsMapping, syncBatchID, false, protos.DBType_BIGQUERY, + ) + stream, err := utils.RecordsToRawTableStream(streamReq, nil) if err != nil { return nil, fmt.Errorf("failed to convert records to raw table stream: %w", err) } diff --git a/flow/connectors/bigquery/merge_stmt_generator.go b/flow/connectors/bigquery/merge_stmt_generator.go index e82878c763..34286323c8 100644 --- a/flow/connectors/bigquery/merge_stmt_generator.go +++ b/flow/connectors/bigquery/merge_stmt_generator.go @@ -5,8 +5,8 @@ import ( "strings" "github.com/PeerDB-io/peerdb/flow/generated/protos" - "github.com/PeerDB-io/peerdb/flow/model/qvalue" "github.com/PeerDB-io/peerdb/flow/shared" + "github.com/PeerDB-io/peerdb/flow/shared/types" ) type mergeStmtGenerator struct { @@ -24,7 +24,7 @@ type mergeStmtGenerator struct { // generateFlattenedCTE generates a flattened CTE. func (m *mergeStmtGenerator) generateFlattenedCTE(dstTable string, normalizedTableSchema *protos.TableSchema) string { - // for each column in the normalized table, generate CAST + JSON_EXTRACT_SCALAR + // for each column in the normalized table, generate CAST + JSON_VALUE // statement. 
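
Reviewer note (not part of the patch): the merge-statement generator keeps the same per-column CAST shape while switching the extraction function to JSON_VALUE. Below is a minimal, self-contained Go sketch of the projection fragments those format strings produce for two hypothetical columns ("payload" as a JSON kind, "updated_at" as the default case); the column names, BigQuery types, and short aliases are illustrative, not taken from this diff.

```go
package main

import "fmt"

func main() {
	// JSON/JSONB/HSTORE columns: the extracted string is parsed as JSON before casting.
	col, bqType, short := "payload", "JSON", "_c0"
	fmt.Printf("CAST(PARSE_JSON(JSON_VALUE(_peerdb_data, '$.%s'),wide_number_mode=>'round') AS %s) AS `%s`\n",
		col, bqType, short)

	// Default case: plain JSON_VALUE scalar extraction followed by a CAST.
	col, bqType, short = "updated_at", "TIMESTAMP", "_c1"
	fmt.Printf("CAST(JSON_VALUE(_peerdb_data, '$.%s') AS %s) AS `%s`\n", col, bqType, short)
}
```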
flattenedProjs := make([]string, 0, len(normalizedTableSchema.Columns)+3) @@ -33,40 +33,26 @@ func (m *mergeStmtGenerator) generateFlattenedCTE(dstTable string, normalizedTab bqTypeString := qValueKindToBigQueryTypeString(column, normalizedTableSchema.NullableEnabled, true) var castStmt string shortCol := m.shortColumn[column.Name] - switch qvalue.QValueKind(colType) { - case qvalue.QValueKindJSON, qvalue.QValueKindJSONB, qvalue.QValueKindHStore: + switch types.QValueKind(colType) { + case types.QValueKindJSON, types.QValueKindJSONB, types.QValueKindHStore: // if the type is JSON, then just extract JSON castStmt = fmt.Sprintf("CAST(PARSE_JSON(JSON_VALUE(_peerdb_data, '$.%s'),wide_number_mode=>'round') AS %s) AS `%s`", column.Name, bqTypeString, shortCol) // expecting data in BASE64 format - case qvalue.QValueKindBytes: + case types.QValueKindBytes: castStmt = fmt.Sprintf("FROM_BASE64(JSON_VALUE(_peerdb_data,'$.%s')) AS `%s`", column.Name, shortCol) - case qvalue.QValueKindArrayFloat32, qvalue.QValueKindArrayFloat64, qvalue.QValueKindArrayInt16, - qvalue.QValueKindArrayInt32, qvalue.QValueKindArrayInt64, qvalue.QValueKindArrayString, - qvalue.QValueKindArrayBoolean, qvalue.QValueKindArrayTimestamp, qvalue.QValueKindArrayTimestampTZ, - qvalue.QValueKindArrayDate, qvalue.QValueKindArrayUUID: + case types.QValueKindArrayFloat32, types.QValueKindArrayFloat64, types.QValueKindArrayInt16, + types.QValueKindArrayInt32, types.QValueKindArrayInt64, types.QValueKindArrayString, + types.QValueKindArrayBoolean, types.QValueKindArrayTimestamp, types.QValueKindArrayTimestampTZ, + types.QValueKindArrayDate, types.QValueKindArrayInterval, types.QValueKindArrayUUID, + types.QValueKindArrayNumeric: castStmt = fmt.Sprintf("ARRAY(SELECT CAST(element AS %s) FROM "+ "UNNEST(CAST(JSON_VALUE_ARRAY(_peerdb_data, '$.%s') AS ARRAY)) AS element WHERE element IS NOT null) AS `%s`", bqTypeString, column.Name, shortCol) - case qvalue.QValueKindGeography, qvalue.QValueKindGeometry, qvalue.QValueKindPoint: + case types.QValueKindGeography, types.QValueKindGeometry, types.QValueKindPoint: castStmt = fmt.Sprintf("CAST(ST_GEOGFROMTEXT(JSON_VALUE(_peerdb_data, '$.%s')) AS %s) AS `%s`", column.Name, bqTypeString, shortCol) - // MAKE_INTERVAL(years INT64, months INT64, days INT64, hours INT64, minutes INT64, seconds INT64) - // Expecting interval to be in the format of {"Microseconds":2000000,"Days":0,"Months":0,"Valid":true} - // json.Marshal in SyncRecords for Postgres already does this - once new data-stores are added, - // this needs to be handled again - // TODO add interval types again - // case model.ColumnTypeInterval: - // castStmt = fmt.Sprintf("MAKE_INTERVAL(0,CAST(JSON_EXTRACT_SCALAR(_peerdb_data, '$.%s.Months') AS INT64),"+ - // "CAST(JSON_EXTRACT_SCALAR(_peerdb_data, '$.%s.Days') AS INT64),0,0,"+ - // "CAST(CAST(JSON_EXTRACT_SCALAR(_peerdb_data, '$.%s.Microseconds') AS INT64)/1000000 AS INT64)) AS %s", - // column.Name, column.Name, column.Name, column.Name) - // TODO add proper granularity for time types, then restore this - // case model.ColumnTypeTime: - // castStmt = fmt.Sprintf("time(timestamp_micros(CAST(JSON_EXTRACT(_peerdb_data, '$.%s.Microseconds')"+ - // " AS int64))) AS %s", - // column.Name, column.Name) default: castStmt = fmt.Sprintf("CAST(JSON_VALUE(_peerdb_data, '$.%s') AS %s) AS `%s`", column.Name, bqTypeString, shortCol) @@ -90,9 +76,9 @@ func (m *mergeStmtGenerator) generateFlattenedCTE(dstTable string, normalizedTab // This function is to support datatypes like JSON which cannot be partitioned 
by or compared by BigQuery func (m *mergeStmtGenerator) transformedPkeyStrings(normalizedTableSchema *protos.TableSchema, forPartition bool) []string { pkeys := make([]string, 0, len(normalizedTableSchema.PrimaryKeyColumns)) - columnNameTypeMap := make(map[string]qvalue.QValueKind, len(normalizedTableSchema.Columns)) + columnNameTypeMap := make(map[string]types.QValueKind, len(normalizedTableSchema.Columns)) for _, col := range normalizedTableSchema.Columns { - columnNameTypeMap[col.Name] = qvalue.QValueKind(col.Type) + columnNameTypeMap[col.Name] = types.QValueKind(col.Type) } for _, pkeyCol := range normalizedTableSchema.PrimaryKeyColumns { @@ -101,14 +87,14 @@ func (m *mergeStmtGenerator) transformedPkeyStrings(normalizedTableSchema *proto continue } switch pkeyColType { - case qvalue.QValueKindJSON: + case types.QValueKindJSON: if forPartition { pkeys = append(pkeys, fmt.Sprintf("TO_JSON_STRING(%s)", m.shortColumn[pkeyCol])) } else { pkeys = append(pkeys, fmt.Sprintf("TO_JSON_STRING(_t.`%s`)=TO_JSON_STRING(_d.%s)", pkeyCol, m.shortColumn[pkeyCol])) } - case qvalue.QValueKindFloat32, qvalue.QValueKindFloat64: + case types.QValueKindFloat32, types.QValueKindFloat64: if forPartition { pkeys = append(pkeys, fmt.Sprintf("CAST(%s as STRING)", m.shortColumn[pkeyCol])) } else { diff --git a/flow/connectors/bigquery/qrep.go b/flow/connectors/bigquery/qrep.go index f1782c61b3..25721c110b 100644 --- a/flow/connectors/bigquery/qrep.go +++ b/flow/connectors/bigquery/qrep.go @@ -8,11 +8,11 @@ import ( "cloud.google.com/go/bigquery" - "github.com/PeerDB-io/peerdb/flow/datatypes" "github.com/PeerDB-io/peerdb/flow/generated/protos" "github.com/PeerDB-io/peerdb/flow/model" - "github.com/PeerDB-io/peerdb/flow/model/qvalue" "github.com/PeerDB-io/peerdb/flow/shared" + "github.com/PeerDB-io/peerdb/flow/shared/datatypes" + "github.com/PeerDB-io/peerdb/flow/shared/types" ) func (c *BigQueryConnector) SyncQRepRecords( @@ -20,17 +20,17 @@ func (c *BigQueryConnector) SyncQRepRecords( config *protos.QRepConfig, partition *protos.QRepPartition, stream *model.QRecordStream, -) (int64, error) { +) (int64, shared.QRepWarnings, error) { // Ensure the destination table is available. 
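
Reviewer note (not part of the patch): SyncQRepRecords now returns a shared.QRepWarnings value alongside the row count. A hedged sketch of how a caller might surface that value; the qrepSyncer interface and syncOnePartition helper are illustrative and not part of PeerDB, and the only assumption made about shared.QRepWarnings is that it is nil-able (this hunk returns nil for it).

```go
package example

import (
	"context"
	"fmt"
	"log/slog"

	"github.com/PeerDB-io/peerdb/flow/generated/protos"
	"github.com/PeerDB-io/peerdb/flow/model"
	"github.com/PeerDB-io/peerdb/flow/shared"
)

// qrepSyncer captures the widened method signature introduced in this hunk.
type qrepSyncer interface {
	SyncQRepRecords(ctx context.Context, config *protos.QRepConfig,
		partition *protos.QRepPartition, stream *model.QRecordStream,
	) (int64, shared.QRepWarnings, error)
}

// syncOnePartition logs any warnings returned by the sync instead of dropping them.
func syncOnePartition(ctx context.Context, s qrepSyncer, logger *slog.Logger,
	config *protos.QRepConfig, partition *protos.QRepPartition, stream *model.QRecordStream,
) (int64, error) {
	rows, warnings, err := s.SyncQRepRecords(ctx, config, partition, stream)
	if err != nil {
		return 0, fmt.Errorf("qrep sync failed: %w", err)
	}
	if warnings != nil { // assumes QRepWarnings is a nil-able type, as the hunk suggests
		logger.Warn("qrep sync produced warnings", slog.Any("warnings", warnings))
	}
	return rows, nil
}
```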
destTable := config.DestinationTableIdentifier srcSchema, err := stream.Schema() if err != nil { - return 0, err + return 0, nil, err } tblMetadata, err := c.replayTableSchemaDeltasQRep(ctx, config, partition, srcSchema) if err != nil { - return 0, err + return 0, nil, err } c.logger.Info(fmt.Sprintf("QRep sync function called and partition existence checked for"+ @@ -38,15 +38,19 @@ func (c *BigQueryConnector) SyncQRepRecords( partition.PartitionId, destTable)) avroSync := NewQRepAvroSyncMethod(c, config.StagingPath, config.FlowJobName) - return avroSync.SyncQRepRecords(ctx, config.Env, config.FlowJobName, destTable, partition, + result, err := avroSync.SyncQRepRecords(ctx, config.Env, config.FlowJobName, destTable, partition, tblMetadata, stream, config.SyncedAtColName, config.SoftDeleteColName) + if err != nil { + return result, nil, err + } + return result, nil, nil } func (c *BigQueryConnector) replayTableSchemaDeltasQRep( ctx context.Context, config *protos.QRepConfig, partition *protos.QRepPartition, - srcSchema qvalue.QRecordSchema, + srcSchema types.QRecordSchema, ) (*bigquery.TableMetadata, error) { destDatasetTable, _ := c.convertToDatasetTable(config.DestinationTableIdentifier) bqTable := c.client.DatasetInProject(c.projectID, destDatasetTable.dataset).Table(destDatasetTable.table) diff --git a/flow/connectors/bigquery/qrep_avro_sync.go b/flow/connectors/bigquery/qrep_avro_sync.go index 75485be7ae..73300f7daf 100644 --- a/flow/connectors/bigquery/qrep_avro_sync.go +++ b/flow/connectors/bigquery/qrep_avro_sync.go @@ -13,11 +13,12 @@ import ( "github.com/hamba/avro/v2" "github.com/hamba/avro/v2/ocf" - avroutils "github.com/PeerDB-io/peerdb/flow/connectors/utils/avro" + "github.com/PeerDB-io/peerdb/flow/connectors/utils" "github.com/PeerDB-io/peerdb/flow/generated/protos" "github.com/PeerDB-io/peerdb/flow/model" "github.com/PeerDB-io/peerdb/flow/model/qvalue" "github.com/PeerDB-io/peerdb/flow/shared" + "github.com/PeerDB-io/peerdb/flow/shared/types" ) type QRepAvroSyncMethod struct { @@ -225,7 +226,7 @@ func DefineAvroSchema(dstTableName string, softDeleteCol string, ) (*model.QRecordAvroSchemaDefinition, error) { avroFields := make([]*avro.Field, 0, len(dstTableMetadata.Schema)) - qFields := make([]qvalue.QField, 0, len(avroFields)) + qFields := make([]types.QField, 0, len(avroFields)) for _, bqField := range dstTableMetadata.Schema { if bqField.Name == syncedAtCol || bqField.Name == softDeleteCol { continue @@ -303,7 +304,11 @@ func GetAvroType(bqField *bigquery.FieldSchema) (avro.Schema, error) { } return avro.NewRecordSchema("datetime", "", []*avro.Field{dateField, timeField}) case bigquery.BigNumericFieldType: - return avro.NewPrimitiveSchema(avro.Bytes, avro.NewDecimalLogicalSchema(int(avroNumericPrecision), int(avroNumericScale))), nil + bigNumericSchema := avro.NewPrimitiveSchema(avro.Bytes, avro.NewDecimalLogicalSchema(int(avroNumericPrecision), int(avroNumericScale))) + if bqField.Repeated { + return avro.NewArraySchema(bigNumericSchema), nil + } + return bigNumericSchema, nil case bigquery.RecordFieldType: avroFields := []*avro.Field{} for _, bqSubField := range bqField.Schema { @@ -353,8 +358,8 @@ func (s *QRepAvroSyncMethod) writeToStage( stream *model.QRecordStream, flowName string, ) (int64, error) { - var avroFile *avroutils.AvroFile - ocfWriter := avroutils.NewPeerDBOCFWriter(stream, avroSchema, ocf.Snappy, protos.DBType_BIGQUERY) + var avroFile utils.AvroFile + ocfWriter := utils.NewPeerDBOCFWriter(stream, avroSchema, ocf.Snappy, protos.DBType_BIGQUERY) idLog := 
slog.Group("write-metadata", slog.String(string(shared.FlowNameKey), flowName), slog.String("batchOrPartitionID", syncID), @@ -365,7 +370,7 @@ func (s *QRepAvroSyncMethod) writeToStage( obj := bucket.Object(avroFilePath) w := obj.NewWriter(ctx) - numRecords, err := ocfWriter.WriteOCF(ctx, env, w, nil) + numRecords, err := ocfWriter.WriteOCF(ctx, env, w, nil, nil) if err != nil { return 0, fmt.Errorf("failed to write records to Avro file on GCS: %w", err) } @@ -373,9 +378,9 @@ func (s *QRepAvroSyncMethod) writeToStage( return 0, fmt.Errorf("failed to close Avro file on GCS after writing: %w", err) } - avroFile = &avroutils.AvroFile{ + avroFile = utils.AvroFile{ NumRecords: numRecords, - StorageLocation: avroutils.AvroGCSStorage, + StorageLocation: utils.AvroGCSStorage, FilePath: avroFilePath, } } else { diff --git a/flow/connectors/bigquery/qvalue_convert.go b/flow/connectors/bigquery/qvalue_convert.go index 82256a7a6f..94570a5aa5 100644 --- a/flow/connectors/bigquery/qvalue_convert.go +++ b/flow/connectors/bigquery/qvalue_convert.go @@ -5,9 +5,9 @@ import ( "cloud.google.com/go/bigquery" - "github.com/PeerDB-io/peerdb/flow/datatypes" "github.com/PeerDB-io/peerdb/flow/generated/protos" - "github.com/PeerDB-io/peerdb/flow/model/qvalue" + "github.com/PeerDB-io/peerdb/flow/shared/datatypes" + "github.com/PeerDB-io/peerdb/flow/shared/types" ) func qValueKindToBigQueryType(columnDescription *protos.FieldDescription, nullableEnabled bool) bigquery.FieldSchema { @@ -15,65 +15,68 @@ func qValueKindToBigQueryType(columnDescription *protos.FieldDescription, nullab Name: columnDescription.Name, Required: nullableEnabled && !columnDescription.Nullable, } - switch qvalue.QValueKind(columnDescription.Type) { + switch types.QValueKind(columnDescription.Type) { // boolean - case qvalue.QValueKindBoolean: + case types.QValueKindBoolean: bqField.Type = bigquery.BooleanFieldType // integer types - case qvalue.QValueKindInt8, qvalue.QValueKindInt16, qvalue.QValueKindInt32, qvalue.QValueKindInt64, - qvalue.QValueKindUInt8, qvalue.QValueKindUInt16, qvalue.QValueKindUInt32, qvalue.QValueKindUInt64: + case types.QValueKindInt8, types.QValueKindInt16, types.QValueKindInt32, types.QValueKindInt64, + types.QValueKindUInt8, types.QValueKindUInt16, types.QValueKindUInt32, types.QValueKindUInt64: bqField.Type = bigquery.IntegerFieldType // decimal types - case qvalue.QValueKindFloat32, qvalue.QValueKindFloat64: + case types.QValueKindFloat32, types.QValueKindFloat64: bqField.Type = bigquery.FloatFieldType - case qvalue.QValueKindNumeric: + case types.QValueKindNumeric: precision, scale := datatypes.GetNumericTypeForWarehouse(columnDescription.TypeModifier, datatypes.BigQueryNumericCompatibility{}) bqField.Type = bigquery.BigNumericFieldType bqField.Precision = int64(precision) bqField.Scale = int64(scale) + case types.QValueKindArrayNumeric: + precision, scale := datatypes.GetNumericTypeForWarehouse(columnDescription.TypeModifier, datatypes.BigQueryNumericCompatibility{}) + bqField.Type = bigquery.BigNumericFieldType + bqField.Precision = int64(precision) + bqField.Scale = int64(scale) + bqField.Repeated = true // string related - case qvalue.QValueKindString, qvalue.QValueKindEnum: + case types.QValueKindString, types.QValueKindEnum: bqField.Type = bigquery.StringFieldType // json related - case qvalue.QValueKindJSON, qvalue.QValueKindJSONB, qvalue.QValueKindHStore: + case types.QValueKindJSON, types.QValueKindJSONB, types.QValueKindHStore: bqField.Type = bigquery.JSONFieldType // time related - case 
qvalue.QValueKindTimestamp, qvalue.QValueKindTimestampTZ: + case types.QValueKindTimestamp, types.QValueKindTimestampTZ: bqField.Type = bigquery.TimestampFieldType - // TODO: https://github.com/PeerDB-io/peerdb/issues/189 - DATE support is incomplete - case qvalue.QValueKindDate: + case types.QValueKindDate: bqField.Type = bigquery.DateFieldType - // TODO: https://github.com/PeerDB-io/peerdb/issues/189 - TIME/TIMETZ support is incomplete - case qvalue.QValueKindTime, qvalue.QValueKindTimeTZ: + case types.QValueKindTime, types.QValueKindTimeTZ: bqField.Type = bigquery.TimeFieldType - // TODO: https://github.com/PeerDB-io/peerdb/issues/189 - handle INTERVAL types again, // bytes - case qvalue.QValueKindBytes: + case types.QValueKindBytes: bqField.Type = bigquery.BytesFieldType - case qvalue.QValueKindArrayInt16, qvalue.QValueKindArrayInt32, qvalue.QValueKindArrayInt64: + case types.QValueKindArrayInt16, types.QValueKindArrayInt32, types.QValueKindArrayInt64: bqField.Type = bigquery.IntegerFieldType bqField.Repeated = true - case qvalue.QValueKindArrayFloat32, qvalue.QValueKindArrayFloat64: + case types.QValueKindArrayFloat32, types.QValueKindArrayFloat64: bqField.Type = bigquery.FloatFieldType bqField.Repeated = true - case qvalue.QValueKindArrayBoolean: + case types.QValueKindArrayBoolean: bqField.Type = bigquery.BooleanFieldType bqField.Repeated = true - case qvalue.QValueKindArrayTimestamp, qvalue.QValueKindArrayTimestampTZ: + case types.QValueKindArrayTimestamp, types.QValueKindArrayTimestampTZ: bqField.Type = bigquery.TimestampFieldType bqField.Repeated = true - case qvalue.QValueKindArrayDate: + case types.QValueKindArrayDate: bqField.Type = bigquery.DateFieldType bqField.Repeated = true - case qvalue.QValueKindArrayString, qvalue.QValueKindArrayEnum: + case types.QValueKindArrayString, types.QValueKindArrayEnum, types.QValueKindArrayInterval: bqField.Type = bigquery.StringFieldType bqField.Repeated = true - case qvalue.QValueKindGeography, qvalue.QValueKindGeometry, qvalue.QValueKindPoint: + case types.QValueKindGeography, types.QValueKindGeometry, types.QValueKindPoint: bqField.Type = bigquery.GeographyFieldType - // UUID related - stored as strings for now - case qvalue.QValueKindUUID: + // UUID related - stored as strings + case types.QValueKindUUID: bqField.Type = bigquery.StringFieldType - case qvalue.QValueKindArrayUUID: + case types.QValueKindArrayUUID: bqField.Type = bigquery.StringFieldType bqField.Repeated = true // rest will be strings @@ -85,50 +88,53 @@ func qValueKindToBigQueryType(columnDescription *protos.FieldDescription, nullab } // BigQueryTypeToQValueKind converts a bigquery.FieldType to a QValueKind -func BigQueryTypeToQValueKind(fieldSchema *bigquery.FieldSchema) qvalue.QValueKind { +func BigQueryTypeToQValueKind(fieldSchema *bigquery.FieldSchema) types.QValueKind { switch fieldSchema.Type { case bigquery.StringFieldType: if fieldSchema.Repeated { - return qvalue.QValueKindArrayString + return types.QValueKindArrayString } - return qvalue.QValueKindString + return types.QValueKindString case bigquery.BytesFieldType: - return qvalue.QValueKindBytes + return types.QValueKindBytes case bigquery.IntegerFieldType: if fieldSchema.Repeated { - return qvalue.QValueKindArrayInt64 + return types.QValueKindArrayInt64 } - return qvalue.QValueKindInt64 + return types.QValueKindInt64 case bigquery.FloatFieldType: if fieldSchema.Repeated { - return qvalue.QValueKindArrayFloat64 + return types.QValueKindArrayFloat64 } - return qvalue.QValueKindFloat64 + return 
types.QValueKindFloat64 case bigquery.BooleanFieldType: if fieldSchema.Repeated { - return qvalue.QValueKindArrayBoolean + return types.QValueKindArrayBoolean } - return qvalue.QValueKindBoolean + return types.QValueKindBoolean case bigquery.TimestampFieldType: if fieldSchema.Repeated { - return qvalue.QValueKindArrayTimestamp + return types.QValueKindArrayTimestamp } - return qvalue.QValueKindTimestamp + return types.QValueKindTimestamp case bigquery.DateFieldType: if fieldSchema.Repeated { - return qvalue.QValueKindArrayDate + return types.QValueKindArrayDate } - return qvalue.QValueKindDate + return types.QValueKindDate case bigquery.TimeFieldType: - return qvalue.QValueKindTime + return types.QValueKindTime case bigquery.NumericFieldType, bigquery.BigNumericFieldType: - return qvalue.QValueKindNumeric + if fieldSchema.Repeated { + return types.QValueKindArrayNumeric + } + return types.QValueKindNumeric case bigquery.GeographyFieldType: - return qvalue.QValueKindGeography + return types.QValueKindGeography case bigquery.JSONFieldType: - return qvalue.QValueKindJSON + return types.QValueKindJSON default: - return qvalue.QValueKindInvalid + return types.QValueKindInvalid } } @@ -154,8 +160,8 @@ func qValueKindToBigQueryTypeString(columnDescription *protos.FieldDescription, return bqType } -func BigQueryFieldToQField(bqField *bigquery.FieldSchema) qvalue.QField { - return qvalue.QField{ +func BigQueryFieldToQField(bqField *bigquery.FieldSchema) types.QField { + return types.QField{ Name: bqField.Name, Type: BigQueryTypeToQValueKind(bqField), Precision: int16(bqField.Precision), diff --git a/flow/connectors/clickhouse/cdc.go b/flow/connectors/clickhouse/cdc.go index e6c8a6c601..b0b576ea3d 100644 --- a/flow/connectors/clickhouse/cdc.go +++ b/flow/connectors/clickhouse/cdc.go @@ -2,8 +2,6 @@ package connclickhouse import ( "context" - "database/sql" - "errors" "fmt" "log/slog" @@ -12,38 +10,37 @@ import ( "github.com/PeerDB-io/peerdb/flow/connectors/utils" "github.com/PeerDB-io/peerdb/flow/generated/protos" + "github.com/PeerDB-io/peerdb/flow/internal" "github.com/PeerDB-io/peerdb/flow/model" "github.com/PeerDB-io/peerdb/flow/model/qvalue" "github.com/PeerDB-io/peerdb/flow/shared" peerdb_clickhouse "github.com/PeerDB-io/peerdb/flow/shared/clickhouse" + "github.com/PeerDB-io/peerdb/flow/shared/types" ) const ( - checkIfTableExistsSQL = `SELECT exists(SELECT 1 FROM system.tables WHERE database = ? AND name = ?) AS table_exists;` - dropTableIfExistsSQL = "DROP TABLE IF EXISTS `%s`;" + checkIfTableExistsSQL = `SELECT exists(SELECT 1 FROM system.tables WHERE database = %s AND name = %s) AS table_exists` + dropTableIfExistsSQL = "DROP TABLE IF EXISTS %s" ) -// getRawTableName returns the raw table name for the given table identifier. -func (c *ClickHouseConnector) getRawTableName(flowJobName string) string { +// GetRawTableName returns the raw table name for the given table identifier. 
+func (c *ClickHouseConnector) GetRawTableName(flowJobName string) string { return "_peerdb_raw_" + shared.ReplaceIllegalCharactersWithUnderscores(flowJobName) } func (c *ClickHouseConnector) checkIfTableExists(ctx context.Context, databaseName string, tableIdentifier string) (bool, error) { - var result sql.NullInt32 - err := c.queryRow(ctx, checkIfTableExistsSQL, databaseName, tableIdentifier).Scan(&result) - if err != nil { + var result uint8 + if err := c.queryRow(ctx, + fmt.Sprintf(checkIfTableExistsSQL, peerdb_clickhouse.QuoteLiteral(databaseName), peerdb_clickhouse.QuoteLiteral(tableIdentifier)), + ).Scan(&result); err != nil { return false, fmt.Errorf("error while reading result row: %w", err) } - if !result.Valid { - return false, errors.New("[clickhouse] checkIfTableExists: result is not valid") - } - - return result.Int32 == 1, nil + return result == 1, nil } func (c *ClickHouseConnector) CreateRawTable(ctx context.Context, req *protos.CreateRawTableInput) (*protos.CreateRawTableOutput, error) { - rawTableName := c.getRawTableName(req.FlowJobName) + rawTableName := c.GetRawTableName(req.FlowJobName) createRawTableSQL := `CREATE TABLE IF NOT EXISTS %s ( _peerdb_uid UUID, @@ -66,12 +63,13 @@ func (c *ClickHouseConnector) CreateRawTable(ctx context.Context, req *protos.Cr }, nil } -func (c *ClickHouseConnector) avroSyncMethod(flowJobName string, env map[string]string) *ClickHouseAvroSyncMethod { +func (c *ClickHouseConnector) avroSyncMethod(flowJobName string, env map[string]string, version uint32) *ClickHouseAvroSyncMethod { qrepConfig := &protos.QRepConfig{ StagingPath: c.credsProvider.BucketPath, FlowJobName: flowJobName, - DestinationTableIdentifier: c.getRawTableName(flowJobName), + DestinationTableIdentifier: c.GetRawTableName(flowJobName), Env: env, + Version: version, } return NewClickHouseAvroSyncMethod(qrepConfig, c) } @@ -82,17 +80,26 @@ func (c *ClickHouseConnector) syncRecordsViaAvro( syncBatchID int64, ) (*model.SyncResponse, error) { tableNameRowsMapping := utils.InitialiseTableRowsMap(req.TableMappings) - streamReq := model.NewRecordsToStreamRequest(req.Records.GetRecords(), tableNameRowsMapping, syncBatchID) - stream, err := utils.RecordsToRawTableStream(streamReq) + unboundedNumericAsString, err := internal.PeerDBEnableClickHouseNumericAsString(ctx, req.Env) + if err != nil { + return nil, err + } + streamReq := model.NewRecordsToStreamRequest( + req.Records.GetRecords(), tableNameRowsMapping, syncBatchID, unboundedNumericAsString, + protos.DBType_CLICKHOUSE, + ) + numericTruncator := model.NewStreamNumericTruncator(req.TableMappings, peerdb_clickhouse.NumericDestinationTypes) + stream, err := utils.RecordsToRawTableStream(streamReq, numericTruncator) if err != nil { return nil, fmt.Errorf("failed to convert records to raw table stream: %w", err) } - avroSyncer := c.avroSyncMethod(req.FlowJobName, req.Env) + avroSyncer := c.avroSyncMethod(req.FlowJobName, req.Env, req.Version) numRecords, err := avroSyncer.SyncRecords(ctx, req.Env, stream, req.FlowJobName, syncBatchID) if err != nil { return nil, err } + warnings := numericTruncator.Warnings() if err := c.ReplayTableSchemaDeltas(ctx, req.Env, req.FlowJobName, req.Records.SchemaDeltas); err != nil { return nil, fmt.Errorf("failed to sync schema changes: %w", err) @@ -104,6 +111,7 @@ func (c *ClickHouseConnector) syncRecordsViaAvro( CurrentSyncBatchID: syncBatchID, TableNameRowsMapping: tableNameRowsMapping, TableSchemaDeltas: req.Records.SchemaDeltas, + Warnings: warnings, }, nil } @@ -137,15 +145,17 @@ func (c 
*ClickHouseConnector) ReplayTableSchemaDeltas( } for _, addedColumn := range schemaDelta.AddedColumns { - clickHouseColType, err := qvalue.QValueKind(addedColumn.Type).ToDWHColumnType( - ctx, env, protos.DBType_CLICKHOUSE, addedColumn, schemaDelta.NullableEnabled, + qvKind := types.QValueKind(addedColumn.Type) + clickHouseColType, err := qvalue.ToDWHColumnType( + ctx, qvKind, env, protos.DBType_CLICKHOUSE, addedColumn, schemaDelta.NullableEnabled, ) if err != nil { return fmt.Errorf("failed to convert column type %s to ClickHouse type: %w", addedColumn.Type, err) } if err := c.execWithLogging(ctx, - fmt.Sprintf("ALTER TABLE `%s` ADD COLUMN IF NOT EXISTS `%s` %s", - schemaDelta.DstTableName, addedColumn.Name, clickHouseColType), + fmt.Sprintf("ALTER TABLE %s ADD COLUMN IF NOT EXISTS %s %s", + peerdb_clickhouse.QuoteIdentifier(schemaDelta.DstTableName), + peerdb_clickhouse.QuoteIdentifier(addedColumn.Name), clickHouseColType), ); err != nil { return fmt.Errorf("failed to add column %s for table %s: %w", addedColumn.Name, schemaDelta.DstTableName, err) } @@ -165,13 +175,19 @@ func (c *ClickHouseConnector) RenameTables( tableNameSchemaMapping map[string]*protos.TableSchema, ) (*protos.RenameTablesOutput, error) { for _, renameRequest := range req.RenameTableOptions { + if renameRequest.CurrentName == renameRequest.NewName { + c.logger.Info("table rename is nop, probably Null table engine, skipping rename for it", + slog.String("table", renameRequest.CurrentName)) + continue + } + resyncTableExists, err := c.checkIfTableExists(ctx, c.config.Database, renameRequest.CurrentName) if err != nil { return nil, fmt.Errorf("unable to check if resync table %s exists: %w", renameRequest.CurrentName, err) } if !resyncTableExists { - c.logger.Info(fmt.Sprintf("table '%s' does not exist, skipping rename for it", renameRequest.CurrentName)) + c.logger.Info("table does not exist, skipping rename for it", slog.String("table", renameRequest.CurrentName)) continue } @@ -186,9 +202,12 @@ func (c *ClickHouseConnector) RenameTables( c.logger.Info("attempting atomic exchange", slog.String("OldName", renameRequest.CurrentName), slog.String("NewName", renameRequest.NewName)) if err = c.execWithLogging(ctx, - fmt.Sprintf("EXCHANGE TABLES `%s` and `%s`", renameRequest.NewName, renameRequest.CurrentName), + fmt.Sprintf("EXCHANGE TABLES %s and %s", + peerdb_clickhouse.QuoteIdentifier(renameRequest.NewName), peerdb_clickhouse.QuoteIdentifier(renameRequest.CurrentName)), ); err == nil { - if err := c.execWithLogging(ctx, fmt.Sprintf(dropTableIfExistsSQL, renameRequest.CurrentName)); err != nil { + if err := c.execWithLogging(ctx, + fmt.Sprintf(dropTableIfExistsSQL, peerdb_clickhouse.QuoteIdentifier(renameRequest.CurrentName)), + ); err != nil { return nil, fmt.Errorf("unable to drop exchanged table %s: %w", renameRequest.CurrentName, err) } } else if ex, ok := err.(*clickhouse.Exception); !ok || ex.Code != 48 { @@ -201,13 +220,16 @@ func (c *ClickHouseConnector) RenameTables( // either original table doesn't exist, in which case it is safe to just run rename, // or err is set (in which case err comes from EXCHANGE TABLES) if !originalTableExists || err != nil { - if err := c.execWithLogging(ctx, fmt.Sprintf(dropTableIfExistsSQL, renameRequest.NewName)); err != nil { + if err := c.execWithLogging(ctx, + fmt.Sprintf(dropTableIfExistsSQL, peerdb_clickhouse.QuoteIdentifier(renameRequest.NewName)), + ); err != nil { return nil, fmt.Errorf("unable to drop table %s: %w", renameRequest.NewName, err) } - if err := 
c.execWithLogging(ctx, - fmt.Sprintf("RENAME TABLE `%s` TO `%s`", renameRequest.CurrentName, renameRequest.NewName), - ); err != nil { + if err := c.execWithLogging(ctx, fmt.Sprintf("RENAME TABLE %s TO %s", + peerdb_clickhouse.QuoteIdentifier(renameRequest.CurrentName), + peerdb_clickhouse.QuoteIdentifier(renameRequest.NewName), + )); err != nil { return nil, fmt.Errorf("unable to rename table %s to %s: %w", renameRequest.CurrentName, renameRequest.NewName, err) } } @@ -223,8 +245,8 @@ func (c *ClickHouseConnector) RenameTables( func (c *ClickHouseConnector) SyncFlowCleanup(ctx context.Context, jobName string) error { // delete raw table if exists - rawTableIdentifier := c.getRawTableName(jobName) - if err := c.execWithLogging(ctx, fmt.Sprintf(dropTableIfExistsSQL, rawTableIdentifier)); err != nil { + rawTableIdentifier := c.GetRawTableName(jobName) + if err := c.execWithLogging(ctx, fmt.Sprintf(dropTableIfExistsSQL, peerdb_clickhouse.QuoteIdentifier(rawTableIdentifier))); err != nil { return fmt.Errorf("[clickhouse] unable to drop raw table: %w", err) } c.logger.Info("successfully dropped raw table " + rawTableIdentifier) @@ -240,14 +262,14 @@ func (c *ClickHouseConnector) RemoveTableEntriesFromRawTable( // Better to use lightweight deletes here as the main goal is to // not have the rows in the table be visible by the NormalizeRecords' // INSERT INTO SELECT queries - err := c.execWithLogging(ctx, fmt.Sprintf("DELETE FROM `%s` WHERE _peerdb_destination_table_name = %s"+ + if err := c.execWithLogging(ctx, fmt.Sprintf("DELETE FROM `%s` WHERE _peerdb_destination_table_name = %s"+ " AND _peerdb_batch_id > %d AND _peerdb_batch_id <= %d", - c.getRawTableName(req.FlowJobName), peerdb_clickhouse.QuoteLiteral(tableName), req.NormalizeBatchId, req.SyncBatchId)) - if err != nil { + c.GetRawTableName(req.FlowJobName), peerdb_clickhouse.QuoteLiteral(tableName), req.NormalizeBatchId, req.SyncBatchId), + ); err != nil { return fmt.Errorf("unable to remove table %s from raw table: %w", tableName, err) } - c.logger.Info(fmt.Sprintf("successfully removed entries for table '%s' from raw table", tableName)) + c.logger.Info("successfully removed entries for table from raw table", slog.String("table", tableName)) } return nil diff --git a/flow/connectors/clickhouse/clickhouse.go b/flow/connectors/clickhouse/clickhouse.go index 7ab49fef0a..0701724361 100644 --- a/flow/connectors/clickhouse/clickhouse.go +++ b/flow/connectors/clickhouse/clickhouse.go @@ -22,9 +22,9 @@ import ( "github.com/PeerDB-io/peerdb/flow/connectors/utils" "github.com/PeerDB-io/peerdb/flow/generated/protos" "github.com/PeerDB-io/peerdb/flow/internal" - "github.com/PeerDB-io/peerdb/flow/model/qvalue" "github.com/PeerDB-io/peerdb/flow/shared" chvalidate "github.com/PeerDB-io/peerdb/flow/shared/clickhouse" + "github.com/PeerDB-io/peerdb/flow/shared/types" ) type ClickHouseConnector struct { @@ -52,19 +52,28 @@ func NewClickHouseConnector( return nil, err } - credentialsProvider, err := utils.GetAWSCredentialsProvider(ctx, "clickhouse", utils.PeerAWSCredentials{ - Credentials: aws.Credentials{ - AccessKeyID: config.AccessKeyId, - SecretAccessKey: config.SecretAccessKey, - }, - EndpointUrl: config.Endpoint, - Region: config.Region, - }) + var awsConfig utils.PeerAWSCredentials + var awsBucketPath string + if config.S3 != nil { + awsConfig = utils.NewPeerAWSCredentials(config.S3) + awsBucketPath = config.S3.Url + } else { + awsConfig = utils.PeerAWSCredentials{ + Credentials: aws.Credentials{ + AccessKeyID: config.AccessKeyId, + SecretAccessKey: 
config.SecretAccessKey, + }, + EndpointUrl: config.Endpoint, + Region: config.Region, + } + awsBucketPath = config.S3Path + } + + credentialsProvider, err := utils.GetAWSCredentialsProvider(ctx, "clickhouse", awsConfig) if err != nil { return nil, err } - awsBucketPath := config.S3Path if awsBucketPath == "" { deploymentUID := internal.PeerDBDeploymentUID() flowName, _ := ctx.Value(shared.FlowNameKey).(string) @@ -157,11 +166,9 @@ func (c *ClickHouseConnector) ValidateCheck(ctx context.Context) error { } validateDummyTableName := "peerdb_validation_" + shared.RandomString(4) // create a table - err := c.exec(ctx, fmt.Sprintf(`CREATE TABLE IF NOT EXISTS %s ( - id UInt64 - ) ENGINE = ReplacingMergeTree ORDER BY id;`, - validateDummyTableName)) - if err != nil { + if err := c.exec(ctx, + fmt.Sprintf(`CREATE TABLE IF NOT EXISTS %s (id UInt64) ENGINE = ReplacingMergeTree ORDER BY id;`, validateDummyTableName), + ); err != nil { return fmt.Errorf("failed to create validation table %s: %w", validateDummyTableName, err) } defer func() { @@ -174,21 +181,21 @@ func (c *ClickHouseConnector) ValidateCheck(ctx context.Context) error { // add a column if err := c.exec(ctx, - fmt.Sprintf("ALTER TABLE `%s` ADD COLUMN updated_at DateTime64(9) DEFAULT now64()", validateDummyTableName), + fmt.Sprintf("ALTER TABLE %s ADD COLUMN updated_at DateTime64(9) DEFAULT now64()", validateDummyTableName), ); err != nil { return fmt.Errorf("failed to add column to validation table %s: %w", validateDummyTableName, err) } // rename the table if err := c.exec(ctx, - fmt.Sprintf("RENAME TABLE `%s` TO `%s`", validateDummyTableName, validateDummyTableName+"_renamed"), + fmt.Sprintf("RENAME TABLE %s TO %s", validateDummyTableName, validateDummyTableName+"_renamed"), ); err != nil { return fmt.Errorf("failed to rename validation table %s: %w", validateDummyTableName, err) } validateDummyTableName += "_renamed" // insert a row - if err := c.exec(ctx, fmt.Sprintf("INSERT INTO `%s` VALUES (1, now64())", validateDummyTableName)); err != nil { + if err := c.exec(ctx, fmt.Sprintf("INSERT INTO %s VALUES (1, now64())", validateDummyTableName)); err != nil { return fmt.Errorf("failed to insert into validation table %s: %w", validateDummyTableName, err) } @@ -246,7 +253,7 @@ func Connect(ctx context.Context, env map[string]string, config *protos.Clickhou } conn, err := clickhouse.Open(&clickhouse.Options{ - Addr: []string{fmt.Sprintf("%s:%d", config.Host, config.Port)}, + Addr: []string{shared.JoinHostPort(config.Host, config.Port)}, Auth: clickhouse.Auth{ Database: config.Database, Username: config.User, @@ -278,21 +285,20 @@ func Connect(ctx context.Context, env map[string]string, config *protos.Clickhou return conn, nil } -//nolint:unparam -func (c *ClickHouseConnector) exec(ctx context.Context, query string, args ...any) error { - return chvalidate.Exec(ctx, c.logger, c.database, query, args...) +func (c *ClickHouseConnector) exec(ctx context.Context, query string) error { + return chvalidate.Exec(ctx, c.logger, c.database, query) } -func (c *ClickHouseConnector) execWithConnection(ctx context.Context, conn clickhouse.Conn, query string, args ...any) error { - return chvalidate.Exec(ctx, c.logger, conn, query, args...) 
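
Reviewer note (not part of the patch): with the variadic args dropped from exec/query/queryRow, ClickHouse statements are built with explicit quoting helpers rather than driver placeholders. A small sketch of that pattern using the same helpers this diff relies on; the database and table names below are examples only.

```go
package example

import (
	"fmt"

	peerdb_clickhouse "github.com/PeerDB-io/peerdb/flow/shared/clickhouse"
)

// buildTableExistsQuery mirrors the placeholder-free style of checkIfTableExistsSQL
// in this diff: literals are escaped via QuoteLiteral instead of being bound as
// `?` parameters.
func buildTableExistsQuery(database, table string) string {
	return fmt.Sprintf(
		"SELECT exists(SELECT 1 FROM system.tables WHERE database = %s AND name = %s) AS table_exists",
		peerdb_clickhouse.QuoteLiteral(database),
		peerdb_clickhouse.QuoteLiteral(table),
	)
}
```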
+func (c *ClickHouseConnector) execWithConnection(ctx context.Context, conn clickhouse.Conn, query string) error { + return chvalidate.Exec(ctx, c.logger, conn, query) } -func (c *ClickHouseConnector) query(ctx context.Context, query string, args ...any) (driver.Rows, error) { - return chvalidate.Query(ctx, c.logger, c.database, query, args...) +func (c *ClickHouseConnector) query(ctx context.Context, query string) (driver.Rows, error) { + return chvalidate.Query(ctx, c.logger, c.database, query) } -func (c *ClickHouseConnector) queryRow(ctx context.Context, query string, args ...any) driver.Row { - return chvalidate.QueryRow(ctx, c.logger, c.database, query, args...) +func (c *ClickHouseConnector) queryRow(ctx context.Context, query string) driver.Row { + return chvalidate.QueryRow(ctx, c.logger, c.database, query) } func (c *ClickHouseConnector) Close() error { @@ -370,51 +376,57 @@ func GetTableSchemaForTable(tm *protos.TableMapping, columns []driver.ColumnType continue } - var qkind qvalue.QValueKind + var qkind types.QValueKind switch column.DatabaseTypeName() { case "String", "Nullable(String)", "LowCardinality(String)", "LowCardinality(Nullable(String))": - qkind = qvalue.QValueKindString + qkind = types.QValueKindString case "Bool", "Nullable(Bool)": - qkind = qvalue.QValueKindBoolean + qkind = types.QValueKindBoolean case "Int8", "Nullable(Int8)": - qkind = qvalue.QValueKindInt8 + qkind = types.QValueKindInt8 case "Int16", "Nullable(Int16)": - qkind = qvalue.QValueKindInt16 + qkind = types.QValueKindInt16 case "Int32", "Nullable(Int32)": - qkind = qvalue.QValueKindInt32 + qkind = types.QValueKindInt32 case "Int64", "Nullable(Int64)": - qkind = qvalue.QValueKindInt64 + qkind = types.QValueKindInt64 case "UInt8", "Nullable(UInt8)": - qkind = qvalue.QValueKindUInt8 + qkind = types.QValueKindUInt8 case "UInt16", "Nullable(UInt16)": - qkind = qvalue.QValueKindUInt16 + qkind = types.QValueKindUInt16 case "UInt32", "Nullable(UInt32)": - qkind = qvalue.QValueKindUInt32 + qkind = types.QValueKindUInt32 case "UInt64", "Nullable(UInt64)": - qkind = qvalue.QValueKindUInt64 + qkind = types.QValueKindUInt64 case "UUID", "Nullable(UUID)": - qkind = qvalue.QValueKindUUID + qkind = types.QValueKindUUID case "DateTime64(6)", "Nullable(DateTime64(6))", "DateTime64(9)", "Nullable(DateTime64(9))": - qkind = qvalue.QValueKindTimestamp + qkind = types.QValueKindTimestamp case "Date32", "Nullable(Date32)": - qkind = qvalue.QValueKindDate + qkind = types.QValueKindDate case "Float32", "Nullable(Float32)": - qkind = qvalue.QValueKindFloat32 + qkind = types.QValueKindFloat32 case "Float64", "Nullable(Float64)": - qkind = qvalue.QValueKindFloat64 + qkind = types.QValueKindFloat64 case "Array(Int32)": - qkind = qvalue.QValueKindArrayInt32 + qkind = types.QValueKindArrayInt32 case "Array(Float32)": - qkind = qvalue.QValueKindArrayFloat32 + qkind = types.QValueKindArrayFloat32 case "Array(Float64)": - qkind = qvalue.QValueKindArrayFloat64 + qkind = types.QValueKindArrayFloat64 case "Array(String)", "Array(LowCardinality(String))": - qkind = qvalue.QValueKindArrayString + qkind = types.QValueKindArrayString case "Array(UUID)": - qkind = qvalue.QValueKindArrayUUID + qkind = types.QValueKindArrayUUID + case "Array(DateTime64(6))": + qkind = types.QValueKindArrayTimestamp default: if strings.Contains(column.DatabaseTypeName(), "Decimal") { - qkind = qvalue.QValueKindNumeric + if strings.HasPrefix(column.DatabaseTypeName(), "Array(") { + qkind = types.QValueKindArrayNumeric + } else { + qkind = 
types.QValueKindNumeric + } } else { return nil, fmt.Errorf("failed to resolve QValueKind for %s", column.DatabaseTypeName()) } @@ -438,6 +450,7 @@ func GetTableSchemaForTable(tm *protos.TableMapping, columns []driver.ColumnType func (c *ClickHouseConnector) GetTableSchema( ctx context.Context, _env map[string]string, + _version uint32, _system protos.TypeSystem, tableMappings []*protos.TableMapping, ) (map[string]*protos.TableSchema, error) { diff --git a/flow/connectors/clickhouse/normalize.go b/flow/connectors/clickhouse/normalize.go index e30a9f3088..6625c235e1 100644 --- a/flow/connectors/clickhouse/normalize.go +++ b/flow/connectors/clickhouse/normalize.go @@ -3,8 +3,6 @@ package connclickhouse import ( "cmp" "context" - "database/sql" - "errors" "fmt" "log/slog" "slices" @@ -14,13 +12,13 @@ import ( "github.com/ClickHouse/clickhouse-go/v2" "golang.org/x/sync/errgroup" - "github.com/PeerDB-io/peerdb/flow/connectors/utils" "github.com/PeerDB-io/peerdb/flow/generated/protos" "github.com/PeerDB-io/peerdb/flow/internal" "github.com/PeerDB-io/peerdb/flow/model" "github.com/PeerDB-io/peerdb/flow/model/qvalue" "github.com/PeerDB-io/peerdb/flow/shared" peerdb_clickhouse "github.com/PeerDB-io/peerdb/flow/shared/clickhouse" + "github.com/PeerDB-io/peerdb/flow/shared/types" ) const ( @@ -47,23 +45,23 @@ func (c *ClickHouseConnector) SetupNormalizedTable( ctx context.Context, tx any, config *protos.SetupNormalizedTableBatchInput, - tableIdentifier string, - tableSchema *protos.TableSchema, + destinationTableIdentifier string, + sourceTableSchema *protos.TableSchema, ) (bool, error) { - tableAlreadyExists, err := c.checkIfTableExists(ctx, c.config.Database, tableIdentifier) + tableAlreadyExists, err := c.checkIfTableExists(ctx, c.config.Database, destinationTableIdentifier) if err != nil { return false, fmt.Errorf("error occurred while checking if destination ClickHouse table exists: %w", err) } if tableAlreadyExists && !config.IsResync { - c.logger.Info("[ch] destination ClickHouse table already exists, skipping", "table", tableIdentifier) + c.logger.Info("[ch] destination ClickHouse table already exists, skipping", "table", destinationTableIdentifier) return true, nil } normalizedTableCreateSQL, err := generateCreateTableSQLForNormalizedTable( ctx, config, - tableIdentifier, - tableSchema, + destinationTableIdentifier, + sourceTableSchema, ) if err != nil { return false, fmt.Errorf("error while generating create table sql for destination ClickHouse table: %w", err) @@ -111,7 +109,7 @@ func generateCreateTableSQLForNormalizedTable( for _, column := range tableSchema.Columns { colName := column.Name dstColName := colName - colType := qvalue.QValueKind(column.Type) + colType := types.QValueKind(column.Type) var columnNullableEnabled bool var clickHouseType string if tableMapping != nil { @@ -132,8 +130,8 @@ func generateCreateTableSQLForNormalizedTable( if clickHouseType == "" { var err error - clickHouseType, err = colType.ToDWHColumnType( - ctx, config.Env, protos.DBType_CLICKHOUSE, column, tableSchema.NullableEnabled || columnNullableEnabled, + clickHouseType, err = qvalue.ToDWHColumnType( + ctx, colType, config.Env, protos.DBType_CLICKHOUSE, column, tableSchema.NullableEnabled || columnNullableEnabled, ) if err != nil { return "", fmt.Errorf("error while converting column type to ClickHouse type: %w", err) @@ -256,11 +254,6 @@ func getOrderedOrderByColumns( return orderbyColumns } -type NormalizeInsertQuery struct { - query string - tableName string -} - func (c *ClickHouseConnector) 
NormalizeRecords( ctx context.Context, req *model.NormalizeRecordsRequest, @@ -279,7 +272,7 @@ func (c *ClickHouseConnector) NormalizeRecords( }, nil } - if err := c.copyAvroStagesToDestination(ctx, req.FlowJobName, normBatchID, req.SyncBatchID, req.Env); err != nil { + if err := c.copyAvroStagesToDestination(ctx, req.FlowJobName, normBatchID, req.SyncBatchID, req.Env, req.Version); err != nil { return model.NormalizeResponse{}, fmt.Errorf("failed to copy avro stages to destination: %w", err) } @@ -333,8 +326,8 @@ func (c *ClickHouseConnector) NormalizeRecords( numParts = max(numParts, 1) - queries := make(chan NormalizeInsertQuery) - rawTbl := c.getRawTableName(req.FlowJobName) + queries := make(chan NormalizeQueryGenerator) + rawTbl := c.GetRawTableName(req.FlowJobName) group, errCtx := errgroup.WithContext(ctx) for i := range parallelNormalize { @@ -355,11 +348,27 @@ func (c *ClickHouseConnector) NormalizeRecords( c.logger.Info("executing INSERT command to ClickHouse table", slog.Int64("syncBatchId", req.SyncBatchID), slog.Int64("normalizeBatchId", normBatchID), - slog.String("destinationTable", insertIntoSelectQuery.tableName), - slog.String("query", insertIntoSelectQuery.query)) + slog.String("destinationTable", insertIntoSelectQuery.TableName), + slog.String("query", insertIntoSelectQuery.Query)) + + if err := c.execWithConnection(errCtx, chConn, insertIntoSelectQuery.Query); err != nil { + c.logger.Error("[clickhouse] error while inserting into target clickhouse table", + slog.String("table", insertIntoSelectQuery.TableName), + slog.Int64("syncBatchID", req.SyncBatchID), + slog.Int64("normalizeBatchID", normBatchID), + slog.Any("error", err)) + return fmt.Errorf("error while inserting into target clickhouse table %s: %w", insertIntoSelectQuery.TableName, err) + } - if err := c.execWithConnection(ctx, chConn, insertIntoSelectQuery.query); err != nil { - return fmt.Errorf("error while inserting into destination ClickHouse table %s: %w", insertIntoSelectQuery.tableName, err) + if insertIntoSelectQuery.Part == numParts-1 { + c.logger.Info("[clickhouse] set last normalized batch id for table", + slog.String("table", insertIntoSelectQuery.TableName), + slog.Int64("syncBatchID", req.SyncBatchID), + slog.Int64("lastNormalizedBatchID", normBatchID)) + err := c.SetLastNormalizedBatchIDForTable(ctx, req.FlowJobName, insertIntoSelectQuery.TableName, req.SyncBatchID) + if err != nil { + return fmt.Errorf("error while setting last synced batch id for table %s: %w", insertIntoSelectQuery.TableName, err) + } } } return nil @@ -367,208 +376,52 @@ func (c *ClickHouseConnector) NormalizeRecords( } for _, tbl := range destinationTableNames { - for numPart := range numParts { - selectQuery := strings.Builder{} - selectQuery.WriteString("SELECT ") - - colSelector := strings.Builder{} - colSelector.WriteByte('(') - - schema := req.TableNameSchemaMapping[tbl] - - var tableMapping *protos.TableMapping - for _, tm := range req.TableMappings { - if tm.DestinationTableIdentifier == tbl { - tableMapping = tm - break - } - } - - var escapedSourceSchemaSelectorFragment string - if sourceSchemaAsDestinationColumn { - if tableMapping == nil { - return model.NormalizeResponse{}, errors.New("could not look up source schema info") - } - schemaTable, err := utils.ParseSchemaTable(tableMapping.SourceTableIdentifier) - if err != nil { - return model.NormalizeResponse{}, err - } - escapedSourceSchemaSelectorFragment = fmt.Sprintf("%s AS %s,", - peerdb_clickhouse.QuoteLiteral(schemaTable.Schema), 
peerdb_clickhouse.QuoteIdentifier(sourceSchemaColName)) - } - - projection := strings.Builder{} - projectionUpdate := strings.Builder{} - - for _, column := range schema.Columns { - colName := column.Name - dstColName := colName - colType := qvalue.QValueKind(column.Type) - - var clickHouseType string - var columnNullableEnabled bool - if tableMapping != nil { - for _, col := range tableMapping.Columns { - if col.SourceName == colName { - if col.DestinationName != "" { - dstColName = col.DestinationName - } - if col.DestinationType != "" { - // TODO can we restrict this to avoid injection? - clickHouseType = col.DestinationType - } - columnNullableEnabled = col.NullableEnabled - break - } - } - } - - fmt.Fprintf(&colSelector, "%s,", peerdb_clickhouse.QuoteIdentifier(dstColName)) - if clickHouseType == "" { - var err error - clickHouseType, err = colType.ToDWHColumnType( - ctx, req.Env, protos.DBType_CLICKHOUSE, column, schema.NullableEnabled || columnNullableEnabled, - ) - if err != nil { - close(queries) - return model.NormalizeResponse{}, fmt.Errorf("error while converting column type to clickhouse type: %w", err) - } - } - - switch clickHouseType { - case "Date32", "Nullable(Date32)": - fmt.Fprintf(&projection, - "toDate32(parseDateTime64BestEffortOrNull(JSONExtractString(_peerdb_data, %s),6)) AS %s,", - peerdb_clickhouse.QuoteLiteral(colName), - peerdb_clickhouse.QuoteIdentifier(dstColName), - ) - if enablePrimaryUpdate { - fmt.Fprintf(&projectionUpdate, - "toDate32(parseDateTime64BestEffortOrNull(JSONExtractString(_peerdb_match_data, %s),6)) AS %s,", - peerdb_clickhouse.QuoteLiteral(colName), - peerdb_clickhouse.QuoteIdentifier(dstColName), - ) - } - case "DateTime64(6)", "Nullable(DateTime64(6))": - fmt.Fprintf(&projection, - "parseDateTime64BestEffortOrNull(JSONExtractString(_peerdb_data, %s),6) AS %s,", - peerdb_clickhouse.QuoteLiteral(colName), - peerdb_clickhouse.QuoteIdentifier(dstColName), - ) - if enablePrimaryUpdate { - fmt.Fprintf(&projectionUpdate, - "parseDateTime64BestEffortOrNull(JSONExtractString(_peerdb_match_data, %s),6) AS %s,", - peerdb_clickhouse.QuoteLiteral(colName), - peerdb_clickhouse.QuoteIdentifier(dstColName), - ) - } - default: - projLen := projection.Len() - if colType == qvalue.QValueKindBytes { - format, err := internal.PeerDBBinaryFormat(ctx, req.Env) - if err != nil { - return model.NormalizeResponse{}, err - } - switch format { - case internal.BinaryFormatRaw: - fmt.Fprintf(&projection, - "base64Decode(JSONExtractString(_peerdb_data, %s)) AS %s,", - peerdb_clickhouse.QuoteLiteral(colName), - peerdb_clickhouse.QuoteIdentifier(dstColName), - ) - if enablePrimaryUpdate { - fmt.Fprintf(&projectionUpdate, - "base64Decode(JSONExtractString(_peerdb_match_data, %s)) AS %s,", - peerdb_clickhouse.QuoteLiteral(colName), - peerdb_clickhouse.QuoteIdentifier(dstColName), - ) - } - case internal.BinaryFormatHex: - fmt.Fprintf(&projection, "hex(base64Decode(JSONExtractString(_peerdb_data, %s))) AS %s,", - peerdb_clickhouse.QuoteLiteral(colName), - peerdb_clickhouse.QuoteIdentifier(dstColName), - ) - if enablePrimaryUpdate { - fmt.Fprintf(&projectionUpdate, - "hex(base64Decode(JSONExtractString(_peerdb_match_data, %s))) AS %s,", - peerdb_clickhouse.QuoteLiteral(colName), - peerdb_clickhouse.QuoteIdentifier(dstColName), - ) - } - } - } - - // proceed with default logic if logic above didn't add any sql - if projection.Len() == projLen { - fmt.Fprintf( - &projection, - "JSONExtract(_peerdb_data, %s, %s) AS %s,", - peerdb_clickhouse.QuoteLiteral(colName), - 
peerdb_clickhouse.QuoteLiteral(clickHouseType), - peerdb_clickhouse.QuoteIdentifier(dstColName), - ) - if enablePrimaryUpdate { - fmt.Fprintf( - &projectionUpdate, - "JSONExtract(_peerdb_match_data, %s, %s) AS %s,", - peerdb_clickhouse.QuoteLiteral(colName), - peerdb_clickhouse.QuoteLiteral(clickHouseType), - peerdb_clickhouse.QuoteIdentifier(dstColName), - ) - } - } - } - } - - if sourceSchemaAsDestinationColumn { - projection.WriteString(escapedSourceSchemaSelectorFragment) - fmt.Fprintf(&colSelector, "%s,", peerdb_clickhouse.QuoteIdentifier(sourceSchemaColName)) - } - - // add _peerdb_sign as _peerdb_record_type / 2 - fmt.Fprintf(&projection, "intDiv(_peerdb_record_type, 2) AS %s,", peerdb_clickhouse.QuoteIdentifier(signColName)) - fmt.Fprintf(&colSelector, "%s,", peerdb_clickhouse.QuoteIdentifier(signColName)) - - // add _peerdb_timestamp as _peerdb_version - fmt.Fprintf(&projection, "_peerdb_timestamp AS %s", peerdb_clickhouse.QuoteIdentifier(versionColName)) - fmt.Fprintf(&colSelector, "%s) ", peerdb_clickhouse.QuoteIdentifier(versionColName)) - - selectQuery.WriteString(projection.String()) - fmt.Fprintf(&selectQuery, - " FROM %s WHERE _peerdb_batch_id > %d AND _peerdb_batch_id <= %d AND _peerdb_destination_table_name = %s", - peerdb_clickhouse.QuoteIdentifier(rawTbl), normBatchID, req.SyncBatchID, peerdb_clickhouse.QuoteLiteral(tbl)) - if numParts > 1 { - fmt.Fprintf(&selectQuery, " AND cityHash64(_peerdb_uid) %% %d = %d", numParts, numPart) - } + normalizeBatchIDForTable, err := c.GetLastNormalizedBatchIDForTable(ctx, req.FlowJobName, tbl) + if err != nil { + c.logger.Error("[clickhouse] error while getting last synced batch id for table", "table", tbl, "error", err) + return model.NormalizeResponse{}, err + } - if enablePrimaryUpdate { - if sourceSchemaAsDestinationColumn { - projectionUpdate.WriteString(escapedSourceSchemaSelectorFragment) - } + c.logger.Info("[clickhouse] last normalized batch id for table", + "table", tbl, "lastNormalizedBatchID", normalizeBatchIDForTable, + "syncBatchID", req.SyncBatchID) + batchIdToLoadForTable := max(normBatchID, normalizeBatchIDForTable) + if batchIdToLoadForTable >= req.SyncBatchID { + c.logger.Info("[clickhouse] table already synced to destination for this batch, skipping", + "table", tbl, "batchIdToLoadForTable", batchIdToLoadForTable, "syncBatchID", req.SyncBatchID) + continue + } - // projectionUpdate generates delete on previous record, so _peerdb_record_type is filled in as 2 - fmt.Fprintf(&projectionUpdate, "1 AS %s,", peerdb_clickhouse.QuoteIdentifier(signColName)) - // decrement timestamp by 1 so delete is ordered before latest data, - // could be same if deletion records were only generated when ordering updated - fmt.Fprintf(&projectionUpdate, "_peerdb_timestamp - 1 AS %s", peerdb_clickhouse.QuoteIdentifier(versionColName)) - - selectQuery.WriteString(" UNION ALL SELECT ") - selectQuery.WriteString(projectionUpdate.String()) - fmt.Fprintf(&selectQuery, - " FROM %s WHERE _peerdb_match_data != '' AND _peerdb_batch_id > %d AND _peerdb_batch_id <= %d"+ - " AND _peerdb_destination_table_name = %s AND _peerdb_record_type = 1", - peerdb_clickhouse.QuoteIdentifier(rawTbl), normBatchID, req.SyncBatchID, peerdb_clickhouse.QuoteLiteral(tbl)) - if numParts > 1 { - fmt.Fprintf(&selectQuery, " AND cityHash64(_peerdb_uid) %% %d = %d", numParts, numPart) - } + for numPart := range numParts { + queryGenerator := NewNormalizeQueryGenerator( + tbl, + numPart, + req.TableNameSchemaMapping, + req.TableMappings, + req.SyncBatchID, + 
batchIdToLoadForTable, + numParts, + enablePrimaryUpdate, + sourceSchemaAsDestinationColumn, + req.Env, + rawTbl, + ) + insertIntoSelectQuery, err := queryGenerator.BuildQuery(ctx) + if err != nil { + close(queries) + c.logger.Error("[clickhouse] error while building insert into select query", + slog.String("table", tbl), + slog.Int64("syncBatchID", req.SyncBatchID), + slog.Int64("normalizeBatchID", normBatchID), + slog.Any("error", err)) + return model.NormalizeResponse{}, fmt.Errorf("error while building insert into select query for table %s: %w", tbl, err) } - insertIntoSelectQuery := fmt.Sprintf("INSERT INTO %s %s %s", - peerdb_clickhouse.QuoteIdentifier(tbl), colSelector.String(), selectQuery.String()) select { - case queries <- NormalizeInsertQuery{ - query: insertIntoSelectQuery, - tableName: tbl, + case queries <- NormalizeQueryGenerator{ + TableName: tbl, + Query: insertIntoSelectQuery, + Part: numPart, }: case <-errCtx.Done(): close(queries) @@ -602,11 +455,11 @@ func (c *ClickHouseConnector) getDistinctTableNamesInBatch( normalizeBatchID int64, tableToSchema map[string]*protos.TableSchema, ) ([]string, error) { - rawTbl := c.getRawTableName(flowJobName) + rawTbl := c.GetRawTableName(flowJobName) q := fmt.Sprintf( "SELECT DISTINCT _peerdb_destination_table_name FROM %s WHERE _peerdb_batch_id>%d AND _peerdb_batch_id<=%d", - rawTbl, normalizeBatchID, syncBatchID) + peerdb_clickhouse.QuoteIdentifier(rawTbl), normalizeBatchID, syncBatchID) rows, err := c.query(ctx, q) if err != nil { @@ -615,19 +468,15 @@ func (c *ClickHouseConnector) getDistinctTableNamesInBatch( defer rows.Close() var tableNames []string for rows.Next() { - var tableName sql.NullString + var tableName string if err := rows.Scan(&tableName); err != nil { return nil, fmt.Errorf("error while scanning table name: %w", err) } - if !tableName.Valid { - return nil, errors.New("table name is not valid") - } - - if _, ok := tableToSchema[tableName.String]; ok { - tableNames = append(tableNames, tableName.String) + if _, ok := tableToSchema[tableName]; ok { + tableNames = append(tableNames, tableName) } else { - c.logger.Warn("table not found in table to schema mapping", "table", tableName.String) + c.logger.Warn("table not found in table to schema mapping", "table", tableName) } } @@ -643,8 +492,9 @@ func (c *ClickHouseConnector) copyAvroStageToDestination( flowJobName string, syncBatchID int64, env map[string]string, + version uint32, ) error { - avroSyncMethod := c.avroSyncMethod(flowJobName, env) + avroSyncMethod := c.avroSyncMethod(flowJobName, env, version) avroFile, err := GetAvroStage(ctx, flowJobName, syncBatchID) if err != nil { return fmt.Errorf("failed to get avro stage: %w", err) @@ -658,12 +508,31 @@ func (c *ClickHouseConnector) copyAvroStageToDestination( } func (c *ClickHouseConnector) copyAvroStagesToDestination( - ctx context.Context, flowJobName string, normBatchID, syncBatchID int64, env map[string]string, + ctx context.Context, flowJobName string, normBatchID int64, syncBatchID int64, env map[string]string, version uint32, ) error { - for s := normBatchID + 1; s <= syncBatchID; s++ { - if err := c.copyAvroStageToDestination(ctx, flowJobName, s, env); err != nil { + lastSyncedBatchIdInRawTable, err := c.GetLastBatchIDInRawTable(ctx, flowJobName) + if err != nil { + return fmt.Errorf("failed to get last batch id in raw table: %w", err) + } + + batchIdToLoad := max(lastSyncedBatchIdInRawTable, normBatchID) + c.logger.Info("[clickhouse] pushing s3 data to raw table", + slog.Int64("BatchID", batchIdToLoad), 
+ slog.String("flowJobName", flowJobName), + slog.Int64("syncBatchID", syncBatchID)) + + for s := batchIdToLoad + 1; s <= syncBatchID; s++ { + if err := c.copyAvroStageToDestination(ctx, flowJobName, s, env, version); err != nil { return fmt.Errorf("failed to copy avro stage to destination: %w", err) } + c.logger.Info("[clickhouse] setting last batch id in raw table", + slog.Int64("BatchID", s), + slog.String("flowJobName", flowJobName)) + if err := c.SetLastBatchIDInRawTable(ctx, flowJobName, s); err != nil { + c.logger.Error("[clickhouse] error while setting last batch id in raw table", + slog.Int64("BatchID", s), slog.Any("error", err)) + return fmt.Errorf("failed to set last batch id in raw table: %w", err) + } } return nil } diff --git a/flow/connectors/clickhouse/normalize_query.go b/flow/connectors/clickhouse/normalize_query.go new file mode 100644 index 0000000000..72763fb5c3 --- /dev/null +++ b/flow/connectors/clickhouse/normalize_query.go @@ -0,0 +1,285 @@ +package connclickhouse + +import ( + "context" + "fmt" + "strings" + + "github.com/PeerDB-io/peerdb/flow/generated/protos" + "github.com/PeerDB-io/peerdb/flow/internal" + "github.com/PeerDB-io/peerdb/flow/model/qvalue" + peerdb_clickhouse "github.com/PeerDB-io/peerdb/flow/shared/clickhouse" + "github.com/PeerDB-io/peerdb/flow/shared/types" +) + +type NormalizeQueryGenerator struct { + tableNameSchemaMapping map[string]*protos.TableSchema + env map[string]string + Query string + TableName string + rawTableName string + tableMappings []*protos.TableMapping + Part uint64 + syncBatchID int64 + batchIDToLoadForTable int64 + numParts uint64 + enablePrimaryUpdate bool + sourceSchemaAsDestinationColumn bool +} + +// NewTableNormalizeQuery constructs a TableNormalizeQuery with required fields. +func NewNormalizeQueryGenerator( + tableName string, + part uint64, + tableNameSchemaMapping map[string]*protos.TableSchema, + tableMappings []*protos.TableMapping, + syncBatchID int64, + batchIDToLoadForTable int64, + numParts uint64, + enablePrimaryUpdate bool, + sourceSchemaAsDestinationColumn bool, + env map[string]string, + rawTableName string, +) *NormalizeQueryGenerator { + return &NormalizeQueryGenerator{ + TableName: tableName, + Part: part, + tableNameSchemaMapping: tableNameSchemaMapping, + tableMappings: tableMappings, + syncBatchID: syncBatchID, + batchIDToLoadForTable: batchIDToLoadForTable, + numParts: numParts, + enablePrimaryUpdate: enablePrimaryUpdate, + sourceSchemaAsDestinationColumn: sourceSchemaAsDestinationColumn, + env: env, + rawTableName: rawTableName, + } +} + +func (t *NormalizeQueryGenerator) BuildQuery(ctx context.Context) (string, error) { + selectQuery := strings.Builder{} + selectQuery.WriteString("SELECT ") + + colSelector := strings.Builder{} + colSelector.WriteByte('(') + + schema := t.tableNameSchemaMapping[t.TableName] + + var tableMapping *protos.TableMapping + for _, tm := range t.tableMappings { + if tm.DestinationTableIdentifier == t.TableName { + tableMapping = tm + break + } + } + + var escapedSourceSchemaSelectorFragment string + if t.sourceSchemaAsDestinationColumn { + escapedSourceSchemaSelectorFragment = fmt.Sprintf("JSONExtractString(_peerdb_data, %s) AS %s,", + peerdb_clickhouse.QuoteLiteral(sourceSchemaColName), + peerdb_clickhouse.QuoteIdentifier(sourceSchemaColName)) + } + + projection := strings.Builder{} + projectionUpdate := strings.Builder{} + + for _, column := range schema.Columns { + colName := column.Name + dstColName := colName + colType := types.QValueKind(column.Type) + + var 
clickHouseType string + var columnNullableEnabled bool + if tableMapping != nil { + for _, col := range tableMapping.Columns { + if col.SourceName == colName { + if col.DestinationName != "" { + dstColName = col.DestinationName + } + if col.DestinationType != "" { + // TODO basic validation to avoid injection + clickHouseType = col.DestinationType + } + columnNullableEnabled = col.NullableEnabled + break + } + } + } + + fmt.Fprintf(&colSelector, "%s,", peerdb_clickhouse.QuoteIdentifier(dstColName)) + if clickHouseType == "" { + var err error + clickHouseType, err = qvalue.ToDWHColumnType( + ctx, colType, t.env, protos.DBType_CLICKHOUSE, column, schema.NullableEnabled || columnNullableEnabled, + ) + if err != nil { + return "", fmt.Errorf("error while converting column type to clickhouse type: %w", err) + } + } + + switch clickHouseType { + case "Date32", "Nullable(Date32)": + fmt.Fprintf(&projection, + "toDate32(parseDateTime64BestEffortOrNull(JSONExtractString(_peerdb_data, %s),6)) AS %s,", + peerdb_clickhouse.QuoteLiteral(colName), + peerdb_clickhouse.QuoteIdentifier(dstColName), + ) + if t.enablePrimaryUpdate { + fmt.Fprintf(&projectionUpdate, + "toDate32(parseDateTime64BestEffortOrNull(JSONExtractString(_peerdb_match_data, %s),6)) AS %s,", + peerdb_clickhouse.QuoteLiteral(colName), + peerdb_clickhouse.QuoteIdentifier(dstColName), + ) + } + case "DateTime64(6)", "Nullable(DateTime64(6))": + if colType == types.QValueKindTime || colType == types.QValueKindTimeTZ { + // parseDateTime64BestEffortOrNull for hh:mm:ss puts the year as current year + // (or previous year if result would be in future) so explicitly anchor to unix epoch + fmt.Fprintf(&projection, + "parseDateTime64BestEffortOrNull('1970-01-01 ' || JSONExtractString(_peerdb_data, %s),6) AS %s,", + peerdb_clickhouse.QuoteLiteral(colName), + peerdb_clickhouse.QuoteIdentifier(dstColName), + ) + if t.enablePrimaryUpdate { + fmt.Fprintf(&projectionUpdate, + "parseDateTime64BestEffortOrNull('1970-01-01 ' || JSONExtractString(_peerdb_match_data, %s),6) AS %s,", + peerdb_clickhouse.QuoteLiteral(colName), + peerdb_clickhouse.QuoteIdentifier(dstColName), + ) + } + } else { + fmt.Fprintf(&projection, + "parseDateTime64BestEffortOrNull(JSONExtractString(_peerdb_data, %s),6) AS %s,", + peerdb_clickhouse.QuoteLiteral(colName), + peerdb_clickhouse.QuoteIdentifier(dstColName), + ) + if t.enablePrimaryUpdate { + fmt.Fprintf(&projectionUpdate, + "parseDateTime64BestEffortOrNull(JSONExtractString(_peerdb_match_data, %s),6) AS %s,", + peerdb_clickhouse.QuoteLiteral(colName), + peerdb_clickhouse.QuoteIdentifier(dstColName), + ) + } + } + case "Array(DateTime64(6))", "Nullable(Array(DateTime64(6)))": + fmt.Fprintf(&projection, + `arrayMap(x -> parseDateTime64BestEffortOrNull(trimBoth(x, '"'), 6), JSONExtractArrayRaw(_peerdb_data, %s)) AS %s,`, + peerdb_clickhouse.QuoteLiteral(colName), + peerdb_clickhouse.QuoteIdentifier(dstColName), + ) + if t.enablePrimaryUpdate { + fmt.Fprintf(&projectionUpdate, + `arrayMap(x -> parseDateTime64BestEffortOrNull(trimBoth(x, '"'), 6), JSONExtractArrayRaw(_peerdb_match_data, %s)) AS %s,`, + peerdb_clickhouse.QuoteLiteral(colName), + peerdb_clickhouse.QuoteIdentifier(dstColName), + ) + } + default: + projLen := projection.Len() + if colType == types.QValueKindBytes { + format, err := internal.PeerDBBinaryFormat(ctx, t.env) + if err != nil { + return "", err + } + switch format { + case internal.BinaryFormatRaw: + fmt.Fprintf(&projection, + "base64Decode(JSONExtractString(_peerdb_data, %s)) AS %s,", + 
peerdb_clickhouse.QuoteLiteral(colName), + peerdb_clickhouse.QuoteIdentifier(dstColName), + ) + if t.enablePrimaryUpdate { + fmt.Fprintf(&projectionUpdate, + "base64Decode(JSONExtractString(_peerdb_match_data, %s)) AS %s,", + peerdb_clickhouse.QuoteLiteral(colName), + peerdb_clickhouse.QuoteIdentifier(dstColName), + ) + } + case internal.BinaryFormatHex: + fmt.Fprintf(&projection, "hex(base64Decode(JSONExtractString(_peerdb_data, %s))) AS %s,", + peerdb_clickhouse.QuoteLiteral(colName), + peerdb_clickhouse.QuoteIdentifier(dstColName), + ) + if t.enablePrimaryUpdate { + fmt.Fprintf(&projectionUpdate, + "hex(base64Decode(JSONExtractString(_peerdb_match_data, %s))) AS %s,", + peerdb_clickhouse.QuoteLiteral(colName), + peerdb_clickhouse.QuoteIdentifier(dstColName), + ) + } + } + } + + // proceed with default logic if logic above didn't add any sql + if projection.Len() == projLen { + fmt.Fprintf( + &projection, + "JSONExtract(_peerdb_data, %s, %s) AS %s,", + peerdb_clickhouse.QuoteLiteral(colName), + peerdb_clickhouse.QuoteLiteral(clickHouseType), + peerdb_clickhouse.QuoteIdentifier(dstColName), + ) + if t.enablePrimaryUpdate { + fmt.Fprintf( + &projectionUpdate, + "JSONExtract(_peerdb_match_data, %s, %s) AS %s,", + peerdb_clickhouse.QuoteLiteral(colName), + peerdb_clickhouse.QuoteLiteral(clickHouseType), + peerdb_clickhouse.QuoteIdentifier(dstColName), + ) + } + } + } + } + + if t.sourceSchemaAsDestinationColumn { + projection.WriteString(escapedSourceSchemaSelectorFragment) + fmt.Fprintf(&colSelector, "%s,", peerdb_clickhouse.QuoteIdentifier(sourceSchemaColName)) + } + + // add _peerdb_sign as _peerdb_record_type / 2 + fmt.Fprintf(&projection, "intDiv(_peerdb_record_type, 2) AS %s,", peerdb_clickhouse.QuoteIdentifier(signColName)) + fmt.Fprintf(&colSelector, "%s,", peerdb_clickhouse.QuoteIdentifier(signColName)) + + // add _peerdb_timestamp as _peerdb_version + fmt.Fprintf(&projection, "_peerdb_timestamp AS %s", peerdb_clickhouse.QuoteIdentifier(versionColName)) + fmt.Fprintf(&colSelector, "%s) ", peerdb_clickhouse.QuoteIdentifier(versionColName)) + + selectQuery.WriteString(projection.String()) + fmt.Fprintf(&selectQuery, + " FROM %s WHERE _peerdb_batch_id > %d AND _peerdb_batch_id <= %d AND _peerdb_destination_table_name = %s", + peerdb_clickhouse.QuoteIdentifier(t.rawTableName), t.batchIDToLoadForTable, t.syncBatchID, peerdb_clickhouse.QuoteLiteral(t.TableName)) + if t.numParts > 1 { + fmt.Fprintf(&selectQuery, " AND cityHash64(_peerdb_uid) %% %d = %d", t.numParts, t.Part) + } + + if t.enablePrimaryUpdate { + if t.sourceSchemaAsDestinationColumn { + projectionUpdate.WriteString(escapedSourceSchemaSelectorFragment) + } + + // projectionUpdate generates delete on previous record, so _peerdb_record_type is filled in as 2 + fmt.Fprintf(&projectionUpdate, "1 AS %s,", peerdb_clickhouse.QuoteIdentifier(signColName)) + // decrement timestamp by 1 so delete is ordered before latest data, + // could be same if deletion records were only generated when ordering updated + fmt.Fprintf(&projectionUpdate, "_peerdb_timestamp - 1 AS %s", peerdb_clickhouse.QuoteIdentifier(versionColName)) + + selectQuery.WriteString(" UNION ALL SELECT ") + selectQuery.WriteString(projectionUpdate.String()) + fmt.Fprintf(&selectQuery, + " FROM %s WHERE _peerdb_match_data != '' AND _peerdb_batch_id > %d AND _peerdb_batch_id <= %d"+ + " AND _peerdb_destination_table_name = %s AND _peerdb_record_type = 1", + peerdb_clickhouse.QuoteIdentifier(t.rawTableName), + t.batchIDToLoadForTable, t.syncBatchID, 
peerdb_clickhouse.QuoteLiteral(t.TableName)) + if t.numParts > 1 { + fmt.Fprintf(&selectQuery, " AND cityHash64(_peerdb_uid) %% %d = %d", t.numParts, t.Part) + } + } + + insertIntoSelectQuery := fmt.Sprintf("INSERT INTO %s %s %s", + peerdb_clickhouse.QuoteIdentifier(t.TableName), colSelector.String(), selectQuery.String()) + + t.Query = insertIntoSelectQuery + + return t.Query, nil +} diff --git a/flow/connectors/clickhouse/normalize_test.go b/flow/connectors/clickhouse/normalize_test.go index 3797f64d6f..2703cdc6aa 100644 --- a/flow/connectors/clickhouse/normalize_test.go +++ b/flow/connectors/clickhouse/normalize_test.go @@ -3,7 +3,10 @@ package connclickhouse import ( "testing" + "github.com/stretchr/testify/require" + "github.com/PeerDB-io/peerdb/flow/generated/protos" + "github.com/PeerDB-io/peerdb/flow/shared/types" ) func Test_GetOrderByColumns_WithColMap_AndOrdering(t *testing.T) { @@ -142,3 +145,206 @@ func Test_GetOrderByColumns_NoColMap_WithOrdering(t *testing.T) { } } } + +func TestBuildQuery_Basic(t *testing.T) { + ctx := t.Context() + tableName := "my_table" + rawTableName := "raw_my_table" + part := uint64(0) + numParts := uint64(1) + syncBatchID := int64(10) + batchIDToLoadForTable := int64(5) + enablePrimaryUpdate := false + sourceSchemaAsDestinationColumn := false + env := map[string]string{} + + // Table schema with two columns + tableSchema := &protos.TableSchema{ + Columns: []*protos.FieldDescription{ + {Name: "id", Type: string(types.QValueKindInt64)}, + {Name: "name", Type: string(types.QValueKindString)}, + }, + NullableEnabled: false, + } + tableNameSchemaMapping := map[string]*protos.TableSchema{ + tableName: tableSchema, + } + + tableMappings := []*protos.TableMapping{ + { + SourceTableIdentifier: "public.my_table", + DestinationTableIdentifier: tableName, + }, + } + + g := NewNormalizeQueryGenerator( + tableName, + part, + tableNameSchemaMapping, + tableMappings, + syncBatchID, + batchIDToLoadForTable, + numParts, + enablePrimaryUpdate, + sourceSchemaAsDestinationColumn, + env, + rawTableName, + ) + + query, err := g.BuildQuery(ctx) + require.NoError(t, err) + require.Contains(t, query, "INSERT INTO") + require.Contains(t, query, "SELECT") + require.Contains(t, query, "JSONExtract(_peerdb_data, 'id', 'Int64') AS `id`") + require.Contains(t, query, "JSONExtract(_peerdb_data, 'name', 'String') AS `name`") + require.Contains(t, query, "FROM `raw_my_table`") + require.Contains(t, query, "_peerdb_batch_id > 5 AND _peerdb_batch_id <= 10") + require.Contains(t, query, "_peerdb_destination_table_name = 'my_table'") +} + +func TestBuildQuery_WithPrimaryUpdate(t *testing.T) { + ctx := t.Context() + tableName := "my_table" + rawTableName := "raw_my_table" + part := uint64(0) + numParts := uint64(1) + syncBatchID := int64(10) + batchIDToLoadForTable := int64(5) + enablePrimaryUpdate := true + sourceSchemaAsDestinationColumn := false + env := map[string]string{} + + tableSchema := &protos.TableSchema{ + Columns: []*protos.FieldDescription{ + {Name: "id", Type: string(types.QValueKindInt64)}, + }, + NullableEnabled: false, + } + tableNameSchemaMapping := map[string]*protos.TableSchema{ + tableName: tableSchema, + } + + tableMappings := []*protos.TableMapping{ + { + SourceTableIdentifier: "public.my_table", + DestinationTableIdentifier: tableName, + }, + } + + g := NewNormalizeQueryGenerator( + tableName, + part, + tableNameSchemaMapping, + tableMappings, + syncBatchID, + batchIDToLoadForTable, + numParts, + enablePrimaryUpdate, + sourceSchemaAsDestinationColumn, + env, + 
rawTableName, + ) + + query, err := g.BuildQuery(ctx) + require.NoError(t, err) + require.Contains(t, query, "UNION ALL SELECT") + require.Contains(t, query, "JSONExtract(_peerdb_match_data, 'id', 'Int64') AS `id`") + require.Contains(t, query, "_peerdb_match_data != ''") + require.Contains(t, query, "_peerdb_record_type = 1") +} + +func TestBuildQuery_WithSourceSchemaAsDestinationColumn(t *testing.T) { + ctx := t.Context() + tableName := "my_table" + rawTableName := "raw_my_table" + part := uint64(0) + numParts := uint64(1) + syncBatchID := int64(10) + batchIDToLoadForTable := int64(5) + enablePrimaryUpdate := false + sourceSchemaAsDestinationColumn := true + env := map[string]string{} + + tableSchema := &protos.TableSchema{ + Columns: []*protos.FieldDescription{ + {Name: "id", Type: string(types.QValueKindInt64)}, + }, + NullableEnabled: false, + } + tableNameSchemaMapping := map[string]*protos.TableSchema{ + tableName: tableSchema, + } + + tableMappings := []*protos.TableMapping{ + { + SourceTableIdentifier: "public.my_table", + DestinationTableIdentifier: tableName, + }, + } + + g := NewNormalizeQueryGenerator( + tableName, + part, + tableNameSchemaMapping, + tableMappings, + syncBatchID, + batchIDToLoadForTable, + numParts, + enablePrimaryUpdate, + sourceSchemaAsDestinationColumn, + env, + rawTableName, + ) + + query, err := g.BuildQuery(ctx) + require.NoError(t, err) + require.Contains(t, query, " AS `_peerdb_source_schema`") +} + +func TestBuildQuery_WithNumParts(t *testing.T) { + ctx := t.Context() + tableName := "my_table" + rawTableName := "raw_my_table" + part := uint64(2) + numParts := uint64(4) + syncBatchID := int64(10) + batchIDToLoadForTable := int64(5) + enablePrimaryUpdate := false + sourceSchemaAsDestinationColumn := false + env := map[string]string{} + + tableSchema := &protos.TableSchema{ + Columns: []*protos.FieldDescription{ + {Name: "id", Type: string(types.QValueKindInt64)}, + }, + NullableEnabled: false, + } + tableNameSchemaMapping := map[string]*protos.TableSchema{ + tableName: tableSchema, + } + + tableMappings := []*protos.TableMapping{ + { + SourceTableIdentifier: "public.my_table", + DestinationTableIdentifier: tableName, + }, + } + + g := NewNormalizeQueryGenerator( + tableName, + part, + tableNameSchemaMapping, + tableMappings, + syncBatchID, + batchIDToLoadForTable, + numParts, + enablePrimaryUpdate, + sourceSchemaAsDestinationColumn, + env, + rawTableName, + ) + + query, err := g.BuildQuery(ctx) + require.NoError(t, err) + require.Contains(t, query, "cityHash64(_peerdb_uid) % 4 = 2") +} diff --git a/flow/connectors/clickhouse/qrep.go b/flow/connectors/clickhouse/qrep.go index 0326b7c940..ea07d2edf5 100644 --- a/flow/connectors/clickhouse/qrep.go +++ b/flow/connectors/clickhouse/qrep.go @@ -24,7 +24,7 @@ func (c *ClickHouseConnector) SyncQRepRecords( config *protos.QRepConfig, partition *protos.QRepPartition, stream *model.QRecordStream, -) (int64, error) { +) (int64, shared.QRepWarnings, error) { // Ensure the destination table is available. 
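// Hypothetical caller sketch (names such as dstConn and logger are assumed for
// illustration): with the widened (int64, shared.QRepWarnings, error) signature,
// a caller would report warnings to the user instead of failing the sync:
//
//	rows, warnings, err := dstConn.SyncQRepRecords(ctx, config, partition, stream)
//	if err != nil {
//		return err
//	}
//	for _, warning := range warnings {
//		logger.Warn("qrep sync warning", slog.Any("warning", warning))
//	}
//	logger.Info("synced records", slog.Int64("rows", rows))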
destTable := config.DestinationTableIdentifier flowLog := slog.Group("sync_metadata", diff --git a/flow/connectors/clickhouse/qrep_avro_sync.go b/flow/connectors/clickhouse/qrep_avro_sync.go index fd76979b99..100395fcab 100644 --- a/flow/connectors/clickhouse/qrep_avro_sync.go +++ b/flow/connectors/clickhouse/qrep_avro_sync.go @@ -11,14 +11,13 @@ import ( "github.com/hamba/avro/v2/ocf" "github.com/PeerDB-io/peerdb/flow/connectors/utils" - avro "github.com/PeerDB-io/peerdb/flow/connectors/utils/avro" "github.com/PeerDB-io/peerdb/flow/generated/protos" "github.com/PeerDB-io/peerdb/flow/internal" "github.com/PeerDB-io/peerdb/flow/model" - "github.com/PeerDB-io/peerdb/flow/model/qvalue" "github.com/PeerDB-io/peerdb/flow/shared" peerdb_clickhouse "github.com/PeerDB-io/peerdb/flow/shared/clickhouse" "github.com/PeerDB-io/peerdb/flow/shared/exceptions" + "github.com/PeerDB-io/peerdb/flow/shared/types" ) type ClickHouseAvroSyncMethod struct { @@ -36,29 +35,47 @@ func NewClickHouseAvroSyncMethod( } } -func (s *ClickHouseAvroSyncMethod) CopyStageToDestination(ctx context.Context, avroFile *avro.AvroFile) error { +func (s *ClickHouseAvroSyncMethod) s3TableFunctionBuilder(ctx context.Context, avroFilePath string) (string, error) { stagingPath := s.credsProvider.BucketPath s3o, err := utils.NewS3BucketAndPrefix(stagingPath) if err != nil { - return err + return "", err } endpoint := s.credsProvider.Provider.GetEndpointURL() region := s.credsProvider.Provider.GetRegion() - avroFileUrl := utils.FileURLForS3Service(endpoint, region, s3o.Bucket, avroFile.FilePath) + avroFileUrl := utils.FileURLForS3Service(endpoint, region, s3o.Bucket, avroFilePath) creds, err := s.credsProvider.Provider.Retrieve(ctx) if err != nil { - return err + return "", err } - sessionTokenPart := "" + var expr strings.Builder + expr.WriteString("s3(") + expr.WriteString(peerdb_clickhouse.QuoteLiteral(avroFileUrl)) + expr.WriteByte(',') + expr.WriteString(peerdb_clickhouse.QuoteLiteral(creds.AWS.AccessKeyID)) + expr.WriteByte(',') + expr.WriteString(peerdb_clickhouse.QuoteLiteral(creds.AWS.SecretAccessKey)) if creds.AWS.SessionToken != "" { - sessionTokenPart = fmt.Sprintf(", '%s'", creds.AWS.SessionToken) + expr.WriteByte(',') + expr.WriteString(peerdb_clickhouse.QuoteLiteral(creds.AWS.SessionToken)) + } + expr.WriteString(",'Avro')") + return expr.String(), nil +} + +func (s *ClickHouseAvroSyncMethod) CopyStageToDestination(ctx context.Context, avroFile utils.AvroFile) error { + s3TableFunction, err := s.s3TableFunctionBuilder(ctx, avroFile.FilePath) + if err != nil { + s.logger.Error("failed to build S3 table function", + slog.String("avroFilePath", avroFile.FilePath), + slog.Any("error", err)) + return fmt.Errorf("failed to build S3 table function: %w", err) } - query := fmt.Sprintf("INSERT INTO `%s` SELECT * FROM s3('%s','%s','%s'%s, 'Avro')", - s.config.DestinationTableIdentifier, avroFileUrl, - creds.AWS.AccessKeyID, creds.AWS.SecretAccessKey, sessionTokenPart) + query := fmt.Sprintf("INSERT INTO %s SELECT * FROM %s", + peerdb_clickhouse.QuoteIdentifier(s.config.DestinationTableIdentifier), s3TableFunction) return s.exec(ctx, query) } @@ -84,7 +101,7 @@ func (s *ClickHouseAvroSyncMethod) SyncRecords( } batchIdentifierForFile := fmt.Sprintf("%s_%d", shared.RandomString(16), syncBatchID) - avroFile, err := s.writeToAvroFile(ctx, env, stream, nil, avroSchema, batchIdentifierForFile, flowJobName, nil) + avroFile, err := s.writeToAvroFile(ctx, env, stream, nil, avroSchema, batchIdentifierForFile, flowJobName, nil, nil) if err != 
nil { return 0, err } @@ -107,77 +124,77 @@ func (s *ClickHouseAvroSyncMethod) SyncQRepRecords( config *protos.QRepConfig, partition *protos.QRepPartition, stream *model.QRecordStream, -) (int64, error) { +) (int64, shared.QRepWarnings, error) { dstTableName := config.DestinationTableIdentifier startTime := time.Now() schema, err := stream.Schema() if err != nil { - return 0, err + return 0, nil, err } destTypeConversions := findTypeConversions(schema, config.Columns) if len(destTypeConversions) > 0 { schema = applyTypeConversions(schema, destTypeConversions) } + numericTruncator := model.NewSnapshotTableNumericTruncator(dstTableName, schema.Fields) columnNameAvroFieldMap := model.ConstructColumnNameAvroFieldMap(schema.Fields) - avroFile, err := s.pushDataToS3(ctx, config, dstTableName, schema, - columnNameAvroFieldMap, partition, stream, destTypeConversions) + avroFiles, totalRecords, err := s.pushDataToS3(ctx, config, dstTableName, schema, + columnNameAvroFieldMap, partition, stream, destTypeConversions, numericTruncator) if err != nil { s.logger.Error("failed to push data to S3", slog.String("dstTable", dstTableName), slog.Any("error", err)) - return 0, err + return 0, nil, err } if err := s.pushS3DataToClickHouse( - ctx, avroFile.FilePath, schema, columnNameAvroFieldMap, config); err != nil { + ctx, avroFiles, schema, columnNameAvroFieldMap, config); err != nil { s.logger.Error("failed to push data to ClickHouse", slog.String("dstTable", dstTableName), slog.Any("error", err)) - return 0, err + return 0, nil, err } + warnings := numericTruncator.Warnings() if err := s.FinishQRepPartition(ctx, partition, config.FlowJobName, startTime); err != nil { s.logger.Error("Failed to finish QRep partition", slog.Any("error", err)) - return 0, err + return 0, nil, err } - return avroFile.NumRecords, nil + return totalRecords, warnings, nil } func (s *ClickHouseAvroSyncMethod) pushDataToS3( ctx context.Context, config *protos.QRepConfig, dstTableName string, - schema qvalue.QRecordSchema, + schema types.QRecordSchema, columnNameAvroFieldMap map[string]string, partition *protos.QRepPartition, stream *model.QRecordStream, - destTypeConversions map[string]qvalue.TypeConversion, -) (*avro.AvroFile, error) { + destTypeConversions map[string]types.TypeConversion, + numericTruncator *model.SnapshotTableNumericTruncator, +) ([]utils.AvroFile, int64, error) { avroSchema, err := s.getAvroSchema(ctx, config.Env, dstTableName, schema, columnNameAvroFieldMap) if err != nil { - return nil, err + return nil, 0, err } avroChunking, err := internal.PeerDBS3BytesPerAvroFile(ctx, config.Env) if err != nil { - return nil, err + return nil, 0, err } - var avroFile *avro.AvroFile - if avroChunking != 0 { - avroFile = &avro.AvroFile{ - FilePath: "", - NumRecords: 0, - } + var avroFiles []utils.AvroFile + var totalRecords int64 + if avroChunking != 0 { chunkNum := 0 var done atomic.Bool for !done.Load() { if err := ctx.Err(); err != nil { - return nil, err + return nil, 0, err } substream := model.NewQRecordStream(0) @@ -200,61 +217,50 @@ func (s *ClickHouseAvroSyncMethod) pushDataToS3( subFile, err := s.writeToAvroFile(ctx, config.Env, substream, &avroSize, avroSchema, fmt.Sprintf("%s.%06d", partition.PartitionId, chunkNum), - config.FlowJobName, destTypeConversions) + config.FlowJobName, destTypeConversions, numericTruncator) if err != nil { - return nil, err - } - if chunkNum == 0 { - avroFile.FilePath = strings.TrimSuffix(subFile.FilePath, "000000.avro") + "*.avro" + return nil, 0, err } + avroFiles = append(avroFiles, 
subFile) chunkNum += 1 - avroFile.NumRecords += subFile.NumRecords + totalRecords += subFile.NumRecords } if err := ctx.Err(); err != nil { - return nil, err + return nil, 0, err } - } - - if avroFile == nil || avroFile.FilePath == "" { - var err error - avroFile, err = s.writeToAvroFile( - ctx, config.Env, stream, nil, avroSchema, partition.PartitionId, config.FlowJobName, destTypeConversions, + } else { + avroFile, err := s.writeToAvroFile( + ctx, config.Env, stream, nil, avroSchema, partition.PartitionId, config.FlowJobName, + destTypeConversions, numericTruncator, ) if err != nil { - return nil, err + return nil, 0, err } + avroFiles = append(avroFiles, avroFile) + totalRecords = avroFile.NumRecords } - return avroFile, nil + s.logger.Info("finished writing avro chunks to S3", + slog.String("partitionId", partition.PartitionId), + slog.Int("totalChunks", len(avroFiles)), + slog.Int64("totalRecords", totalRecords)) + + return avroFiles, totalRecords, nil } func (s *ClickHouseAvroSyncMethod) pushS3DataToClickHouse( ctx context.Context, - avroFilePath string, - schema qvalue.QRecordSchema, + avroFiles []utils.AvroFile, + schema types.QRecordSchema, columnNameAvroFieldMap map[string]string, config *protos.QRepConfig, ) error { - stagingPath := s.credsProvider.BucketPath - s3o, err := utils.NewS3BucketAndPrefix(stagingPath) - if err != nil { - return err - } - - creds, err := s.credsProvider.Provider.Retrieve(ctx) - if err != nil { - return err - } - sourceSchemaAsDestinationColumn, err := internal.PeerDBSourceSchemaAsDestinationColumn(ctx, config.Env) if err != nil { return err } - endpoint := s.credsProvider.Provider.GetEndpointURL() - region := s.credsProvider.Provider.GetRegion() - avroFileUrl := utils.FileURLForS3Service(endpoint, region, s3o.Bucket, avroFilePath) selectedColumnNames := make([]string, 0, len(schema.Fields)) insertedColumnNames := make([]string, 0, len(schema.Fields)) for _, colName := range schema.GetColumnNames() { @@ -270,8 +276,8 @@ func (s *ClickHouseAvroSyncMethod) pushS3DataToClickHouse( slog.String("avroFieldName", avroColName)) return fmt.Errorf("destination column %s not found in avro schema", colName) } - selectedColumnNames = append(selectedColumnNames, "`"+avroColName+"`") - insertedColumnNames = append(insertedColumnNames, "`"+colName+"`") + selectedColumnNames = append(selectedColumnNames, peerdb_clickhouse.QuoteIdentifier(avroColName)) + insertedColumnNames = append(insertedColumnNames, peerdb_clickhouse.QuoteIdentifier(colName)) } if sourceSchemaAsDestinationColumn { schemaTable, err := utils.ParseSchemaTable(config.WatermarkTable) @@ -279,16 +285,12 @@ func (s *ClickHouseAvroSyncMethod) pushS3DataToClickHouse( return err } - selectedColumnNames = append(selectedColumnNames, fmt.Sprintf("'%s'", peerdb_clickhouse.EscapeStr(schemaTable.Schema))) + selectedColumnNames = append(selectedColumnNames, peerdb_clickhouse.QuoteLiteral(schemaTable.Schema)) insertedColumnNames = append(insertedColumnNames, sourceSchemaColName) } selectorStr := strings.Join(selectedColumnNames, ",") insertedStr := strings.Join(insertedColumnNames, ",") - sessionTokenPart := "" - if creds.AWS.SessionToken != "" { - sessionTokenPart = fmt.Sprintf(", '%s'", creds.AWS.SessionToken) - } hashColName := columnNameAvroFieldMap[schema.Fields[0].Name] numParts, err := internal.PeerDBClickHouseInitialLoadPartsPerPartition(ctx, s.config.Env) @@ -298,26 +300,48 @@ func (s *ClickHouseAvroSyncMethod) pushS3DataToClickHouse( } numParts = max(numParts, 1) - for i := range numParts { - var 
whereClause string - if numParts > 1 { - whereClause = fmt.Sprintf(" WHERE cityHash64(`%s`) %% %d = %d", hashColName, numParts, i) - } - query := fmt.Sprintf( - "INSERT INTO `%s`(%s) SELECT %s FROM s3('%s','%s','%s'%s,'Avro')%s SETTINGS throw_on_max_partitions_per_insert_block = 0", - config.DestinationTableIdentifier, insertedStr, selectorStr, avroFileUrl, - creds.AWS.AccessKeyID, creds.AWS.SecretAccessKey, sessionTokenPart, whereClause) - s.logger.Info("inserting part", - slog.String("query", query), - slog.Uint64("part", i), - slog.Uint64("numParts", numParts)) - if err := s.exec(ctx, query); err != nil { - s.logger.Error("failed to insert part", - slog.String("query", query), + // Process each chunk file individually + for chunkIdx, avroFile := range avroFiles { + s.logger.Info("processing chunk", + slog.Int("chunkIdx", chunkIdx), + slog.Int("totalChunks", len(avroFiles)), + slog.String("avroFilePath", avroFile.FilePath)) + + for i := range numParts { + // Get fresh credentials for each part + s3TableFunction, err := s.s3TableFunctionBuilder(ctx, avroFile.FilePath) + if err != nil { + s.logger.Error("failed to build S3 table function", + slog.String("avroFilePath", avroFile.FilePath), + slog.Any("error", err), + slog.Uint64("part", i), + slog.Uint64("numParts", numParts), + slog.Int("chunkIdx", chunkIdx), + ) + return fmt.Errorf("failed to build S3 table function: %w", err) + } + + var whereClause string + if numParts > 1 { + whereClause = fmt.Sprintf(" WHERE cityHash64(%s) %% %d = %d", peerdb_clickhouse.QuoteIdentifier(hashColName), numParts, i) + } + + query := fmt.Sprintf( + "INSERT INTO %s(%s) SELECT %s FROM %s%s SETTINGS throw_on_max_partitions_per_insert_block = 0", + peerdb_clickhouse.QuoteIdentifier(config.DestinationTableIdentifier), insertedStr, selectorStr, s3TableFunction, whereClause) + s.logger.Info("inserting part", slog.Uint64("part", i), slog.Uint64("numParts", numParts), - slog.Any("error", err)) - return exceptions.NewQRepSyncError(err, config.DestinationTableIdentifier, s.ClickHouseConnector.config.Database) + slog.Int("chunkIdx", chunkIdx), + slog.Int("totalChunks", len(avroFiles))) + if err := s.exec(ctx, query); err != nil { + s.logger.Error("failed to insert part", + slog.Uint64("part", i), + slog.Uint64("numParts", numParts), + slog.Int("chunkIdx", chunkIdx), + slog.Any("error", err)) + return exceptions.NewQRepSyncError(err, config.DestinationTableIdentifier, s.ClickHouseConnector.config.Database) + } } } @@ -328,7 +352,7 @@ func (s *ClickHouseAvroSyncMethod) getAvroSchema( ctx context.Context, env map[string]string, dstTableName string, - schema qvalue.QRecordSchema, + schema types.QRecordSchema, avroNameMap map[string]string, ) (*model.QRecordAvroSchemaDefinition, error) { avroSchema, err := model.GetAvroSchemaDefinition(ctx, env, dstTableName, schema, protos.DBType_CLICKHOUSE, avroNameMap) @@ -346,20 +370,23 @@ func (s *ClickHouseAvroSyncMethod) writeToAvroFile( avroSchema *model.QRecordAvroSchemaDefinition, identifierForFile string, flowJobName string, - typeConversions map[string]qvalue.TypeConversion, -) (*avro.AvroFile, error) { + typeConversions map[string]types.TypeConversion, + numericTruncator *model.SnapshotTableNumericTruncator, +) (utils.AvroFile, error) { stagingPath := s.credsProvider.BucketPath - ocfWriter := avro.NewPeerDBOCFWriter(stream, avroSchema, ocf.ZStandard, protos.DBType_CLICKHOUSE) + ocfWriter := utils.NewPeerDBOCFWriter(stream, avroSchema, ocf.ZStandard, protos.DBType_CLICKHOUSE) s3o, err := utils.NewS3BucketAndPrefix(stagingPath) 
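// Illustrative only, with placeholder identifiers: for numParts = 4 and part i = 2,
// each per-chunk, per-part insert built above has roughly this shape, and the s3()
// table function is rebuilt for every part so temporary credentials stay fresh:
//
//	INSERT INTO `dst_table`(`id`,`name`)
//	SELECT `id`,`name`
//	FROM s3('https://<endpoint>/<bucket>/.../<partition>.000002.avro','<key>','<secret>','Avro')
//	WHERE cityHash64(`id`) % 4 = 2
//	SETTINGS throw_on_max_partitions_per_insert_block = 0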
if err != nil { - return nil, fmt.Errorf("failed to parse staging path: %w", err) + return utils.AvroFile{}, fmt.Errorf("failed to parse staging path: %w", err) } s3AvroFileKey := fmt.Sprintf("%s/%s/%s.avro", s3o.Prefix, flowJobName, identifierForFile) s3AvroFileKey = strings.TrimLeft(s3AvroFileKey, "/") - avroFile, err := ocfWriter.WriteRecordsToS3(ctx, env, s3o.Bucket, s3AvroFileKey, s.credsProvider.Provider, avroSize, typeConversions) + avroFile, err := ocfWriter.WriteRecordsToS3( + ctx, env, s3o.Bucket, s3AvroFileKey, s.credsProvider.Provider, avroSize, typeConversions, numericTruncator, + ) if err != nil { - return nil, fmt.Errorf("failed to write records to S3: %w", err) + return utils.AvroFile{}, fmt.Errorf("failed to write records to S3: %w", err) } return avroFile, nil diff --git a/flow/connectors/clickhouse/s3_iam_role_test.go b/flow/connectors/clickhouse/s3_iam_role_test.go index 39221397f9..25f41b86b9 100644 --- a/flow/connectors/clickhouse/s3_iam_role_test.go +++ b/flow/connectors/clickhouse/s3_iam_role_test.go @@ -8,7 +8,7 @@ import ( "github.com/stretchr/testify/require" - avro "github.com/PeerDB-io/peerdb/flow/connectors/utils/avro" + "github.com/PeerDB-io/peerdb/flow/connectors/utils" "github.com/PeerDB-io/peerdb/flow/generated/protos" ) @@ -56,9 +56,9 @@ func TestIAMRoleCanIssueSelectFromS3(t *testing.T) { avroSync := NewClickHouseAvroSyncMethod(&protos.QRepConfig{ DestinationTableIdentifier: table.TableIdentifier, }, conn) - require.NoError(t, avroSync.CopyStageToDestination(ctx, &avro.AvroFile{ + require.NoError(t, avroSync.CopyStageToDestination(ctx, utils.AvroFile{ FilePath: "test-iam-role-can-issue-select-from-s3/datafile.avro.zst", - StorageLocation: avro.AvroS3Storage, + StorageLocation: utils.AvroS3Storage, NumRecords: 3, })) diff --git a/flow/connectors/clickhouse/s3_stage.go b/flow/connectors/clickhouse/s3_stage.go index 4b3df4640d..d87042ab5f 100644 --- a/flow/connectors/clickhouse/s3_stage.go +++ b/flow/connectors/clickhouse/s3_stage.go @@ -8,7 +8,7 @@ import ( "github.com/jackc/pgx/v5" - utils "github.com/PeerDB-io/peerdb/flow/connectors/utils/avro" + "github.com/PeerDB-io/peerdb/flow/connectors/utils" "github.com/PeerDB-io/peerdb/flow/internal" ) @@ -16,7 +16,7 @@ func SetAvroStage( ctx context.Context, flowJobName string, syncBatchID int64, - avroFile *utils.AvroFile, + avroFile utils.AvroFile, ) error { avroFileJSON, err := json.Marshal(avroFile) if err != nil { @@ -41,10 +41,10 @@ func SetAvroStage( return nil } -func GetAvroStage(ctx context.Context, flowJobName string, syncBatchID int64) (*utils.AvroFile, error) { +func GetAvroStage(ctx context.Context, flowJobName string, syncBatchID int64) (utils.AvroFile, error) { conn, err := internal.GetCatalogConnectionPoolFromEnv(ctx) if err != nil { - return nil, fmt.Errorf("failed to get connection: %w", err) + return utils.AvroFile{}, fmt.Errorf("failed to get connection: %w", err) } var avroFileJSON []byte @@ -54,15 +54,15 @@ func GetAvroStage(ctx context.Context, flowJobName string, syncBatchID int64) (* flowJobName, syncBatchID, ).Scan(&avroFileJSON); err != nil { if errors.Is(err, pgx.ErrNoRows) { - return nil, fmt.Errorf("no avro stage found for flow job %s and sync batch %d", flowJobName, syncBatchID) + return utils.AvroFile{}, fmt.Errorf("no avro stage found for flow job %s and sync batch %d", flowJobName, syncBatchID) } - return nil, fmt.Errorf("failed to get avro stage: %w", err) + return utils.AvroFile{}, fmt.Errorf("failed to get avro stage: %w", err) } var avroFile utils.AvroFile if err := 
json.Unmarshal(avroFileJSON, &avroFile); err != nil { - return nil, fmt.Errorf("failed to unmarshal avro file: %w", err) + return utils.AvroFile{}, fmt.Errorf("failed to unmarshal avro file: %w", err) } - return &avroFile, nil + return avroFile, nil } diff --git a/flow/connectors/clickhouse/type_conversion.go b/flow/connectors/clickhouse/type_conversion.go index e807c15ebc..799286cddb 100644 --- a/flow/connectors/clickhouse/type_conversion.go +++ b/flow/connectors/clickhouse/type_conversion.go @@ -2,40 +2,13 @@ package connclickhouse import ( "github.com/PeerDB-io/peerdb/flow/generated/protos" - "github.com/PeerDB-io/peerdb/flow/model/qvalue" + "github.com/PeerDB-io/peerdb/flow/shared/clickhouse" + "github.com/PeerDB-io/peerdb/flow/shared/types" ) -/* -This file handles the mapping for ClickHouse destination types and -their corresponding TypeConversion implementations. A TypeConversion -object contains two functions: one for schema conversion (QField) and -one for value conversion (QValue). This allows the avro writer to -stage the schema/data in the converted type format, and therefore -successfully uploaded to the desired destination type in ClickHouse. - -To add a type conversion: - (1) In flow/model/qvalue/type_converter.go: - - implement a SchemaConversionFn interface to convert the QField type - - implement a ValueConversionFn interface to convert the QValue data - - (2) Add the new conversion to the `supportedDestinationTypes` map here - (if destination type doesn't exist, create a new map entry for it). - -The GetColumnsTypeConversion function returns the full list of supported -type conversions. Note that the source types are QValueKind, this allows -the implementation to be source-connector agnostic. -*/ - -var supportedDestinationTypes = map[string][]qvalue.TypeConversion{ - "String": {qvalue.NewTypeConversion( - qvalue.NumericToStringSchemaConversion, - qvalue.NumericToStringValueConversion, - )}, -} - func GetColumnsTypeConversion() (*protos.ColumnsTypeConversionResponse, error) { res := make([]*protos.ColumnsTypeConversion, 0) - for qkind, destTypes := range listSupportedTypeConversions() { + for qkind, destTypes := range clickhouse.ListSupportedTypeConversions() { res = append(res, &protos.ColumnsTypeConversion{ Qkind: string(qkind), DestinationTypes: destTypes, @@ -46,21 +19,10 @@ func GetColumnsTypeConversion() (*protos.ColumnsTypeConversionResponse, error) { }, nil } -func listSupportedTypeConversions() map[qvalue.QValueKind][]string { - typeConversions := make(map[qvalue.QValueKind][]string) - - for dstType, l := range supportedDestinationTypes { - for _, conversion := range l { - typeConversions[conversion.FromKind()] = append(typeConversions[conversion.FromKind()], dstType) - } - } - return typeConversions -} - -func findTypeConversions(schema qvalue.QRecordSchema, columns []*protos.ColumnSetting) map[string]qvalue.TypeConversion { - typeConversions := make(map[string]qvalue.TypeConversion) +func findTypeConversions(schema types.QRecordSchema, columns []*protos.ColumnSetting) map[string]types.TypeConversion { + typeConversions := make(map[string]types.TypeConversion) - colNameToType := make(map[string]qvalue.QValueKind, len(schema.Fields)) + colNameToType := make(map[string]types.QValueKind, len(schema.Fields)) for _, field := range schema.Fields { colNameToType[field.Name] = field.Type } @@ -70,7 +32,7 @@ func findTypeConversions(schema qvalue.QRecordSchema, columns []*protos.ColumnSe if !exist { continue } - conversions, exist := 
supportedDestinationTypes[col.DestinationType] + conversions, exist := clickhouse.SupportedDestinationTypes[col.DestinationType] if !exist { continue } @@ -84,7 +46,7 @@ func findTypeConversions(schema qvalue.QRecordSchema, columns []*protos.ColumnSe return typeConversions } -func applyTypeConversions(schema qvalue.QRecordSchema, typeConversions map[string]qvalue.TypeConversion) qvalue.QRecordSchema { +func applyTypeConversions(schema types.QRecordSchema, typeConversions map[string]types.TypeConversion) types.QRecordSchema { for i, field := range schema.Fields { if conversion, exist := typeConversions[field.Name]; exist { schema.Fields[i] = conversion.SchemaConversion(field) diff --git a/flow/connectors/core.go b/flow/connectors/core.go index 9e2c135520..2eab92b03f 100644 --- a/flow/connectors/core.go +++ b/flow/connectors/core.go @@ -14,6 +14,7 @@ import ( connelasticsearch "github.com/PeerDB-io/peerdb/flow/connectors/elasticsearch" conneventhub "github.com/PeerDB-io/peerdb/flow/connectors/eventhub" connkafka "github.com/PeerDB-io/peerdb/flow/connectors/kafka" + connmongo "github.com/PeerDB-io/peerdb/flow/connectors/mongo" connmysql "github.com/PeerDB-io/peerdb/flow/connectors/mysql" connpostgres "github.com/PeerDB-io/peerdb/flow/connectors/postgres" connpubsub "github.com/PeerDB-io/peerdb/flow/connectors/pubsub" @@ -59,8 +60,9 @@ type GetTableSchemaConnector interface { GetTableSchema( ctx context.Context, env map[string]string, + version uint32, system protos.TypeSystem, - tableIdentifiers []*protos.TableMapping, + tableMappings []*protos.TableMapping, ) (map[string]*protos.TableSchema, error) } @@ -68,7 +70,7 @@ type GetSchemaConnector interface { Connector GetAllTables(context.Context) (*protos.AllTablesResponse, error) - GetColumns(ctx context.Context, schema string, table string) (*protos.TableColumnsResponse, error) + GetColumns(ctx context.Context, version uint32, schema string, table string) (*protos.TableColumnsResponse, error) GetSchemas(ctx context.Context) (*protos.PeerSchemasResponse, error) GetTablesInSchema(ctx context.Context, schema string, cdcEnabled bool) (*protos.SchemaTablesResponse, error) } @@ -164,8 +166,8 @@ type NormalizedTablesConnector interface { ctx context.Context, tx any, config *protos.SetupNormalizedTableBatchInput, - tableIdentifier string, - tableSchema *protos.TableSchema, + destinationTableIdentifier string, + sourceTableSchema *protos.TableSchema, ) (bool, error) } @@ -267,18 +269,18 @@ type QRepSyncConnector interface { QRepSyncConnectorCore // SyncQRepRecords syncs the records for a given partition. - // returns the number of records synced. + // returns the number of records synced and a slice of warnings to report to the user. SyncQRepRecords(ctx context.Context, config *protos.QRepConfig, partition *protos.QRepPartition, - stream *model.QRecordStream) (int64, error) + stream *model.QRecordStream) (int64, shared.QRepWarnings, error) } type QRepSyncPgConnector interface { QRepSyncConnectorCore // SyncPgQRepRecords syncs the records for a given partition. - // returns the number of records synced. + // returns the number of records synced and a slice of warnings to report to the user. 
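// Illustrative sketch, field names assumed: a user-provided column setting such as
//
//	&protos.ColumnSetting{SourceName: "amount", DestinationType: "String"}
//
// on a numeric source column makes findTypeConversions return a map like
// {"amount": <numeric-to-String conversion>}, and applyTypeConversions then rewrites
// that field in the schema so the Avro stage is written in the destination type.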
SyncPgQRepRecords(ctx context.Context, config *protos.QRepConfig, partition *protos.QRepPartition, - stream connpostgres.PgCopyReader) (int64, error) + stream connpostgres.PgCopyReader) (int64, shared.QRepWarnings, error) } type QRepConsolidateConnector interface { @@ -370,12 +372,6 @@ func LoadPeer(ctx context.Context, catalogPool shared.CatalogPool, peerName stri return nil, fmt.Errorf("failed to unmarshal Snowflake config: %w", err) } peer.Config = &protos.Peer_SnowflakeConfig{SnowflakeConfig: &config} - case protos.DBType_MONGO: - var config protos.MongoConfig - if err := proto.Unmarshal(peerOptions, &config); err != nil { - return nil, fmt.Errorf("failed to unmarshal MongoDB config: %w", err) - } - peer.Config = &protos.Peer_MongoConfig{MongoConfig: &config} case protos.DBType_POSTGRES: var config protos.PostgresConfig if err := proto.Unmarshal(peerOptions, &config); err != nil { @@ -394,6 +390,12 @@ func LoadPeer(ctx context.Context, catalogPool shared.CatalogPool, peerName stri return nil, fmt.Errorf("failed to unmarshal SQL Server config: %w", err) } peer.Config = &protos.Peer_SqlserverConfig{SqlserverConfig: &config} + case protos.DBType_MONGO: + var config protos.MongoConfig + if err := proto.Unmarshal(peerOptions, &config); err != nil { + return nil, fmt.Errorf("failed to unmarshal MongoDB config: %w", err) + } + peer.Config = &protos.Peer_MongoConfig{MongoConfig: &config} case protos.DBType_MYSQL: var config protos.MySqlConfig if err := proto.Unmarshal(peerOptions, &config); err != nil { @@ -449,6 +451,8 @@ func GetConnector(ctx context.Context, env map[string]string, config *protos.Pee return conneventhub.NewEventHubConnector(ctx, inner.EventhubGroupConfig) case *protos.Peer_S3Config: return conns3.NewS3Connector(ctx, inner.S3Config) + case *protos.Peer_MongoConfig: + return connmongo.NewMongoConnector(ctx, inner.MongoConfig) case *protos.Peer_MysqlConfig: return connmysql.NewMySqlConnector(ctx, inner.MysqlConfig) case *protos.Peer_ClickhouseConfig: @@ -498,6 +502,7 @@ func CloseConnector(ctx context.Context, conn Connector) { var ( _ CDCPullConnector = &connpostgres.PostgresConnector{} _ CDCPullConnector = &connmysql.MySqlConnector{} + _ CDCPullConnector = &connmongo.MongoConnector{} _ CDCPullPgConnector = &connpostgres.PostgresConnector{} @@ -525,6 +530,7 @@ var ( _ GetSchemaConnector = &connpostgres.PostgresConnector{} _ GetSchemaConnector = &connmysql.MySqlConnector{} + _ GetSchemaConnector = &connmongo.MongoConnector{} _ NormalizedTablesConnector = &connpostgres.PostgresConnector{} _ NormalizedTablesConnector = &connbigquery.BigQueryConnector{} @@ -536,6 +542,7 @@ var ( _ QRepPullConnector = &connpostgres.PostgresConnector{} _ QRepPullConnector = &connmysql.MySqlConnector{} + _ QRepPullConnector = &connmongo.MongoConnector{} _ QRepPullPgConnector = &connpostgres.PostgresConnector{} @@ -577,4 +584,5 @@ var ( _ GetVersionConnector = &connclickhouse.ClickHouseConnector{} _ GetVersionConnector = &connpostgres.PostgresConnector{} _ GetVersionConnector = &connmysql.MySqlConnector{} + _ GetVersionConnector = &connmongo.MongoConnector{} ) diff --git a/flow/connectors/elasticsearch/elasticsearch.go b/flow/connectors/elasticsearch/elasticsearch.go index 77f07b080d..b951311b84 100644 --- a/flow/connectors/elasticsearch/elasticsearch.go +++ b/flow/connectors/elasticsearch/elasticsearch.go @@ -24,8 +24,8 @@ import ( "github.com/PeerDB-io/peerdb/flow/generated/protos" "github.com/PeerDB-io/peerdb/flow/internal" "github.com/PeerDB-io/peerdb/flow/model" - 
"github.com/PeerDB-io/peerdb/flow/model/qvalue" "github.com/PeerDB-io/peerdb/flow/shared" + "github.com/PeerDB-io/peerdb/flow/shared/types" ) const ( @@ -103,7 +103,7 @@ func recordItemsProcessor(items model.RecordItems) ([]byte, error) { qRecordJsonMap := make(map[string]any) for key, val := range items.ColToVal { - if r, ok := val.(qvalue.QValueJSON); ok { // JSON is stored as a string, fix that + if r, ok := val.(types.QValueJSON); ok { // JSON is stored as a string, fix that qRecordJsonMap[key] = json.RawMessage( shared.UnsafeFastStringToReadOnlyBytes(r.Val)) } else { diff --git a/flow/connectors/elasticsearch/qrep.go b/flow/connectors/elasticsearch/qrep.go index f56af0734e..e16e0b6e21 100644 --- a/flow/connectors/elasticsearch/qrep.go +++ b/flow/connectors/elasticsearch/qrep.go @@ -16,8 +16,8 @@ import ( "github.com/PeerDB-io/peerdb/flow/generated/protos" "github.com/PeerDB-io/peerdb/flow/model" - "github.com/PeerDB-io/peerdb/flow/model/qvalue" "github.com/PeerDB-io/peerdb/flow/shared" + "github.com/PeerDB-io/peerdb/flow/shared/types" ) func (esc *ElasticsearchConnector) SetupQRepMetadataTables(ctx context.Context, @@ -26,7 +26,7 @@ func (esc *ElasticsearchConnector) SetupQRepMetadataTables(ctx context.Context, return nil } -func upsertKeyColsHash(qRecord []qvalue.QValue, upsertColIndices []int) string { +func upsertKeyColsHash(qRecord []types.QValue, upsertColIndices []int) string { hasher := sha256.New() for _, upsertColIndex := range upsertColIndices { @@ -39,12 +39,12 @@ func upsertKeyColsHash(qRecord []qvalue.QValue, upsertColIndices []int) string { func (esc *ElasticsearchConnector) SyncQRepRecords(ctx context.Context, config *protos.QRepConfig, partition *protos.QRepPartition, stream *model.QRecordStream, -) (int64, error) { +) (int64, shared.QRepWarnings, error) { startTime := time.Now() schema, err := stream.Schema() if err != nil { - return 0, err + return 0, nil, err } var bulkIndexFatalError error @@ -78,7 +78,7 @@ func (esc *ElasticsearchConnector) SyncQRepRecords(ctx context.Context, config * }) if err != nil { esc.logger.Error("[es] failed to initialize bulk indexer", slog.Any("error", err)) - return 0, fmt.Errorf("[es] failed to initialize bulk indexer: %w", err) + return 0, nil, fmt.Errorf("[es] failed to initialize bulk indexer: %w", err) } defer func() { if !bulkIndexerHasShutdown { @@ -100,7 +100,7 @@ func (esc *ElasticsearchConnector) SyncQRepRecords(ctx context.Context, config * docId = upsertKeyColsHash(qRecord, upsertKeyColIndices) } for i, field := range schema.Fields { - if r, ok := qRecord[i].(qvalue.QValueJSON); ok { // JSON is stored as a string, fix that + if r, ok := qRecord[i].(types.QValueJSON); ok { // JSON is stored as a string, fix that qRecordJsonMap[field.Name] = json.RawMessage( shared.UnsafeFastStringToReadOnlyBytes(r.Val)) } else { @@ -110,7 +110,7 @@ func (esc *ElasticsearchConnector) SyncQRepRecords(ctx context.Context, config * qRecordJsonBytes, err := json.Marshal(qRecordJsonMap) if err != nil { esc.logger.Error("[es] failed to json.Marshal record", slog.Any("error", err)) - return 0, fmt.Errorf("[es] failed to json.Marshal record: %w", err) + return 0, nil, fmt.Errorf("[es] failed to json.Marshal record: %w", err) } if err := esBulkIndexer.Add(ctx, esutil.BulkIndexerItem{ @@ -141,11 +141,11 @@ func (esc *ElasticsearchConnector) SyncQRepRecords(ctx context.Context, config * }, }); err != nil { esc.logger.Error("[es] failed to add record to bulk indexer", slog.Any("error", err)) - return 0, fmt.Errorf("[es] failed to add record to bulk 
indexer: %w", err) + return 0, nil, fmt.Errorf("[es] failed to add record to bulk indexer: %w", err) } if bulkIndexFatalError != nil { esc.logger.Error("[es] fatal error while indexing record", slog.Any("error", bulkIndexFatalError)) - return 0, fmt.Errorf("[es] fatal error while indexing record: %w", bulkIndexFatalError) + return 0, nil, fmt.Errorf("[es] fatal error while indexing record: %w", bulkIndexFatalError) } // update here instead of OnSuccess, if we close successfully it should match @@ -154,11 +154,11 @@ func (esc *ElasticsearchConnector) SyncQRepRecords(ctx context.Context, config * if err := stream.Err(); err != nil { esc.logger.Error("[es] failed to get record from stream", slog.Any("error", err)) - return 0, fmt.Errorf("[es] failed to get record from stream: %w", err) + return 0, nil, fmt.Errorf("[es] failed to get record from stream: %w", err) } if err := esBulkIndexer.Close(ctx); err != nil { esc.logger.Error("[es] failed to close bulk indexer", slog.Any("error", err)) - return 0, fmt.Errorf("[es] failed to close bulk indexer: %w", err) + return 0, nil, fmt.Errorf("[es] failed to close bulk indexer: %w", err) } bulkIndexerHasShutdown = true if len(bulkIndexErrors) > 0 { @@ -169,7 +169,7 @@ func (esc *ElasticsearchConnector) SyncQRepRecords(ctx context.Context, config * if err := esc.FinishQRepPartition(ctx, partition, config.FlowJobName, startTime); err != nil { esc.logger.Error("[es] failed to log partition info", slog.Any("error", err)) - return 0, fmt.Errorf("[es] failed to log partition info: %w", err) + return 0, nil, fmt.Errorf("[es] failed to log partition info: %w", err) } - return numRecords, nil + return numRecords, nil, nil } diff --git a/flow/connectors/eventhub/eventhub.go b/flow/connectors/eventhub/eventhub.go index 101482aae6..ed15922fc4 100644 --- a/flow/connectors/eventhub/eventhub.go +++ b/flow/connectors/eventhub/eventhub.go @@ -8,7 +8,7 @@ import ( "time" "github.com/Azure/azure-sdk-for-go/sdk/azidentity" - azeventhubs "github.com/Azure/azure-sdk-for-go/sdk/messaging/azeventhubs" + "github.com/Azure/azure-sdk-for-go/sdk/messaging/azeventhubs/v2" lua "github.com/yuin/gopher-lua" "go.temporal.io/sdk/log" diff --git a/flow/connectors/eventhub/hub_batches.go b/flow/connectors/eventhub/hub_batches.go index d246d6c53d..9d1ef4bc80 100644 --- a/flow/connectors/eventhub/hub_batches.go +++ b/flow/connectors/eventhub/hub_batches.go @@ -8,7 +8,7 @@ import ( "sync/atomic" "time" - azeventhubs "github.com/Azure/azure-sdk-for-go/sdk/messaging/azeventhubs" + "github.com/Azure/azure-sdk-for-go/sdk/messaging/azeventhubs/v2" "github.com/PeerDB-io/peerdb/flow/internal" "github.com/PeerDB-io/peerdb/flow/shared" diff --git a/flow/connectors/eventhub/hubmanager.go b/flow/connectors/eventhub/hubmanager.go index 3d9298c766..81a2844854 100644 --- a/flow/connectors/eventhub/hubmanager.go +++ b/flow/connectors/eventhub/hubmanager.go @@ -10,7 +10,7 @@ import ( "time" "github.com/Azure/azure-sdk-for-go/sdk/azidentity" - "github.com/Azure/azure-sdk-for-go/sdk/messaging/azeventhubs" + "github.com/Azure/azure-sdk-for-go/sdk/messaging/azeventhubs/v2" "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/eventhub/armeventhub" cmap "github.com/orcaman/concurrent-map/v2" diff --git a/flow/connectors/external_metadata/store.go b/flow/connectors/external_metadata/store.go index beefea8678..193685e318 100644 --- a/flow/connectors/external_metadata/store.go +++ b/flow/connectors/external_metadata/store.go @@ -2,9 +2,11 @@ package connmetadata import ( "context" + "encoding/json" "errors" 
"fmt" "log/slog" + "strconv" "time" "github.com/jackc/pglogrepl" @@ -68,6 +70,80 @@ func (p *PostgresMetadata) NeedsSetupMetadataTables(_ context.Context) (bool, er return false, nil } +// GetLastNormalizedBatchIDForTable returns the last batch ID normalized for the given target table. +func (p *PostgresMetadata) GetLastNormalizedBatchIDForTable(ctx context.Context, jobName string, dstTableName string) (int64, error) { + var tableBatchIDDataJSON string + if err := p.pool.QueryRow(ctx, + `SELECT table_batch_id_data FROM `+lastSyncStateTableName+` WHERE job_name = $1`, + jobName, + ).Scan(&tableBatchIDDataJSON); err != nil { + if errors.Is(err, pgx.ErrNoRows) { + return 0, nil + } + + p.logger.Error("failed to get last synced batch id for table", "error", err) + return 0, err + } + + var tableBatchIDData map[string]int64 + if err := json.Unmarshal([]byte(tableBatchIDDataJSON), &tableBatchIDData); err != nil { + return 0, fmt.Errorf("failed to unmarshal table batch id data: %w", err) + } + + lastSyncedBatchID, ok := tableBatchIDData[dstTableName] + if !ok { + return 0, nil + } + + return lastSyncedBatchID, nil +} + +// SetLastNormalizedBatchIDForTable updates the last batch ID normalized for the given target table. +func (p *PostgresMetadata) SetLastNormalizedBatchIDForTable(ctx context.Context, jobName string, dstTableName string, batchID int64) error { + if _, err := p.pool.Exec(ctx, + `UPDATE `+lastSyncStateTableName+` + SET table_batch_id_data = jsonb_set(table_batch_id_data::jsonb, ARRAY[$2], $3::jsonb, true) + WHERE job_name = $1`, jobName, dstTableName, strconv.FormatInt(batchID, 10), + ); err != nil { + p.logger.Error("failed to update table batch id data", "error", err) + return fmt.Errorf("failed to update table batch id data: %w", err) + } + + return nil +} + +// GetLastBatchIDInRawTable returns the last batch ID in the raw table. +func (p *PostgresMetadata) GetLastBatchIDInRawTable(ctx context.Context, jobName string) (int64, error) { + var latestBatchIDInRawTable pgtype.Int8 + if err := p.pool.QueryRow(ctx, + `SELECT latest_batch_id_in_raw_table FROM `+lastSyncStateTableName+` WHERE job_name = $1`, + jobName, + ).Scan(&latestBatchIDInRawTable); err != nil { + if errors.Is(err, pgx.ErrNoRows) { + return 0, nil + } + + p.logger.Error("failed to get last batch id in raw table", "error", err) + return 0, err + } + + return latestBatchIDInRawTable.Int64, nil +} + +// SetLastBatchIDInRawTable updates the last batch ID in the raw table. 
+func (p *PostgresMetadata) SetLastBatchIDInRawTable(ctx context.Context, jobName string, batchID int64) error { + if _, err := p.pool.Exec(ctx, + `UPDATE `+lastSyncStateTableName+` + SET latest_batch_id_in_raw_table = $2 + WHERE job_name = $1`, jobName, batchID, + ); err != nil { + p.logger.Error("failed to update last batch id in raw table", "error", err) + return err + } + + return nil +} + func (p *PostgresMetadata) SetupMetadataTables(_ context.Context) error { return nil } @@ -124,8 +200,6 @@ func (p *PostgresMetadata) GetLastNormalizeBatchID(ctx context.Context, jobName p.logger.Error("failed to get last normalize", "error", err) return 0, err } - p.logger.Info("got last normalize batch normalize id for job", "batch id", normalizeBatchID.Int64) - return normalizeBatchID.Int64, nil } @@ -167,7 +241,6 @@ func (p *PostgresMetadata) FinishBatch(ctx context.Context, jobName string, sync } func (p *PostgresMetadata) UpdateNormalizeBatchID(ctx context.Context, jobName string, batchID int64) error { - p.logger.Info("updating normalize batch id for job", slog.Int64("normalizeBatchID", batchID)) if _, err := p.pool.Exec(ctx, `UPDATE `+lastSyncStateTableName+` SET normalize_batch_id=$2 WHERE job_name=$1`, jobName, batchID, ); err != nil { diff --git a/flow/connectors/kafka/qrep.go b/flow/connectors/kafka/qrep.go index 287862369f..498d1dcbde 100644 --- a/flow/connectors/kafka/qrep.go +++ b/flow/connectors/kafka/qrep.go @@ -24,18 +24,18 @@ func (c *KafkaConnector) SyncQRepRecords( config *protos.QRepConfig, partition *protos.QRepPartition, stream *model.QRecordStream, -) (int64, error) { +) (int64, shared.QRepWarnings, error) { startTime := time.Now() numRecords := atomic.Int64{} schema, err := stream.Schema() if err != nil { - return 0, err + return 0, nil, err } queueCtx, queueErr := context.WithCancelCause(ctx) pool, err := c.createPool(queueCtx, config.Env, config.Script, config.FlowJobName, nil, queueErr) if err != nil { - return 0, err + return 0, nil, err } defer pool.Close() @@ -107,14 +107,14 @@ Loop: } if err := pool.Wait(queueCtx); err != nil { - return 0, err + return 0, nil, err } if err := c.client.Flush(queueCtx); err != nil { - return 0, fmt.Errorf("[kafka] final flush error: %w", err) + return 0, nil, fmt.Errorf("[kafka] final flush error: %w", err) } if err := c.FinishQRepPartition(ctx, partition, config.FlowJobName, startTime); err != nil { - return 0, err + return 0, nil, err } - return numRecords.Load(), nil + return numRecords.Load(), nil, nil } diff --git a/flow/connectors/mongo/mongo.go b/flow/connectors/mongo/mongo.go new file mode 100644 index 0000000000..4065456434 --- /dev/null +++ b/flow/connectors/mongo/mongo.go @@ -0,0 +1,426 @@ +package connmongo + +import ( + "context" + "encoding/base64" + "errors" + "fmt" + "log/slog" + "time" + + "go.mongodb.org/mongo-driver/v2/bson" + "go.mongodb.org/mongo-driver/v2/mongo" + "go.mongodb.org/mongo-driver/v2/mongo/options" + "go.mongodb.org/mongo-driver/v2/mongo/readpref" + "go.temporal.io/sdk/log" + + "github.com/PeerDB-io/peerdb/flow/alerting" + metadataStore "github.com/PeerDB-io/peerdb/flow/connectors/external_metadata" + "github.com/PeerDB-io/peerdb/flow/generated/protos" + "github.com/PeerDB-io/peerdb/flow/internal" + "github.com/PeerDB-io/peerdb/flow/model" + "github.com/PeerDB-io/peerdb/flow/otel_metrics" + "github.com/PeerDB-io/peerdb/flow/shared" + "github.com/PeerDB-io/peerdb/flow/shared/types" +) + +const ( + DefaultDocumentKeyColumnName = "_id" + DefaultFullDocumentColumnName = "_full_document" +) + +type 
MongoConnector struct { + *metadataStore.PostgresMetadata + config *protos.MongoConfig + client *mongo.Client + logger log.Logger +} + +func NewMongoConnector(ctx context.Context, config *protos.MongoConfig) (*MongoConnector, error) { + logger := internal.LoggerFromCtx(ctx) + pgMetadata, err := metadataStore.NewPostgresMetadata(ctx) + if err != nil { + return nil, err + } + + client, err := mongo.Connect(options.Client(). + SetAppName("PeerDB Mongo Connector"). + SetReadPreference(readpref.Primary()). + SetCompressors([]string{"zstd", "snappy"}). + ApplyURI(config.Uri)) + if err != nil { + return nil, err + } + return &MongoConnector{ + PostgresMetadata: pgMetadata, + config: config, + client: client, + logger: logger, + }, nil +} + +func (c *MongoConnector) Close() error { + if c != nil && c.client != nil { + // Use a timeout to ensure the disconnect operation does not hang indefinitely + timeout, cancel := context.WithTimeout(context.Background(), 30*time.Second) + defer cancel() + return c.client.Disconnect(timeout) + } + return nil +} + +func (c *MongoConnector) ConnectionActive(ctx context.Context) error { + ctx, cancel := context.WithTimeout(ctx, 5*time.Second) + defer cancel() + + if err := c.client.Ping(ctx, nil); err != nil { + return fmt.Errorf("failed to ping MongoDB: %w", err) + } + return nil +} + +func (c *MongoConnector) GetVersion(ctx context.Context) (string, error) { + db := c.client.Database("admin") + + var buildInfoDoc bson.M + if err := db.RunCommand(ctx, bson.D{bson.E{Key: "buildInfo", Value: 1}}).Decode(&buildInfoDoc); err != nil { + return "", fmt.Errorf("failed to run buildInfo command: %w", err) + } + + version, ok := buildInfoDoc["version"].(string) + if !ok { + return "", fmt.Errorf("buildInfo.version is not a string, but %T", buildInfoDoc["version"]) + } + return version, nil +} + +func (c *MongoConnector) ValidateMirrorSource(ctx context.Context, cfg *protos.FlowConnectionConfigs) error { + if cfg.DoInitialSnapshot && cfg.InitialSnapshotOnly { + return nil + } + + // Check if MongoDB is configured as a replica set + var result bson.M + if err := c.client.Database("admin").RunCommand(ctx, bson.D{ + {Key: "replSetGetStatus", Value: 1}, + }).Decode(&result); err != nil { + return fmt.Errorf("failed to get replica set status: %w", err) + } + + // Check if this is a replica set + if _, ok := result["set"]; !ok { + return errors.New("MongoDB is not configured as a replica set, which is required for CDC") + } + + if myState, ok := result["myState"]; !ok { + return errors.New("myState not found in response") + } else if myStateInt, ok := myState.(int32); !ok { + return fmt.Errorf("failed to convert myState %v to int32", myState) + } else if myStateInt != 1 { + return fmt.Errorf("MongoDB is not the primary node in the replica set, current state: %d", myState) + } + + return nil +} + +func (c *MongoConnector) Client() *mongo.Client { + return c.client +} + +func (c *MongoConnector) GetTableSchema( + ctx context.Context, + _ map[string]string, + _ uint32, + _ protos.TypeSystem, + tableMappings []*protos.TableMapping, +) (map[string]*protos.TableSchema, error) { + result := make(map[string]*protos.TableSchema, len(tableMappings)) + idFieldDescription := &protos.FieldDescription{ + Name: DefaultDocumentKeyColumnName, + Type: string(types.QValueKindString), + TypeModifier: -1, + Nullable: false, + } + dataFieldDescription := &protos.FieldDescription{ + Name: DefaultFullDocumentColumnName, + Type: string(types.QValueKindJSON), + TypeModifier: -1, + Nullable: false, + } + + 
for _, tm := range tableMappings { + result[tm.SourceTableIdentifier] = &protos.TableSchema{ + TableIdentifier: tm.SourceTableIdentifier, + PrimaryKeyColumns: []string{DefaultDocumentKeyColumnName}, + IsReplicaIdentityFull: true, + System: protos.TypeSystem_Q, + NullableEnabled: false, + Columns: []*protos.FieldDescription{ + idFieldDescription, + dataFieldDescription, + }, + } + } + + return result, nil +} + +func (c *MongoConnector) SetupReplication(ctx context.Context, input *protos.SetupReplicationInput) (model.SetupReplicationResult, error) { + changeStreamOpts := options.ChangeStream(). + SetComment("PeerDB changeStream"). + SetFullDocument(options.UpdateLookup). + SetFullDocumentBeforeChange(options.WhenAvailable) + changeStream, err := c.client.Watch(ctx, mongo.Pipeline{}, changeStreamOpts) + if err != nil { + return model.SetupReplicationResult{}, fmt.Errorf("failed to start change stream for storing initial resume token: %w", err) + } + defer changeStream.Close(ctx) + + c.logger.Info("SetupReplication started, waiting for initial resume token", + slog.String("flowJobName", input.FlowJobName)) + var resumeToken bson.Raw + for { + resumeToken = changeStream.ResumeToken() + if resumeToken != nil { + break + } else { + c.logger.Info("Resume token not available, waiting for next change event...") + if !changeStream.Next(ctx) { + return model.SetupReplicationResult{}, fmt.Errorf("change stream error: %w", changeStream.Err()) + } + } + } + err = c.SetLastOffset(ctx, input.FlowJobName, model.CdcCheckpoint{ + Text: base64.StdEncoding.EncodeToString(resumeToken), + }) + if err != nil { + return model.SetupReplicationResult{}, fmt.Errorf("failed to store initial resume token: %w", err) + } + c.logger.Info("SetupReplication completed, stored initial resume token", + slog.String("flowJobName", input.FlowJobName)) + return model.SetupReplicationResult{}, nil +} + +// stubs for CDCPullConnectorCore + +func (c *MongoConnector) EnsurePullability(ctx context.Context, req *protos.EnsurePullabilityBatchInput) ( + *protos.EnsurePullabilityBatchOutput, error, +) { + return nil, nil +} + +func (c *MongoConnector) ExportTxSnapshot(context.Context, map[string]string) (*protos.ExportTxSnapshotOutput, any, error) { + return nil, nil, nil +} + +func (c *MongoConnector) FinishExport(any) error { + return nil +} + +func (c *MongoConnector) SetupReplConn(context.Context) error { + return nil +} + +func (c *MongoConnector) ReplPing(context.Context) error { + return nil +} + +func (c *MongoConnector) UpdateReplStateLastOffset(ctx context.Context, lastOffset model.CdcCheckpoint) error { + return nil +} + +func (c *MongoConnector) PullFlowCleanup(ctx context.Context, jobName string) error { + return nil +} + +func (c *MongoConnector) HandleSlotInfo( + ctx context.Context, + alerter *alerting.Alerter, + catalogPool shared.CatalogPool, + alertKeys *alerting.AlertKeys, + slotMetricGauges otel_metrics.SlotMetricGauges, +) error { + return nil +} + +func (c *MongoConnector) GetSlotInfo(ctx context.Context, slotName string) ([]*protos.SlotInfo, error) { + return nil, nil +} + +func (c *MongoConnector) AddTablesToPublication(ctx context.Context, req *protos.AddTablesToPublicationInput) error { + return nil +} + +func (c *MongoConnector) RemoveTablesFromPublication(ctx context.Context, req *protos.RemoveTablesFromPublicationInput) error { + return nil +} + +// end stubs + +func (c *MongoConnector) PullRecords( + ctx context.Context, + catalogPool shared.CatalogPool, + otelManager *otel_metrics.OtelManager, + req 
*model.PullRecordsRequest[model.RecordItems], +) error { + defer req.RecordStream.Close() + c.logger.Info("[started] PullRecords for mirror "+req.FlowJobName, + slog.Any("table_mapping", req.TableNameMapping), + slog.Uint64("max_batch_size", uint64(req.MaxBatchSize)), + slog.Duration("idle_timeout", req.IdleTimeout)) + + changeStreamOpts := options.ChangeStream(). + SetComment("PeerDB changeStream for mirror " + req.FlowJobName). + SetFullDocument(options.UpdateLookup). + SetFullDocumentBeforeChange(options.WhenAvailable) + if req.LastOffset.Text != "" { + // If we have a last offset, we resume from that point + c.logger.Info("[mongo] resuming change stream", slog.String("resumeToken", req.LastOffset.Text)) + resumeTokenBytes, err := base64.StdEncoding.DecodeString(req.LastOffset.Text) + if err != nil { + return fmt.Errorf("failed to parse last offset: %w", err) + } + changeStreamOpts.SetResumeAfter(bson.Raw(resumeTokenBytes)) + } + + changeStream, err := c.client.Watch(ctx, mongo.Pipeline{}, changeStreamOpts) + if err != nil { + var cmdErr mongo.CommandError + // ChangeStreamHistoryLost is basically slot invalidation + if errors.As(err, &cmdErr) && cmdErr.Code == 286 { + return errors.New("change stream history lost") + } + return err + } + defer changeStream.Close(ctx) + c.logger.Info("ChangeStream started for mirror " + req.FlowJobName) + + var recordCount uint32 + defer func() { + if recordCount == 0 { + req.RecordStream.SignalAsEmpty() + } + c.logger.Info(fmt.Sprintf("[finished] PullRecords streamed %d records", recordCount)) + }() + // before first record, we wait indefinitely so give it ctx + // after first record, we wait for idle timeout + getCtx := ctx + var cancelTimeout context.CancelFunc + defer func() { + if cancelTimeout != nil { + cancelTimeout() + } + }() + addRecord := func(ctx context.Context, record model.Record[model.RecordItems]) error { + recordCount += 1 + if err := req.RecordStream.AddRecord(ctx, record); err != nil { + return err + } + if recordCount == 1 { + req.RecordStream.SignalAsNotEmpty() + // after the first record, we switch to a timeout context + getCtx, cancelTimeout = context.WithTimeout(ctx, req.IdleTimeout) + } + return nil + } + + for recordCount < req.MaxBatchSize && changeStream.Next(getCtx) { + var changeDoc bson.M + if err := changeStream.Decode(&changeDoc); err != nil { + return fmt.Errorf("failed to decode change stream document: %w", err) + } + + if operationType, ok := changeDoc["operationType"]; !ok { + c.logger.Warn("operationType field not found") + continue + } else if operationType != "insert" && operationType != "update" && operationType != "replace" && operationType != "delete" { + continue + } + + clusterTime := changeDoc["clusterTime"].(bson.Timestamp) + clusterTimeNanos := time.Unix(int64(clusterTime.T), 0).UnixNano() + + sourceTableName := fmt.Sprintf("%s.%s", changeDoc["ns"].(bson.D)[0].Value, changeDoc["ns"].(bson.D)[1].Value) + destinationTableName := req.TableNameMapping[sourceTableName].Name + + items := model.NewRecordItems(2) + + if documentKey, found := changeDoc["documentKey"]; found { + if len(documentKey.(bson.D)) == 0 || documentKey.(bson.D)[0].Key != DefaultDocumentKeyColumnName { + // should never happen + return errors.New("invalid document key, expect _id") + } + id := documentKey.(bson.D)[0].Value + qValue, err := qValueStringFromKey(id) + if err != nil { + return fmt.Errorf("failed to convert _id to string: %w", err) + } + items.AddColumn(DefaultDocumentKeyColumnName, qValue) + } else { + // should never happen + 
return errors.New("documentKey field not found") + } + + if fullDocument, found := changeDoc["fullDocument"]; found { + qValue, err := qValueJSONFromDocument(fullDocument.(bson.D)) + if err != nil { + return fmt.Errorf("failed to convert fullDocument to JSON: %w", err) + } + items.AddColumn(DefaultFullDocumentColumnName, qValue) + } else { + // `fullDocument` field will not exist in the following scenarios: + // 1) operationType is 'delete' + // 2) document is deleted / collection is dropped in between update and lookup + // 3) update changes the values for at least one of the fields in that collection's + // shard key (although sharding is not supported today) + items.AddColumn(DefaultFullDocumentColumnName, types.QValueJSON{Val: "{}"}) + } + + if operationType, ok := changeDoc["operationType"]; ok { + switch operationType { + case "insert": + if err := addRecord(ctx, &model.InsertRecord[model.RecordItems]{ + BaseRecord: model.BaseRecord{CommitTimeNano: clusterTimeNanos}, + Items: items, + SourceTableName: sourceTableName, + DestinationTableName: destinationTableName, + }); err != nil { + return fmt.Errorf("failed to add insert record: %w", err) + } + case "update", "replace": + if err := addRecord(ctx, &model.UpdateRecord[model.RecordItems]{ + BaseRecord: model.BaseRecord{CommitTimeNano: clusterTimeNanos}, + NewItems: items, + SourceTableName: sourceTableName, + DestinationTableName: destinationTableName, + }); err != nil { + return fmt.Errorf("failed to add update record: %w", err) + } + case "delete": + if err := addRecord(ctx, &model.DeleteRecord[model.RecordItems]{ + BaseRecord: model.BaseRecord{CommitTimeNano: clusterTimeNanos}, + Items: items, + SourceTableName: sourceTableName, + DestinationTableName: destinationTableName, + }); err != nil { + return fmt.Errorf("failed to add delete record: %w", err) + } + default: + return fmt.Errorf("unsupported operationType: %s", operationType) + } + } + } + if err := changeStream.Err(); err != nil && !errors.Is(err, context.DeadlineExceeded) { + c.logger.Error("PullRecords change stream error", "error", err) + return fmt.Errorf("change stream error: %w", err) + } + if resumeToken := changeStream.ResumeToken(); resumeToken != nil { + // Update the last offset with the resume token + req.RecordStream.UpdateLatestCheckpointText(base64.StdEncoding.EncodeToString(resumeToken)) + c.logger.Info("[mongo] latest resume token", slog.String("resumeToken", req.LastOffset.Text)) + } else { + c.logger.Warn("Change stream document does not contain a resume token") + } + + return nil +} diff --git a/flow/connectors/mongo/qrep.go b/flow/connectors/mongo/qrep.go new file mode 100644 index 0000000000..4a5a23181c --- /dev/null +++ b/flow/connectors/mongo/qrep.go @@ -0,0 +1,173 @@ +package connmongo + +import ( + "context" + "errors" + "fmt" + "log/slog" + "math" + + "go.mongodb.org/mongo-driver/v2/bson" + "go.mongodb.org/mongo-driver/v2/mongo/options" + + "github.com/PeerDB-io/peerdb/flow/connectors/utils" + "github.com/PeerDB-io/peerdb/flow/generated/protos" + "github.com/PeerDB-io/peerdb/flow/model" + "github.com/PeerDB-io/peerdb/flow/shared/types" +) + +const MongoFullTablePartitionId = "mongo-full-table-partition-id" + +func (c *MongoConnector) GetQRepPartitions( + ctx context.Context, + config *protos.QRepConfig, + last *protos.QRepPartition, +) ([]*protos.QRepPartition, error) { + // if no watermark column is specified, return a single partition + if config.WatermarkColumn == "" { + return []*protos.QRepPartition{ + { + PartitionId: 
MongoFullTablePartitionId, + Range: nil, + FullTablePartition: true, + }, + }, nil + } + + partitionHelper := utils.NewPartitionHelper(c.logger) + return partitionHelper.GetPartitions(), nil +} + +func (c *MongoConnector) PullQRepRecords( + ctx context.Context, + config *protos.QRepConfig, + partition *protos.QRepPartition, + stream *model.QRecordStream, +) (int64, int64, error) { + var totalRecords int64 + var totalBytes int64 + + parseWatermarkTable, err := utils.ParseSchemaTable(config.WatermarkTable) + if err != nil { + return 0, 0, fmt.Errorf("unable to parse watermark table: %w", err) + } + collection := c.client.Database(parseWatermarkTable.Schema).Collection(parseWatermarkTable.Table) + + stream.SetSchema(GetDefaultSchema()) + + filter := bson.D{} + if !partition.FullTablePartition { + // For now partition range is always nil, see `GetQRepPartitions` + if partition.Range != nil { + filter, err = toRangeFilter(partition.Range) + if err != nil { + return 0, 0, fmt.Errorf("failed to convert partition range to filter: %w", err) + } + } + } + + batchSize := config.NumRowsPerPartition + if config.NumRowsPerPartition <= 0 || config.NumRowsPerPartition > math.MaxInt32 { + batchSize = math.MaxInt32 + } + + // MongoDb will use the lesser of batchSize and 16MiB + // https://www.mongodb.com/docs/manual/reference/method/cursor.batchsize/ + cursor, err := collection.Find(ctx, filter, options.Find().SetBatchSize(int32(batchSize))) + if err != nil { + return 0, 0, fmt.Errorf("failed to query for records: %w", err) + } + defer cursor.Close(ctx) + + for cursor.Next(ctx) { + var doc bson.D + if err := cursor.Decode(&doc); err != nil { + return 0, 0, fmt.Errorf("failed to decode record: %w", err) + } + + record, bytes, err := QValuesFromDocument(doc) + if err != nil { + return 0, 0, fmt.Errorf("failed to convert record: %w", err) + } + stream.Records <- record + totalRecords += 1 + totalBytes += bytes + } + close(stream.Records) + if err := cursor.Err(); err != nil { + if errors.Is(err, context.Canceled) { + c.logger.Warn("context canceled while reading documents", + slog.Any("partition", partition.PartitionId), + slog.String("watermark_table", config.WatermarkTable)) + } else { + c.logger.Error("error while reading documents", + slog.Any("partition", partition.PartitionId), + slog.String("watermark_table", config.WatermarkTable), + slog.String("error", err.Error())) + } + return 0, 0, fmt.Errorf("cursor error: %w", err) + } + + return totalRecords, totalBytes, nil +} + +func GetDefaultSchema() types.QRecordSchema { + schema := make([]types.QField, 0, 2) + schema = append(schema, + types.QField{ + Name: DefaultDocumentKeyColumnName, + Type: types.QValueKindString, + Nullable: false, + }, + types.QField{ + Name: DefaultFullDocumentColumnName, + Type: types.QValueKindJSON, + Nullable: false, + }) + return types.QRecordSchema{Fields: schema} +} + +func toRangeFilter(partitionRange *protos.PartitionRange) (bson.D, error) { + switch r := partitionRange.Range.(type) { + case *protos.PartitionRange_ObjectIdRange: + return bson.D{ + bson.E{Key: DefaultDocumentKeyColumnName, Value: bson.D{ + bson.E{Key: "$gte", Value: r.ObjectIdRange.Start}, + bson.E{Key: "$lte", Value: r.ObjectIdRange.End}, + }}, + }, nil + default: + return nil, errors.New("unsupported partition range type") + } +} + +func QValuesFromDocument(doc bson.D) ([]types.QValue, int64, error) { + var qValues []types.QValue + var size int64 + + var qvalueId types.QValueString + var err error + for _, v := range doc { + if v.Key == 
DefaultDocumentKeyColumnName { + qvalueId, err = qValueStringFromKey(v.Value) + if err != nil { + return nil, 0, fmt.Errorf("failed to convert key %s: %w", DefaultDocumentKeyColumnName, err) + } + break + } + } + if qvalueId.Val == "" { + return nil, 0, fmt.Errorf("key %s not found", DefaultDocumentKeyColumnName) + } + qValues = append(qValues, qvalueId) + + qvalueDoc, err := qValueJSONFromDocument(doc) + if err != nil { + return nil, 0, err + } + qValues = append(qValues, qvalueDoc) + + size += int64(len(qvalueDoc.Val)) + + return qValues, size, nil +} diff --git a/flow/connectors/mongo/qvalue_convert.go b/flow/connectors/mongo/qvalue_convert.go new file mode 100644 index 0000000000..df20d7e0c3 --- /dev/null +++ b/flow/connectors/mongo/qvalue_convert.go @@ -0,0 +1,24 @@ +package connmongo + +import ( + "encoding/json" + "fmt" + + "github.com/PeerDB-io/peerdb/flow/shared/types" +) + +func qValueStringFromKey(key any) (types.QValueString, error) { + jsonb, err := json.Marshal(key) + if err != nil { + return types.QValueString{}, fmt.Errorf("error marshalling key: %w", err) + } + return types.QValueString{Val: string(jsonb)}, nil +} + +func qValueJSONFromDocument(document interface{}) (types.QValueJSON, error) { + jsonb, err := json.Marshal(document) + if err != nil { + return types.QValueJSON{}, fmt.Errorf("error marshalling document: %w", err) + } + return types.QValueJSON{Val: string(jsonb), IsArray: false}, nil +} diff --git a/flow/connectors/mongo/schema.go b/flow/connectors/mongo/schema.go new file mode 100644 index 0000000000..c6e6bd2aab --- /dev/null +++ b/flow/connectors/mongo/schema.go @@ -0,0 +1,88 @@ +package connmongo + +import ( + "context" + "fmt" + + "go.mongodb.org/mongo-driver/v2/bson" + + "github.com/PeerDB-io/peerdb/flow/generated/protos" +) + +func (c *MongoConnector) GetAllTables(ctx context.Context) (*protos.AllTablesResponse, error) { + tableNames := make([]string, 0) + + dbNames, err := c.getAllDatabaseNames(ctx) + if err != nil { + return nil, fmt.Errorf("failed to get databases: %w", err) + } + for _, dbName := range dbNames { + collNames, err := c.client.Database(dbName).ListCollectionNames(ctx, bson.D{}) + if err != nil { + return nil, fmt.Errorf("failed to get collections: %w", err) + } + for _, collName := range collNames { + tableNames = append(tableNames, fmt.Sprintf("%s.%s", dbName, collName)) + } + } + return &protos.AllTablesResponse{ + Tables: tableNames, + }, nil +} + +func (c *MongoConnector) GetSchemas(ctx context.Context) (*protos.PeerSchemasResponse, error) { + dbNames, err := c.getAllDatabaseNames(ctx) + if err != nil { + return nil, fmt.Errorf("failed to get databases: %w", err) + } + return &protos.PeerSchemasResponse{ + Schemas: dbNames, + }, nil +} + +func (c *MongoConnector) GetTablesInSchema(ctx context.Context, schema string, cdcEnabled bool) (*protos.SchemaTablesResponse, error) { + db := c.client.Database(schema) + collectionNames, err := db.ListCollectionNames(ctx, bson.D{}) + if err != nil { + return nil, fmt.Errorf("failed to get collections: %w", err) + } + + response := protos.SchemaTablesResponse{ + Tables: make([]*protos.TableResponse, 0, len(collectionNames)), + } + + for _, collectionName := range collectionNames { + tableResp := &protos.TableResponse{ + TableName: collectionName, + CanMirror: true, + // TODO: implement TableSize fetching + TableSize: "", + } + response.Tables = append(response.Tables, tableResp) + } + + return &response, nil +} + +func (c *MongoConnector) GetColumns(ctx context.Context, version uint32, schema 
string, table string) (*protos.TableColumnsResponse, error) { + return &protos.TableColumnsResponse{ + Columns: []*protos.ColumnsItem{}, + }, nil +} + +// Get all database names, but excluding MongoDB's default databases +func (c *MongoConnector) getAllDatabaseNames(ctx context.Context) ([]string, error) { + filter := bson.M{ + "name": bson.M{ + "$nin": []string{"admin", "local", "config"}, + }, + } + dbs, err := c.client.ListDatabaseNames(ctx, filter) + if err != nil { + return nil, err + } + filteredDbNames := make([]string, 0, len(dbs)) + filteredDbNames = append(filteredDbNames, dbs...) + + return filteredDbNames, nil +} diff --git a/flow/connectors/mysql/cdc.go b/flow/connectors/mysql/cdc.go index 6658c19c8c..60181cd481 100644 --- a/flow/connectors/mysql/cdc.go +++ b/flow/connectors/mysql/cdc.go @@ -22,18 +22,20 @@ import ( "github.com/PeerDB-io/peerdb/flow/alerting" "github.com/PeerDB-io/peerdb/flow/connectors/utils" "github.com/PeerDB-io/peerdb/flow/connectors/utils/monitoring" - "github.com/PeerDB-io/peerdb/flow/datatypes" "github.com/PeerDB-io/peerdb/flow/generated/protos" "github.com/PeerDB-io/peerdb/flow/internal" "github.com/PeerDB-io/peerdb/flow/model" - "github.com/PeerDB-io/peerdb/flow/model/qvalue" "github.com/PeerDB-io/peerdb/flow/otel_metrics" "github.com/PeerDB-io/peerdb/flow/shared" + "github.com/PeerDB-io/peerdb/flow/shared/datatypes" + qmysql "github.com/PeerDB-io/peerdb/flow/shared/mysql" + "github.com/PeerDB-io/peerdb/flow/shared/types" ) func (c *MySqlConnector) GetTableSchema( ctx context.Context, env map[string]string, + version uint32, system protos.TypeSystem, tableMappings []*protos.TableMapping, ) (map[string]*protos.TableSchema, error) { @@ -105,7 +107,7 @@ func (c *MySqlConnector) getTableSchemaForTable( if err != nil { return nil, err } - qkind, err := qkindFromMysqlColumnType(dataType) + qkind, err := qmysql.QkindFromMysqlColumnType(dataType) if err != nil { return nil, err } @@ -173,7 +175,7 @@ func (c *MySqlConnector) SetupReplication( if err != nil { return model.SetupReplicationResult{}, fmt.Errorf("[mysql] SetupReplication failed to GetMasterPos: %w", err) } - lastOffsetText = fmt.Sprintf("!f:%s,%x", pos.Name, pos.Pos) + lastOffsetText = posToOffsetText(pos) } if err := c.SetLastOffset( ctx, req.FlowJobName, model.CdcCheckpoint{Text: lastOffsetText}, @@ -334,6 +336,11 @@ func (c *MySqlConnector) PullRecords( ) error { defer req.RecordStream.Close() + sourceSchemaAsDestinationColumn, err := internal.PeerDBSourceSchemaAsDestinationColumn(ctx, req.Env) + if err != nil { + return err + } + syncer, mystream, gset, pos, err := c.startStreaming(ctx, req.LastOffset.Text) if err != nil { return err @@ -341,6 +348,7 @@ func (c *MySqlConnector) PullRecords( defer syncer.Close() var skewLossReported bool + var updatedOffset string var inTx bool var recordCount uint32 // set when a tx is preventing us from respecting the timeout, immediately exit after we see inTx false @@ -349,15 +357,13 @@ func (c *MySqlConnector) PullRecords( if recordCount == 0 { req.RecordStream.SignalAsEmpty() } - c.logger.Info(fmt.Sprintf("[finished] PullRecords streamed %d records", recordCount)) + c.logger.Info("[mysql] PullRecords finished streaming", slog.Uint64("records", uint64(recordCount))) }() - timeoutCtx := ctx - var cancelTimeout context.CancelFunc + timeoutCtx, cancelTimeout := context.WithTimeout(ctx, time.Hour) + //nolint:gocritic // cancelTimeout is rebound, do not defer cancelTimeout() defer func() { - if cancelTimeout != nil { - cancelTimeout() - } + cancelTimeout() }() 
addRecord := func(ctx context.Context, record model.Record[model.RecordItems]) error { @@ -367,67 +373,59 @@ func (c *MySqlConnector) PullRecords( } if recordCount == 1 { req.RecordStream.SignalAsNotEmpty() - if cancelTimeout != nil { - cancelTimeout() - } + cancelTimeout() timeoutCtx, cancelTimeout = context.WithTimeout(ctx, req.IdleTimeout) } return nil } var mysqlParser *parser.Parser - for inTx || recordCount < req.MaxBatchSize { - getCtx := ctx - if overtime && !inTx { - return nil + for inTx || (!overtime && recordCount < req.MaxBatchSize) { + var event *replication.BinlogEvent + // don't gamble on closed timeoutCtx.Done() being prioritized over event backlog channel + err := timeoutCtx.Err() + if err == nil { + event, err = mystream.GetEvent(timeoutCtx) } + if err != nil { + if ctxErr := ctx.Err(); ctxErr != nil { + c.logger.Info("[mysql] PullRecords context canceled, stopping streaming", slog.Any("error", err)) + //nolint:govet // cancelTimeout called by defer, spurious lint + return ctxErr + } else if errors.Is(err, context.DeadlineExceeded) { + if recordCount == 0 { + // progress offset while no records read to avoid falling behind when all tables inactive + if updatedOffset != "" { + c.logger.Info("[mysql] updating inactive offset", slog.Any("offset", updatedOffset)) + if err := c.SetLastOffset(ctx, req.FlowJobName, model.CdcCheckpoint{Text: updatedOffset}); err != nil { + c.logger.Error("[mysql] failed to update offset, ignoring", slog.Any("error", err)) + } else { + updatedOffset = "" + } + } - // don't gamble on closed timeoutCtx.Done() being prioritized over event backlog channel - if err := timeoutCtx.Err(); err != nil { - if errors.Is(err, context.DeadlineExceeded) { - if inTx { + // reset timer for next offset update + cancelTimeout() + timeoutCtx, cancelTimeout = context.WithTimeout(ctx, time.Hour) + } else if inTx { c.logger.Info("[mysql] timeout reached, but still in transaction, waiting for inTx false", slog.Uint64("recordCount", uint64(recordCount))) // reset timeoutCtx to a low value and wait for inTx to become false - if cancelTimeout != nil { - cancelTimeout() - } + cancelTimeout() //nolint:govet // cancelTimeout called by defer, spurious lint - timeoutCtx, cancelTimeout = context.WithTimeout(ctx, 10*time.Second) + timeoutCtx, cancelTimeout = context.WithTimeout(ctx, time.Minute) overtime = true } else { return nil } - } else { - return err - } - } - if recordCount > 0 && !inTx { - // if we have records and are safe, start respecting the timeout - getCtx = timeoutCtx - } - event, err := mystream.GetEvent(getCtx) - if err != nil { - if errors.Is(err, context.DeadlineExceeded) { - if !inTx { - //nolint:govet - return nil - } - // if in tx, don't let syncer exit due to deadline exceeded continue } else { - if errors.Is(err, context.Canceled) { - c.logger.Info("[mysql] PullRecords context canceled, stopping streaming", slog.Any("error", err)) - } else { - c.logger.Error("[mysql] PullRecords failed to get event", slog.Any("error", err)) - } - return err + c.logger.Error("[mysql] PullRecords failed to get event", slog.Any("error", err)) } + return err } - otelManager.Metrics.FetchedBytesCounter.Add(ctx, int64(len(event.RawData))) - switch ev := event.Event.(type) { case *replication.GTIDEvent: if ev.ImmediateCommitTimestamp > 0 { @@ -437,28 +435,27 @@ func (c *MySqlConnector) PullRecords( case *replication.XIDEvent: if gset != nil { gset = ev.GSet - req.RecordStream.UpdateLatestCheckpointText(gset.String()) + updatedOffset = gset.String() + 
req.RecordStream.UpdateLatestCheckpointText(updatedOffset) } else if event.Header.LogPos > pos.Pos { pos.Pos = event.Header.LogPos - req.RecordStream.UpdateLatestCheckpointText(fmt.Sprintf("!f:%s,%x", pos.Name, pos.Pos)) + updatedOffset = posToOffsetText(pos) + req.RecordStream.UpdateLatestCheckpointText(updatedOffset) } inTx = false case *replication.RotateEvent: if gset == nil && (event.Header.Timestamp != 0 || string(ev.NextLogName) != pos.Name) { pos.Name = string(ev.NextLogName) pos.Pos = uint32(ev.Position) - req.RecordStream.UpdateLatestCheckpointText(fmt.Sprintf("!f:%s,%x", pos.Name, pos.Pos)) + updatedOffset = posToOffsetText(pos) + req.RecordStream.UpdateLatestCheckpointText(updatedOffset) c.logger.Info("rotate", slog.String("name", pos.Name), slog.Uint64("pos", uint64(pos.Pos))) } case *replication.QueryEvent: - if !inTx { - if gset != nil { - gset = ev.GSet - req.RecordStream.UpdateLatestCheckpointText(gset.String()) - } else if event.Header.LogPos > pos.Pos { - pos.Pos = event.Header.LogPos - req.RecordStream.UpdateLatestCheckpointText(fmt.Sprintf("!f:%s,%x", pos.Name, pos.Pos)) - } + if !inTx && gset == nil && event.Header.LogPos > pos.Pos { + pos.Pos = event.Header.LogPos + updatedOffset = posToOffsetText(pos) + req.RecordStream.UpdateLatestCheckpointText(updatedOffset) } if mysqlParser == nil { mysqlParser = parser.New() @@ -484,6 +481,7 @@ func (c *MySqlConnector) PullRecords( exclusion := req.TableNameMapping[sourceTableName].Exclude schema := req.TableNameSchemaMapping[destinationTableName] if schema != nil { + otelManager.Metrics.FetchedBytesCounter.Add(ctx, int64(len(event.RawData))) inTx = true enumMap := ev.Table.EnumStrValueMap() setMap := ev.Table.SetStrValueMap() @@ -525,12 +523,15 @@ func (c *MySqlConnector) PullRecords( continue } val, err := QValueFromMysqlRowEvent(ev.Table.ColumnType[idx], enumMap[idx], setMap[idx], - qvalue.QValueKind(fd.Type), val) + types.QValueKind(fd.Type), val) if err != nil { return err } items.AddColumn(fd.Name, val) } + if sourceSchemaAsDestinationColumn { + items.AddColumn("_peerdb_source_schema", types.QValueString{Val: string(ev.Table.Schema)}) + } if err := addRecord(ctx, &model.InsertRecord[model.RecordItems]{ BaseRecord: model.BaseRecord{CommitTimeNano: int64(event.Header.Timestamp) * 1e9}, @@ -559,7 +560,7 @@ func (c *MySqlConnector) PullRecords( continue } val, err := QValueFromMysqlRowEvent(ev.Table.ColumnType[idx], enumMap[idx], setMap[idx], - qvalue.QValueKind(fd.Type), val) + types.QValueKind(fd.Type), val) if err != nil { return err } @@ -573,12 +574,15 @@ func (c *MySqlConnector) PullRecords( continue } val, err := QValueFromMysqlRowEvent(ev.Table.ColumnType[idx], enumMap[idx], setMap[idx], - qvalue.QValueKind(fd.Type), val) + types.QValueKind(fd.Type), val) if err != nil { return err } newItems.AddColumn(fd.Name, val) } + if sourceSchemaAsDestinationColumn { + newItems.AddColumn("_peerdb_source_schema", types.QValueString{Val: string(ev.Table.Schema)}) + } if err := addRecord(ctx, &model.UpdateRecord[model.RecordItems]{ BaseRecord: model.BaseRecord{CommitTimeNano: int64(event.Header.Timestamp) * 1e9}, @@ -608,12 +612,15 @@ func (c *MySqlConnector) PullRecords( continue } val, err := QValueFromMysqlRowEvent(ev.Table.ColumnType[idx], enumMap[idx], setMap[idx], - qvalue.QValueKind(fd.Type), val) + types.QValueKind(fd.Type), val) if err != nil { return err } items.AddColumn(fd.Name, val) } + if sourceSchemaAsDestinationColumn { + items.AddColumn("_peerdb_source_schema", types.QValueString{Val: string(ev.Table.Schema)}) + } 
if err := addRecord(ctx, &model.DeleteRecord[model.RecordItems]{ BaseRecord: model.BaseRecord{CommitTimeNano: int64(event.Header.Timestamp) * 1e9}, @@ -672,7 +679,7 @@ func (c *MySqlConnector) processAlterTableQuery(ctx context.Context, catalogPool slog.String("tableName", sourceTableName)) continue } - qkind, err := qkindFromMysqlColumnType(col.Tp.InfoSchemaStr()) + qkind, err := qmysql.QkindFromMysqlColumnType(col.Tp.InfoSchemaStr()) if err != nil { return err } @@ -719,3 +726,7 @@ func (c *MySqlConnector) processAlterTableQuery(ctx context.Context, catalogPool } return nil } + +func posToOffsetText(pos mysql.Position) string { + return fmt.Sprintf("!f:%s,%x", pos.Name, pos.Pos) +} diff --git a/flow/connectors/mysql/metered_dialer.go b/flow/connectors/mysql/metered_dialer.go new file mode 100644 index 0000000000..58c195712e --- /dev/null +++ b/flow/connectors/mysql/metered_dialer.go @@ -0,0 +1,37 @@ +package connmysql + +/* go-mysql does not expose raw bytes for streaming selects, + * thus this allows accurately measuring fetched bytes. + * go-mysql wraps connection with tls.Conn for tls, + * so one should not try retrieving the original connection. + * simpler to have the meter write to an atomic int we own. + */ + +import ( + "context" + "net" + "sync/atomic" + + "github.com/go-mysql-org/go-mysql/client" +) + +type MeteredConn struct { + net.Conn + bytesRead *atomic.Int64 +} + +func (mc *MeteredConn) Read(b []byte) (int, error) { + read, err := mc.Conn.Read(b) + mc.bytesRead.Add(int64(read)) + return read, err +} + +func NewMeteredDialer(bytesRead *atomic.Int64, innerDialer client.Dialer) client.Dialer { + return func(ctx context.Context, network string, address string) (net.Conn, error) { + conn, err := innerDialer(ctx, network, address) + if err != nil { + return conn, err + } + return &MeteredConn{Conn: conn, bytesRead: bytesRead}, nil + } +} diff --git a/flow/connectors/mysql/mysql.go b/flow/connectors/mysql/mysql.go index 242d4e4cfc..87c45e9917 100644 --- a/flow/connectors/mysql/mysql.go +++ b/flow/connectors/mysql/mysql.go @@ -32,6 +32,7 @@ type MySqlConnector struct { logger log.Logger rdsAuth *utils.RDSAuth serverVersion string + bytesRead atomic.Int64 } func NewMySqlConnector(ctx context.Context, config *protos.MySqlConfig) (*MySqlConnector, error) { @@ -61,7 +62,7 @@ func NewMySqlConnector(ctx context.Context, config *protos.MySqlConfig) (*MySqlC ssh: ssh, conn: atomic.Pointer[client.Conn]{}, contexts: contexts, - logger: internal.LoggerFromCtx(ctx), + logger: logger, rdsAuth: rdsAuth, } go func() { @@ -125,11 +126,9 @@ func (c *MySqlConnector) ConnectionActive(context.Context) error { func (c *MySqlConnector) Dialer() client.Dialer { if c.ssh.Client == nil { - return (&net.Dialer{Timeout: time.Minute}).DialContext - } - return func(ctx context.Context, network, addr string) (net.Conn, error) { - return c.ssh.Client.DialContext(ctx, network, addr) + return NewMeteredDialer(&c.bytesRead, (&net.Dialer{Timeout: time.Minute}).DialContext) } + return NewMeteredDialer(&c.bytesRead, c.ssh.Client.DialContext) } func (c *MySqlConnector) connect(ctx context.Context) (*client.Conn, error) { @@ -169,7 +168,7 @@ func (c *MySqlConnector) connect(ctx context.Context) (*client.Conn, error) { config.Password = token } var err error - conn, err = client.ConnectWithDialer(ctx, "", fmt.Sprintf("%s:%d", config.Host, config.Port), + conn, err = client.ConnectWithDialer(ctx, "", shared.JoinHostPort(config.Host, config.Port), config.User, config.Password, config.Database, c.Dialer(), argF...) 
if err != nil { return nil, err @@ -250,12 +249,13 @@ func (c *MySqlConnector) ExecuteSelectStreaming(ctx context.Context, cmd string, rowCb client.SelectPerRowCallback, resultCb client.SelectPerResultCallback, args ...any, -) error { +) (int64, error) { var connectionErr error for conn, err := range c.withRetries(ctx) { if err != nil { - return err + return 0, err } + c.bytesRead.Store(0) if len(args) == 0 { if err := conn.ExecuteSelectStreaming(cmd, result, rowCb, resultCb); err != nil { @@ -263,7 +263,7 @@ func (c *MySqlConnector) ExecuteSelectStreaming(ctx context.Context, cmd string, connectionErr = err continue } - return err + return 0, err } } else { stmt, err := conn.Prepare(cmd) @@ -272,7 +272,7 @@ func (c *MySqlConnector) ExecuteSelectStreaming(ctx context.Context, cmd string, connectionErr = err continue } - return err + return 0, err } err = stmt.ExecuteSelectStreaming(result, rowCb, resultCb, args...) _ = stmt.Close() @@ -281,12 +281,12 @@ func (c *MySqlConnector) ExecuteSelectStreaming(ctx context.Context, cmd string, connectionErr = err continue } - return err + return 0, err } } - return nil + return c.bytesRead.Load(), nil } - return connectionErr + return 0, connectionErr } func (c *MySqlConnector) GetGtidModeOn(ctx context.Context) (bool, error) { diff --git a/flow/connectors/mysql/qrep.go b/flow/connectors/mysql/qrep.go index 2e4356ae0b..17636b8502 100644 --- a/flow/connectors/mysql/qrep.go +++ b/flow/connectors/mysql/qrep.go @@ -6,7 +6,6 @@ import ( "errors" "fmt" "log/slog" - "math" "strconv" "text/template" @@ -16,10 +15,33 @@ import ( "github.com/PeerDB-io/peerdb/flow/connectors/utils" "github.com/PeerDB-io/peerdb/flow/generated/protos" "github.com/PeerDB-io/peerdb/flow/model" - "github.com/PeerDB-io/peerdb/flow/model/qvalue" shared_mysql "github.com/PeerDB-io/peerdb/flow/shared/mysql" + "github.com/PeerDB-io/peerdb/flow/shared/types" ) +func (c *MySqlConnector) GetDataTypeOfWatermarkColumn( + ctx context.Context, + watermarkTableName string, + watermarkColumn string, +) (types.QValueKind, byte, error) { + if watermarkColumn == "" { + return "", 0, errors.New("watermark column is not specified in the config") + } + + query := fmt.Sprintf("SELECT `%s` FROM %s LIMIT 0", watermarkColumn, watermarkTableName) + rs, err := c.Execute(ctx, query) + if err != nil { + return "", 0, fmt.Errorf("failed to execute query for watermark column type: %w", err) + } + + if len(rs.Fields) == 0 { + return "", 0, fmt.Errorf("no fields returned from select query: %s", query) + } + + qk, err := qkindFromMysql(rs.Fields[0]) + return qk, rs.Fields[0].Type, err +} + func (c *MySqlConnector) GetQRepPartitions( ctx context.Context, config *protos.QRepConfig, @@ -46,7 +68,7 @@ func (c *MySqlConnector) GetQRepPartitions( whereClause := "" if last != nil && last.Range != nil { - whereClause = fmt.Sprintf("WHERE %s > $1", quotedWatermarkColumn) + whereClause = fmt.Sprintf("WHERE %s > ?", quotedWatermarkColumn) } parsedWatermarkTable, err := utils.ParseSchemaTable(config.WatermarkTable) if err != nil { @@ -64,7 +86,7 @@ func (c *MySqlConnector) GetQRepPartitions( case *protos.PartitionRange_UintRange: minVal = lastRange.UintRange.End case *protos.PartitionRange_TimestampRange: - minVal = lastRange.TimestampRange.End.AsTime() + minVal = lastRange.TimestampRange.End.AsTime().String() } c.logger.Info(fmt.Sprintf("count query: %s - minVal: %v", countQuery, minVal)) @@ -99,62 +121,115 @@ func (c *MySqlConnector) GetQRepPartitions( if totalRows%numRowsPerPartition != 0 { numPartitions++ } + + 
watermarkQKind, watermarkMyType, err := c.GetDataTypeOfWatermarkColumn(ctx, parsedWatermarkTable.MySQL(), config.WatermarkColumn) + if err != nil { + return nil, fmt.Errorf("failed to get data type of watermark column %s: %w", config.WatermarkColumn, err) + } + c.logger.Info(fmt.Sprintf("total rows: %d, num partitions: %d, num rows per partition: %d", totalRows, numPartitions, numRowsPerPartition)) var rs *mysql.Result - if minVal != nil { - // Query to get partitions using window functions - partitionsQuery := fmt.Sprintf( - `SELECT bucket, MIN(%[2]s) AS start, MAX(%[2]s) AS end - FROM ( - SELECT NTILE(%[1]d) OVER (ORDER BY %[2]s) AS bucket, %[2]s - FROM %[3]s WHERE %[2]s > $1 - ) AS subquery + + switch watermarkQKind { + case types.QValueKindInt8, types.QValueKindInt16, types.QValueKindInt32, types.QValueKindInt64, + types.QValueKindUInt8, types.QValueKindUInt16, types.QValueKindUInt32, types.QValueKindUInt64: + if minVal != nil { + partitionsQuery := fmt.Sprintf( + `WITH stats AS ( + SELECT MIN(%[2]s) AS min_watermark, + 1.0 * (MAX(%[2]s) - MIN(%[2]s)) / (%[1]d) AS range_size + FROM %[3]s WHERE %[2]s > ? + ) + SELECT FLOOR((w.%[2]s - s.min_watermark) / s.range_size) AS bucket, + MIN(w.%[2]s) AS start, MAX(w.%[2]s) AS end + FROM %[3]s AS w + CROSS JOIN stats AS s + WHERE w.%[2]s > ? GROUP BY bucket - ORDER BY start`, - numPartitions, - quotedWatermarkColumn, - parsedWatermarkTable.MySQL(), - ) - c.logger.Info("partitions query", slog.String("query", partitionsQuery), slog.Any("minVal", minVal)) - rs, err = c.Execute(ctx, partitionsQuery, minVal) - } else { - partitionsQuery := fmt.Sprintf( - `SELECT bucket, MIN(%[2]s) AS start, MAX(%[2]s) AS end - FROM ( - SELECT NTILE(%[1]d) OVER (ORDER BY %[2]s) AS bucket, %[2]s FROM %[3]s - ) AS subquery + ORDER BY start;`, + numPartitions, + quotedWatermarkColumn, + parsedWatermarkTable.MySQL(), + ) + c.logger.Info("partitions query", slog.String("query", partitionsQuery), slog.Any("minVal", minVal)) + rs, err = c.Execute(ctx, partitionsQuery, minVal, minVal) + } else { + partitionsQuery := fmt.Sprintf( + `WITH stats AS ( + SELECT MIN(%[2]s) AS min_watermark, + 1.0 * (MAX(%[2]s) - MIN(%[2]s)) / (%[1]d) AS range_size + FROM %[3]s + ) + SELECT FLOOR((w.%[2]s - s.min_watermark) / s.range_size) AS bucket, + MIN(w.%[2]s) AS start, MAX(w.%[2]s) AS end + FROM %[3]s AS w + CROSS JOIN stats AS s GROUP BY bucket - ORDER BY start`, - numPartitions, - quotedWatermarkColumn, - parsedWatermarkTable.MySQL(), - ) - c.logger.Info("partitions query", slog.String("query", partitionsQuery)) - rs, err = c.Execute(ctx, partitionsQuery) - } - if err != nil { - return nil, fmt.Errorf("failed to query for partitions: %w", err) + ORDER BY start;`, + numPartitions, + quotedWatermarkColumn, + parsedWatermarkTable.MySQL(), + ) + c.logger.Info("partitions query", slog.String("query", partitionsQuery)) + rs, err = c.Execute(ctx, partitionsQuery) + } + if err != nil { + return nil, fmt.Errorf("failed to query for partitions: %w", err) + } + case types.QValueKindTimestamp, types.QValueKindTimestampTZ: + if minVal != nil { + partitionsQuery := fmt.Sprintf( + `WITH stats AS ( + SELECT MIN(%[2]s) AS min_watermark, + 1.0 * (TIMESTAMPDIFF(MICROSECOND, MAX(%[2]s), MIN(%[2]s)) / (%[1]d)) AS range_size + FROM %[3]s WHERE %[2]s > ? + ) + SELECT FLOOR(TIMESTAMPDIFF(MICROSECOND, w.%[2]s, s.min_watermark) / s.range_size) AS bucket, + MIN(w.%[2]s) AS start, MAX(w.%[2]s) AS end + FROM %[3]s AS w + CROSS JOIN stats AS s + WHERE w.%[2]s > ? 
+ GROUP BY bucket + ORDER BY start;`, + numPartitions, + quotedWatermarkColumn, + parsedWatermarkTable.MySQL(), + ) + c.logger.Info("partitions query", slog.String("query", partitionsQuery), slog.Any("minVal", minVal)) + // two placeholders: one in the stats CTE and one in the outer WHERE + rs, err = c.Execute(ctx, partitionsQuery, minVal, minVal) + } else { + partitionsQuery := fmt.Sprintf( + `WITH stats AS ( + SELECT MIN(%[2]s) AS min_watermark, + 1.0 * (TIMESTAMPDIFF(MICROSECOND, MAX(%[2]s), MIN(%[2]s)) / (%[1]d)) AS range_size + FROM %[3]s + ) + SELECT FLOOR(TIMESTAMPDIFF(MICROSECOND, w.%[2]s, s.min_watermark) / s.range_size) AS bucket, + MIN(w.%[2]s) AS start, MAX(w.%[2]s) AS end + FROM %[3]s AS w + CROSS JOIN stats AS s + GROUP BY bucket + ORDER BY start;`, + numPartitions, + quotedWatermarkColumn, + parsedWatermarkTable.MySQL(), + ) + c.logger.Info("partitions query", slog.String("query", partitionsQuery)) + rs, err = c.Execute(ctx, partitionsQuery) + } + if err != nil { + return nil, fmt.Errorf("failed to query for partitions: %w", err) + } + } - qk1, err := qkindFromMysql(rs.Fields[1]) - if err != nil { - return nil, err - } - qk2, err := qkindFromMysql(rs.Fields[2]) - if err != nil { - return nil, err - } - if qk1 != qk2 { - return nil, fmt.Errorf("low/high of partition range should be same type, got low:%s, high:%s", qk1, qk2) - } partitionHelper := utils.NewPartitionHelper(c.logger) for _, row := range rs.Values { - val1, err := QValueFromMysqlFieldValue(qk1, row[1]) + val1, err := QValueFromMysqlFieldValue(watermarkQKind, watermarkMyType, row[1]) if err != nil { return nil, err } - val2, err := QValueFromMysqlFieldValue(qk2, row[2]) + val2, err := QValueFromMysqlFieldValue(watermarkQKind, watermarkMyType, row[2]) if err != nil { return nil, err } @@ -169,7 +244,7 @@ func (c *MySqlConnector) GetQRepPartitions( func (c *MySqlConnector) PullQRepRecords( ctx context.Context, config *protos.QRepConfig, - last *protos.QRepPartition, + partition *protos.QRepPartition, stream *model.QRecordStream, ) (int64, int64, error) { tableSchema, err := c.getTableSchemaForTable(ctx, config.Env, @@ -191,47 +266,13 @@ func (c *MySqlConnector) PullQRepRecords( var rs mysql.Result onRow := func(row []mysql.FieldValue) error { totalRecords += 1 - totalBytes += int64(len(row) / 8) // null bitmap - for idx, val := range row { - // TODO ideally go-mysql would give us row buffer, need upstream PR - // see mysql/rowdata.go in go-mysql for field sizes - // unfortunately we're using text protocol, so this is a weak estimate - switch rs.Fields[idx].Type { - case mysql.MYSQL_TYPE_NULL: - // 0 - case mysql.MYSQL_TYPE_TINY, mysql.MYSQL_TYPE_SHORT, mysql.MYSQL_TYPE_INT24, mysql.MYSQL_TYPE_LONG, mysql.MYSQL_TYPE_LONGLONG: - var v uint64 - if val.Type == mysql.FieldValueTypeUnsigned { - v = val.AsUint64() - } else { - signed := val.AsInt64() - if signed < 0 { - v = uint64(-signed) - } else { - v = uint64(signed) - } - } - if v < 10 { - totalBytes += 1 - } else if v > 99999999999999 { - // math.log10(10**15-1) == 15.0, so pick boundary where we're accurate, cap at 15 for simplicity - totalBytes += 15 - } else { - totalBytes += 1 + int64(math.Log10(float64(val.AsUint64()))) - } - case mysql.MYSQL_TYPE_YEAR, mysql.MYSQL_TYPE_FLOAT, mysql.MYSQL_TYPE_DOUBLE: - totalBytes += 4 - default: - totalBytes += int64(len(val.AsString())) - } - } schema, err := stream.Schema() if err != nil { return err } - record := make([]qvalue.QValue, 0, len(row)) + record := make([]types.QValue, 0, len(row)) for idx, val := range row { - qv, err := QValueFromMysqlFieldValue(schema.Fields[idx].Type, val) + qv, err := 
QValueFromMysqlFieldValue(schema.Fields[idx].Type, rs.Fields[idx].Type, val) if err != nil { return fmt.Errorf("could not convert mysql value for %s: %w", schema.Fields[idx].Name, err) } @@ -241,17 +282,19 @@ func (c *MySqlConnector) PullQRepRecords( return nil } - if last.FullTablePartition { + if partition.FullTablePartition { // this is a full table partition, so just run the query - if err := c.ExecuteSelectStreaming(ctx, config.Query, &rs, onRow, onResult); err != nil { + readBytes, err := c.ExecuteSelectStreaming(ctx, config.Query, &rs, onRow, onResult) + if err != nil { return 0, 0, err } + totalBytes += readBytes } else { var rangeStart string var rangeEnd string // Depending on the type of the range, convert the range into the correct type - switch x := last.Range.Range.(type) { + switch x := partition.Range.Range.(type) { case *protos.PartitionRange_IntRange: rangeStart = strconv.FormatInt(x.IntRange.Start, 10) rangeEnd = strconv.FormatInt(x.IntRange.End, 10) @@ -272,9 +315,11 @@ func (c *MySqlConnector) PullQRepRecords( return 0, 0, err } - if err := c.ExecuteSelectStreaming(ctx, query, &rs, onRow, onResult); err != nil { + readBytes, err := c.ExecuteSelectStreaming(ctx, query, &rs, onRow, onResult) + if err != nil { return 0, 0, err } + totalBytes += readBytes } close(stream.Records) diff --git a/flow/connectors/mysql/qvalue_convert.go b/flow/connectors/mysql/qvalue_convert.go index 71b67e5e29..a23a32a47f 100644 --- a/flow/connectors/mysql/qvalue_convert.go +++ b/flow/connectors/mysql/qvalue_convert.go @@ -15,179 +15,105 @@ import ( "github.com/shopspring/decimal" geom "github.com/twpayne/go-geos" - "github.com/PeerDB-io/peerdb/flow/datatypes" "github.com/PeerDB-io/peerdb/flow/generated/protos" - "github.com/PeerDB-io/peerdb/flow/model/qvalue" "github.com/PeerDB-io/peerdb/flow/shared" + "github.com/PeerDB-io/peerdb/flow/shared/datatypes" + "github.com/PeerDB-io/peerdb/flow/shared/types" ) -func qkindFromMysql(field *mysql.Field) (qvalue.QValueKind, error) { +func qkindFromMysql(field *mysql.Field) (types.QValueKind, error) { unsigned := (field.Flag & mysql.UNSIGNED_FLAG) != 0 switch field.Type { case mysql.MYSQL_TYPE_TINY: if unsigned { - return qvalue.QValueKindUInt8, nil + return types.QValueKindUInt8, nil } else { - return qvalue.QValueKindInt8, nil + return types.QValueKindInt8, nil } case mysql.MYSQL_TYPE_SHORT: if unsigned { - return qvalue.QValueKindUInt16, nil + return types.QValueKindUInt16, nil } else { - return qvalue.QValueKindInt16, nil + return types.QValueKindInt16, nil } case mysql.MYSQL_TYPE_INT24, mysql.MYSQL_TYPE_LONG: if unsigned { - return qvalue.QValueKindUInt32, nil + return types.QValueKindUInt32, nil } else { - return qvalue.QValueKindInt32, nil + return types.QValueKindInt32, nil } case mysql.MYSQL_TYPE_LONGLONG: if unsigned { - return qvalue.QValueKindUInt64, nil + return types.QValueKindUInt64, nil } else { - return qvalue.QValueKindInt64, nil + return types.QValueKindInt64, nil } case mysql.MYSQL_TYPE_FLOAT: - return qvalue.QValueKindFloat32, nil + return types.QValueKindFloat32, nil case mysql.MYSQL_TYPE_DOUBLE: - return qvalue.QValueKindFloat64, nil + return types.QValueKindFloat64, nil case mysql.MYSQL_TYPE_NULL: - return qvalue.QValueKindInvalid, nil - case mysql.MYSQL_TYPE_TIMESTAMP: - return qvalue.QValueKindTimestamp, nil - case mysql.MYSQL_TYPE_DATE: - return qvalue.QValueKindDate, nil - case mysql.MYSQL_TYPE_TIME: - return qvalue.QValueKindTime, nil - case mysql.MYSQL_TYPE_DATETIME: - return qvalue.QValueKindTimestamp, nil + return 
types.QValueKindInvalid, nil + case mysql.MYSQL_TYPE_DATE, mysql.MYSQL_TYPE_NEWDATE: + return types.QValueKindDate, nil + case mysql.MYSQL_TYPE_TIMESTAMP, mysql.MYSQL_TYPE_TIME, mysql.MYSQL_TYPE_DATETIME, + mysql.MYSQL_TYPE_TIMESTAMP2, mysql.MYSQL_TYPE_DATETIME2, mysql.MYSQL_TYPE_TIME2: + return types.QValueKindTimestamp, nil case mysql.MYSQL_TYPE_YEAR: - return qvalue.QValueKindInt16, nil - case mysql.MYSQL_TYPE_NEWDATE: - return qvalue.QValueKindDate, nil - case mysql.MYSQL_TYPE_VARCHAR: - return qvalue.QValueKindString, nil + return types.QValueKindInt16, nil case mysql.MYSQL_TYPE_BIT: - return qvalue.QValueKindInt64, nil - case mysql.MYSQL_TYPE_TIMESTAMP2: - return qvalue.QValueKindTimestamp, nil - case mysql.MYSQL_TYPE_DATETIME2: - return qvalue.QValueKindTimestamp, nil - case mysql.MYSQL_TYPE_TIME2: - return qvalue.QValueKindTime, nil + return types.QValueKindInt64, nil case mysql.MYSQL_TYPE_JSON: - return qvalue.QValueKindJSON, nil + return types.QValueKindJSON, nil case mysql.MYSQL_TYPE_DECIMAL, mysql.MYSQL_TYPE_NEWDECIMAL: - return qvalue.QValueKindNumeric, nil + return types.QValueKindNumeric, nil case mysql.MYSQL_TYPE_ENUM: - return qvalue.QValueKindEnum, nil + return types.QValueKindEnum, nil case mysql.MYSQL_TYPE_SET: - return qvalue.QValueKindString, nil + return types.QValueKindString, nil case mysql.MYSQL_TYPE_TINY_BLOB, mysql.MYSQL_TYPE_MEDIUM_BLOB, mysql.MYSQL_TYPE_LONG_BLOB, mysql.MYSQL_TYPE_BLOB: if field.Charset == 0x3f { // binary https://dev.mysql.com/doc/dev/mysql-server/8.4.3/page_protocol_basic_character_set.html - return qvalue.QValueKindBytes, nil + return types.QValueKindBytes, nil } else { - return qvalue.QValueKindString, nil + return types.QValueKindString, nil } - case mysql.MYSQL_TYPE_VAR_STRING, mysql.MYSQL_TYPE_STRING: - return qvalue.QValueKindString, nil + case mysql.MYSQL_TYPE_VAR_STRING, mysql.MYSQL_TYPE_STRING, mysql.MYSQL_TYPE_VARCHAR: + return types.QValueKindString, nil case mysql.MYSQL_TYPE_GEOMETRY: - return qvalue.QValueKindGeometry, nil + return types.QValueKindGeometry, nil case mysql.MYSQL_TYPE_VECTOR: - return qvalue.QValueKindArrayFloat32, nil + return types.QValueKindArrayFloat32, nil default: - return qvalue.QValueKind(""), fmt.Errorf("unknown mysql type %d", field.Type) + return types.QValueKind(""), fmt.Errorf("unknown mysql type %d", field.Type) } } -func qkindFromMysqlColumnType(ct string) (qvalue.QValueKind, error) { - ct, isUnsigned := strings.CutSuffix(ct, " unsigned") - ct, param, _ := strings.Cut(ct, "(") - switch strings.ToLower(ct) { - case "json": - return qvalue.QValueKindJSON, nil - case "char", "varchar", "text", "set", "tinytext", "mediumtext", "longtext": - return qvalue.QValueKindString, nil - case "enum": - return qvalue.QValueKindEnum, nil - case "binary", "varbinary", "blob", "tinyblob", "mediumblob", "longblob": - return qvalue.QValueKindBytes, nil - case "date": - return qvalue.QValueKindDate, nil - case "time": - return qvalue.QValueKindTime, nil - case "datetime", "timestamp": - return qvalue.QValueKindTimestamp, nil - case "decimal", "numeric": - return qvalue.QValueKindNumeric, nil - case "float": - return qvalue.QValueKindFloat32, nil - case "double": - return qvalue.QValueKindFloat64, nil - case "tinyint": - if strings.HasPrefix(param, "1)") { - return qvalue.QValueKindBoolean, nil - } else if isUnsigned { - return qvalue.QValueKindUInt8, nil - } else { - return qvalue.QValueKindInt8, nil - } - case "smallint", "year": - if isUnsigned { - return qvalue.QValueKindUInt16, nil - } else { - return 
qvalue.QValueKindInt16, nil - } - case "mediumint", "int": - if isUnsigned { - return qvalue.QValueKindUInt32, nil - } else { - return qvalue.QValueKindInt32, nil - } - case "bit": - return qvalue.QValueKindUInt64, nil - case "bigint": - if isUnsigned { - return qvalue.QValueKindUInt64, nil - } else { - return qvalue.QValueKindInt64, nil - } - case "vector": - return qvalue.QValueKindArrayFloat32, nil - case "geometry", "point", "polygon", "linestring", "multipoint", "multipolygon", "geomcollection": - return qvalue.QValueKindGeometry, nil - default: - return qvalue.QValueKind(""), fmt.Errorf("unknown mysql type %s", ct) - } -} - -func QRecordSchemaFromMysqlFields(tableSchema *protos.TableSchema, fields []*mysql.Field) (qvalue.QRecordSchema, error) { +func QRecordSchemaFromMysqlFields(tableSchema *protos.TableSchema, fields []*mysql.Field) (types.QRecordSchema, error) { tableColumns := make(map[string]*protos.FieldDescription, len(tableSchema.Columns)) for _, col := range tableSchema.Columns { tableColumns[col.Name] = col } - schema := make([]qvalue.QField, 0, len(fields)) + schema := make([]types.QField, 0, len(fields)) for _, field := range fields { var precision int16 var scale int16 name := string(field.Name) - var qkind qvalue.QValueKind + var qkind types.QValueKind if col, ok := tableColumns[name]; ok { - qkind = qvalue.QValueKind(col.Type) - if qkind == qvalue.QValueKindNumeric { + qkind = types.QValueKind(col.Type) + if qkind == types.QValueKindNumeric { precision, scale = datatypes.ParseNumericTypmod(col.TypeModifier) } } else { var err error qkind, err = qkindFromMysql(field) if err != nil { - return qvalue.QRecordSchema{}, err + return types.QRecordSchema{}, err } } - schema = append(schema, qvalue.QField{ + schema = append(schema, types.QField{ Name: name, Type: qkind, Precision: precision, @@ -195,7 +121,7 @@ func QRecordSchemaFromMysqlFields(tableSchema *protos.TableSchema, fields []*mys Nullable: (field.Flag & mysql.NOT_NULL_FLAG) == 0, }) } - return qvalue.QRecordSchema{Fields: schema}, nil + return types.QRecordSchema{Fields: schema}, nil } // Helper function to convert MySQL geometry binary data to WKT format @@ -215,19 +141,19 @@ func geometryValueFromBytes(wkbData []byte) (string, error) { } // Helper function to process geometry data and return a QValueGeometry -func processGeometryData(data []byte) qvalue.QValueGeometry { +func processGeometryData(data []byte) types.QValueGeometry { // For geometry data, we need to convert from MySQL's binary format to WKT if len(data) > 4 { wkt, err := geometryValueFromBytes(data) if err == nil { - return qvalue.QValueGeometry{Val: wkt} + return types.QValueGeometry{Val: wkt} } } - return qvalue.QValueGeometry{Val: string(data)} + return types.QValueGeometry{Val: string(data)} } // https://dev.mysql.com/doc/refman/8.4/en/time.html -func processTime(str string) (time.Time, error) { +func processTime(str string) (time.Duration, error) { abs, isNeg := strings.CutPrefix(str, "-") tpart, frac, _ := strings.Cut(abs, ".") @@ -235,7 +161,7 @@ func processTime(str string) (time.Time, error) { if frac != "" { fint, err := strconv.ParseUint(frac, 10, 64) if err != nil { - return time.Time{}, err + return 0, err } if len(frac) <= 9 { nsec = fint * uint64(math.Pow10(9-len(frac))) @@ -244,6 +170,10 @@ func processTime(str string) (time.Time, error) { } } + if nsec > 999999999 { + return 0, fmt.Errorf("nanoseconds (%d) should not exceed one second", nsec) + } + var err error var spart, mpart, hpart uint64 h, ms, hasMS := strings.Cut(tpart, ":") @@ 
-276,75 +206,76 @@ func processTime(str string) (time.Time, error) { } if err != nil { - return time.Time{}, err + return 0, err } sec := hpart*3600 + mpart*60 + spart + val := time.Duration(sec)*time.Second + time.Duration(nsec) if isNeg { - return time.Unix(-int64(sec), -int64(nsec)).UTC(), nil + return -val, nil } - return time.Unix(int64(sec), int64(nsec)).UTC(), nil + return val, nil } -func QValueFromMysqlFieldValue(qkind qvalue.QValueKind, fv mysql.FieldValue) (qvalue.QValue, error) { +func QValueFromMysqlFieldValue(qkind types.QValueKind, mytype byte, fv mysql.FieldValue) (types.QValue, error) { switch fv.Type { case mysql.FieldValueTypeNull: - return qvalue.QValueNull(qkind), nil + return types.QValueNull(qkind), nil case mysql.FieldValueTypeUnsigned: v := fv.AsUint64() switch qkind { - case qvalue.QValueKindBoolean: - return qvalue.QValueBoolean{Val: v != 0}, nil - case qvalue.QValueKindInt8: - return qvalue.QValueInt8{Val: int8(v)}, nil - case qvalue.QValueKindInt16: - return qvalue.QValueInt16{Val: int16(v)}, nil - case qvalue.QValueKindInt32: - return qvalue.QValueInt32{Val: int32(v)}, nil - case qvalue.QValueKindInt64: - return qvalue.QValueInt64{Val: int64(v)}, nil - case qvalue.QValueKindUInt8: - return qvalue.QValueUInt8{Val: uint8(v)}, nil - case qvalue.QValueKindUInt16: - return qvalue.QValueUInt16{Val: uint16(v)}, nil - case qvalue.QValueKindUInt32: - return qvalue.QValueUInt32{Val: uint32(v)}, nil - case qvalue.QValueKindUInt64: - return qvalue.QValueUInt64{Val: v}, nil + case types.QValueKindBoolean: + return types.QValueBoolean{Val: v != 0}, nil + case types.QValueKindInt8: + return types.QValueInt8{Val: int8(v)}, nil + case types.QValueKindInt16: + return types.QValueInt16{Val: int16(v)}, nil + case types.QValueKindInt32: + return types.QValueInt32{Val: int32(v)}, nil + case types.QValueKindInt64: + return types.QValueInt64{Val: int64(v)}, nil + case types.QValueKindUInt8: + return types.QValueUInt8{Val: uint8(v)}, nil + case types.QValueKindUInt16: + return types.QValueUInt16{Val: uint16(v)}, nil + case types.QValueKindUInt32: + return types.QValueUInt32{Val: uint32(v)}, nil + case types.QValueKindUInt64: + return types.QValueUInt64{Val: v}, nil default: return nil, fmt.Errorf("cannot convert uint64 to %s", qkind) } case mysql.FieldValueTypeSigned: v := fv.AsInt64() switch qkind { - case qvalue.QValueKindBoolean: - return qvalue.QValueBoolean{Val: v != 0}, nil - case qvalue.QValueKindInt8: - return qvalue.QValueInt8{Val: int8(v)}, nil - case qvalue.QValueKindInt16: - return qvalue.QValueInt16{Val: int16(v)}, nil - case qvalue.QValueKindInt32: - return qvalue.QValueInt32{Val: int32(v)}, nil - case qvalue.QValueKindInt64: - return qvalue.QValueInt64{Val: v}, nil - case qvalue.QValueKindUInt8: - return qvalue.QValueUInt8{Val: uint8(v)}, nil - case qvalue.QValueKindUInt16: - return qvalue.QValueUInt16{Val: uint16(v)}, nil - case qvalue.QValueKindUInt32: - return qvalue.QValueUInt32{Val: uint32(v)}, nil - case qvalue.QValueKindUInt64: - return qvalue.QValueUInt64{Val: uint64(v)}, nil + case types.QValueKindBoolean: + return types.QValueBoolean{Val: v != 0}, nil + case types.QValueKindInt8: + return types.QValueInt8{Val: int8(v)}, nil + case types.QValueKindInt16: + return types.QValueInt16{Val: int16(v)}, nil + case types.QValueKindInt32: + return types.QValueInt32{Val: int32(v)}, nil + case types.QValueKindInt64: + return types.QValueInt64{Val: v}, nil + case types.QValueKindUInt8: + return types.QValueUInt8{Val: uint8(v)}, nil + case types.QValueKindUInt16: + return 
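With processTime now returning a time.Duration, MySQL TIME values outside the 0-24h range (the type covers roughly -838:59:59 through 838:59:59) no longer have to be squeezed into a time.Time. A usage sketch, assuming it sits in the same package as processTime:

func exampleProcessTime() {
	// values taken from the updated test table below
	if d, err := processTime("800:0:1"); err == nil {
		fmt.Println(d) // 800h0m1s
	}
	if d, err := processTime("-800:0:1.1"); err == nil {
		fmt.Println(d) // -800h0m1.1s
	}
}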
types.QValueUInt16{Val: uint16(v)}, nil + case types.QValueKindUInt32: + return types.QValueUInt32{Val: uint32(v)}, nil + case types.QValueKindUInt64: + return types.QValueUInt64{Val: uint64(v)}, nil default: return nil, fmt.Errorf("cannot convert int64 to %s", qkind) } case mysql.FieldValueTypeFloat: v := fv.AsFloat64() switch qkind { - case qvalue.QValueKindFloat32: - return qvalue.QValueFloat32{Val: float32(v)}, nil - case qvalue.QValueKindFloat64: - return qvalue.QValueFloat64{Val: float64(v)}, nil + case types.QValueKindFloat32: + return types.QValueFloat32{Val: float32(v)}, nil + case types.QValueKindFloat64: + return types.QValueFloat64{Val: float64(v)}, nil default: return nil, fmt.Errorf("cannot convert float64 to %s", qkind) } @@ -352,58 +283,68 @@ func QValueFromMysqlFieldValue(qkind qvalue.QValueKind, fv mysql.FieldValue) (qv v := fv.AsString() unsafeString := shared.UnsafeFastReadOnlyBytesToString(v) switch qkind { - case qvalue.QValueKindUInt64: // bit + case types.QValueKindUInt64: // bit var bit uint64 for _, b := range v { bit = (bit << 8) | uint64(b) } - return qvalue.QValueUInt64{Val: bit}, nil - case qvalue.QValueKindString: - return qvalue.QValueString{Val: string(v)}, nil - case qvalue.QValueKindEnum: - return qvalue.QValueEnum{Val: string(v)}, nil - case qvalue.QValueKindBytes: - return qvalue.QValueBytes{Val: slices.Clone(v)}, nil - case qvalue.QValueKindJSON: - return qvalue.QValueJSON{Val: string(v)}, nil - case qvalue.QValueKindGeometry: + return types.QValueUInt64{Val: bit}, nil + case types.QValueKindString: + return types.QValueString{Val: string(v)}, nil + case types.QValueKindEnum: + return types.QValueEnum{Val: string(v)}, nil + case types.QValueKindBytes: + return types.QValueBytes{Val: slices.Clone(v)}, nil + case types.QValueKindJSON: + return types.QValueJSON{Val: string(v)}, nil + case types.QValueKindGeometry: return processGeometryData(v), nil - case qvalue.QValueKindNumeric: + case types.QValueKindNumeric: val, err := decimal.NewFromString(unsafeString) if err != nil { return nil, err } - return qvalue.QValueNumeric{Val: val}, nil - case qvalue.QValueKindTimestamp: + return types.QValueNumeric{Val: val}, nil + case types.QValueKindTimestamp: + if mytype == mysql.MYSQL_TYPE_TIME || mytype == mysql.MYSQL_TYPE_TIME2 { + tm, err := processTime(unsafeString) + if err != nil { + return nil, err + } + return types.QValueTimestamp{Val: time.Unix(0, 0).UTC().Add(tm)}, nil + } if strings.HasPrefix(unsafeString, "0000-00-00") { - return qvalue.QValueTimestamp{Val: time.Unix(0, 0)}, nil + return types.QValueTimestamp{Val: time.Unix(0, 0)}, nil } val, err := time.Parse("2006-01-02 15:04:05.999999", unsafeString) if err != nil { return nil, err } - return qvalue.QValueTimestamp{Val: val}, nil - case qvalue.QValueKindTime: + return types.QValueTimestamp{Val: val}, nil + case types.QValueKindTime: + // deprecated: most databases expect time to be time part of datetime + // mysql it's a +/- 800 hour range to represent duration + // keep codepath for backwards compat when mysql time was mapped to QValueKindTime tm, err := processTime(unsafeString) if err != nil { return nil, err } - return qvalue.QValueTime{Val: tm}, nil - case qvalue.QValueKindDate: + return types.QValueTime{Val: tm}, nil + case types.QValueKindDate: if unsafeString == "0000-00-00" { - return qvalue.QValueDate{Val: time.Unix(0, 0)}, nil + return types.QValueDate{Val: time.Unix(0, 0)}, nil } val, err := time.Parse(time.DateOnly, unsafeString) if err != nil { return nil, err } - return 
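The QValueKindUInt64 branch above reassembles a BIT column from its raw bytes, most significant byte first. The same idea in isolation, as a self-contained sketch:

package main

import "fmt"

func main() {
	// A MySQL BIT(20) value arrives as raw bytes, most significant byte first.
	raw := []byte{0x0A, 0xBC, 0xDE}
	var bit uint64
	for _, b := range raw {
		bit = (bit << 8) | uint64(b)
	}
	fmt.Printf("%#x\n", bit) // 0xabcde
}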
qvalue.QValueDate{Val: val}, nil - case qvalue.QValueKindArrayFloat32: + return types.QValueDate{Val: val}, nil + case types.QValueKindArrayFloat32: floats := make([]float32, 0, len(v)/4) for i := 0; i < len(v); i += 4 { floats = append(floats, math.Float32frombits(binary.LittleEndian.Uint32(v[i:]))) } - return qvalue.QValueArrayFloat32{Val: floats}, nil + return types.QValueArrayFloat32{Val: floats}, nil default: return nil, fmt.Errorf("cannot convert bytes %v to %s", v, qkind) } @@ -414,46 +355,46 @@ func QValueFromMysqlFieldValue(qkind qvalue.QValueKind, fv mysql.FieldValue) (qv func QValueFromMysqlRowEvent( mytype byte, enums []string, sets []string, - qkind qvalue.QValueKind, val any, -) (qvalue.QValue, error) { + qkind types.QValueKind, val any, +) (types.QValue, error) { // See go-mysql row_event.go for mapping switch val := val.(type) { case nil: - return qvalue.QValueNull(qkind), nil + return types.QValueNull(qkind), nil case int8: // go-mysql reads all integers as signed, consumer needs to check metadata & convert - if qkind == qvalue.QValueKindBoolean { - return qvalue.QValueBoolean{Val: val != 0}, nil - } else if qkind == qvalue.QValueKindUInt8 { - return qvalue.QValueUInt8{Val: uint8(val)}, nil + if qkind == types.QValueKindBoolean { + return types.QValueBoolean{Val: val != 0}, nil + } else if qkind == types.QValueKindUInt8 { + return types.QValueUInt8{Val: uint8(val)}, nil } else { - return qvalue.QValueInt8{Val: val}, nil + return types.QValueInt8{Val: val}, nil } case int16: - if qkind == qvalue.QValueKindUInt16 { - return qvalue.QValueUInt16{Val: uint16(val)}, nil + if qkind == types.QValueKindUInt16 { + return types.QValueUInt16{Val: uint16(val)}, nil } else { - return qvalue.QValueInt16{Val: val}, nil + return types.QValueInt16{Val: val}, nil } case int32: - if qkind == qvalue.QValueKindUInt32 { + if qkind == types.QValueKindUInt32 { if mytype == mysql.MYSQL_TYPE_INT24 { - return qvalue.QValueUInt32{Val: uint32(val) & 0xFFFFFF}, nil + return types.QValueUInt32{Val: uint32(val) & 0xFFFFFF}, nil } else { - return qvalue.QValueUInt32{Val: uint32(val)}, nil + return types.QValueUInt32{Val: uint32(val)}, nil } } else { - return qvalue.QValueInt32{Val: val}, nil + return types.QValueInt32{Val: val}, nil } case int64: switch qkind { - case qvalue.QValueKindUInt64: - return qvalue.QValueUInt64{Val: uint64(val)}, nil - case qvalue.QValueKindInt64: - return qvalue.QValueInt64{Val: val}, nil - case qvalue.QValueKindString: // set + case types.QValueKindUInt64: + return types.QValueUInt64{Val: uint64(val)}, nil + case types.QValueKindInt64: + return types.QValueInt64{Val: val}, nil + case types.QValueKindString: // set var set []string if sets == nil { - return qvalue.QValueString{Val: strconv.FormatInt(val, 10)}, nil + return types.QValueString{Val: strconv.FormatInt(val, 10)}, nil } for val != 0 { idx := bits.TrailingZeros64(uint64(val)) @@ -464,86 +405,93 @@ func QValueFromMysqlRowEvent( return nil, fmt.Errorf("set value out of range %d %v", idx, sets) } } - return qvalue.QValueString{Val: strings.Join(set, ",")}, nil - case qvalue.QValueKindEnum: // enum + return types.QValueString{Val: strings.Join(set, ",")}, nil + case types.QValueKindEnum: // enum if val == 0 { - return qvalue.QValueEnum{Val: ""}, nil + return types.QValueEnum{Val: ""}, nil } else if int(val)-1 < len(enums) { - return qvalue.QValueEnum{Val: enums[int(val)-1]}, nil + return types.QValueEnum{Val: enums[int(val)-1]}, nil } else if enums == nil { - return qvalue.QValueEnum{Val: strconv.FormatInt(val, 10)}, nil + 
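Both the field-value path above and the row-event path below decode MySQL VECTOR columns the same way: the payload is a packed array of little-endian float32s, read four bytes at a time. A self-contained sketch of that decoding:

package main

import (
	"encoding/binary"
	"fmt"
	"math"
)

func main() {
	// Build a two-element vector payload the way MySQL stores it:
	// consecutive little-endian float32s.
	v := make([]byte, 8)
	binary.LittleEndian.PutUint32(v[0:], math.Float32bits(1.5))
	binary.LittleEndian.PutUint32(v[4:], math.Float32bits(-2.25))

	floats := make([]float32, 0, len(v)/4)
	for i := 0; i < len(v); i += 4 {
		floats = append(floats, math.Float32frombits(binary.LittleEndian.Uint32(v[i:])))
	}
	fmt.Println(floats) // [1.5 -2.25]
}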
return types.QValueEnum{Val: strconv.FormatInt(val, 10)}, nil } else { return nil, fmt.Errorf("enum value out of range %d %v", val, enums) } } case float32: - return qvalue.QValueFloat32{Val: val}, nil + return types.QValueFloat32{Val: val}, nil case float64: - return qvalue.QValueFloat64{Val: val}, nil + return types.QValueFloat64{Val: val}, nil case decimal.Decimal: - return qvalue.QValueNumeric{Val: val}, nil + return types.QValueNumeric{Val: val}, nil case int: // YEAR: https://dev.mysql.com/doc/refman/8.4/en/year.html - return qvalue.QValueInt16{Val: int16(val)}, nil + return types.QValueInt16{Val: int16(val)}, nil case time.Time: - return qvalue.QValueTimestamp{Val: val}, nil + return types.QValueTimestamp{Val: val}, nil case *replication.JsonDiff: // TODO support somehow?? - return qvalue.QValueNull(qvalue.QValueKindJSON), nil + return types.QValueNull(types.QValueKindJSON), nil case []byte: switch qkind { - case qvalue.QValueKindBytes: - return qvalue.QValueBytes{Val: val}, nil - case qvalue.QValueKindString: - return qvalue.QValueString{Val: string(val)}, nil - case qvalue.QValueKindEnum: - return qvalue.QValueEnum{Val: string(val)}, nil - case qvalue.QValueKindJSON: - return qvalue.QValueJSON{Val: string(val)}, nil - case qvalue.QValueKindGeometry: + case types.QValueKindBytes: + return types.QValueBytes{Val: val}, nil + case types.QValueKindString: + return types.QValueString{Val: string(val)}, nil + case types.QValueKindEnum: + return types.QValueEnum{Val: string(val)}, nil + case types.QValueKindJSON: + return types.QValueJSON{Val: string(val)}, nil + case types.QValueKindGeometry: // Handle geometry data as binary (WKB format) return processGeometryData(val), nil - case qvalue.QValueKindArrayFloat32: + case types.QValueKindArrayFloat32: floats := make([]float32, 0, len(val)/4) for i := 0; i < len(val); i += 4 { floats = append(floats, math.Float32frombits(binary.LittleEndian.Uint32(val[i:]))) } - return qvalue.QValueArrayFloat32{Val: floats}, nil + return types.QValueArrayFloat32{Val: floats}, nil } case string: switch qkind { - case qvalue.QValueKindBytes: - return qvalue.QValueBytes{Val: shared.UnsafeFastStringToReadOnlyBytes(val)}, nil - case qvalue.QValueKindString: - return qvalue.QValueString{Val: val}, nil - case qvalue.QValueKindEnum: - return qvalue.QValueEnum{Val: val}, nil - case qvalue.QValueKindJSON: - return qvalue.QValueJSON{Val: val}, nil - case qvalue.QValueKindTime: + case types.QValueKindBytes: + return types.QValueBytes{Val: shared.UnsafeFastStringToReadOnlyBytes(val)}, nil + case types.QValueKindString: + return types.QValueString{Val: val}, nil + case types.QValueKindEnum: + return types.QValueEnum{Val: val}, nil + case types.QValueKindJSON: + return types.QValueJSON{Val: val}, nil + case types.QValueKindTime: tm, err := processTime(val) if err != nil { return nil, err } - return qvalue.QValueTime{Val: tm}, nil - case qvalue.QValueKindDate: + return types.QValueTime{Val: tm}, nil + case types.QValueKindDate: if val == "0000-00-00" { - return qvalue.QValueDate{Val: time.Unix(0, 0)}, nil + return types.QValueDate{Val: time.Unix(0, 0)}, nil } val, err := time.Parse(time.DateOnly, val) if err != nil { return nil, err } - return qvalue.QValueDate{Val: val}, nil - case qvalue.QValueKindTimestamp: // 0000-00-00 ends up here + return types.QValueDate{Val: val}, nil + case types.QValueKindTimestamp: // 0000-00-00 ends up here + if mytype == mysql.MYSQL_TYPE_TIME || mytype == mysql.MYSQL_TYPE_TIME2 { + tm, err := processTime(val) + if err != nil { + return nil, 
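The SET branch treats the stored integer as a bitmask over the column's member list, walking set bits with bits.TrailingZeros64, while ENUM uses a 1-based index into the member list. A standalone sketch of the SET decoding (hedged: part of the loop body is elided by the hunk boundary, so this only shows the general technique):

package main

import (
	"fmt"
	"math/bits"
	"strings"
)

func main() {
	members := []string{"a", "b", "c", "d"}
	val := int64(0b1011) // SET('a','b','d') stored as a bitmask

	var set []string
	for val != 0 {
		idx := bits.TrailingZeros64(uint64(val))
		set = append(set, members[idx])
		val &= val - 1 // clear the lowest set bit
	}
	fmt.Println(strings.Join(set, ",")) // a,b,d
}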
err + } + return types.QValueTimestamp{Val: time.Unix(0, 0).UTC().Add(tm)}, nil + } if strings.HasPrefix(val, "0000-00-00") { - return qvalue.QValueTimestamp{Val: time.Unix(0, 0)}, nil + return types.QValueTimestamp{Val: time.Unix(0, 0)}, nil } - val, err := time.Parse("2006-01-02 15:04:05.999999", val) + tm, err := time.Parse("2006-01-02 15:04:05.999999", val) if err != nil { return nil, err } - return qvalue.QValueTimestamp{Val: val}, nil + return types.QValueTimestamp{Val: tm}, nil } } return nil, fmt.Errorf("unexpected type %T for mysql type %d, qkind %s", val, mytype, qkind) diff --git a/flow/connectors/mysql/qvalue_convert_test.go b/flow/connectors/mysql/qvalue_convert_test.go index d2c087af26..be533f4689 100644 --- a/flow/connectors/mysql/qvalue_convert_test.go +++ b/flow/connectors/mysql/qvalue_convert_test.go @@ -8,40 +8,42 @@ import ( ) func TestProcessTime(t *testing.T) { + epoch := time.Unix(0, 0).UTC() + //nolint:govet for _, ts := range []struct { - out time.Time + out time.Duration in string }{ - {time.Date(1970, 1, 1, 23, 30, 0, 500000000, time.UTC), "23:30.5"}, - {time.Date(1970, 2, 3, 8, 0, 1, 0, time.UTC), "800:0:1"}, - {time.Date(1969, 11, 28, 15, 59, 59, 0, time.UTC), "-800:0:1"}, - {time.Date(1969, 11, 28, 15, 59, 58, 900000000, time.UTC), "-800:0:1.1"}, - {time.Date(1970, 1, 1, 0, 0, 1, 0, time.UTC), "1."}, - {time.Date(1970, 1, 1, 0, 12, 34, 0, time.UTC), "1234"}, - {time.Date(1970, 1, 1, 3, 12, 34, 0, time.UTC), "31234"}, - {time.Date(1970, 1, 1, 0, 0, 1, 120000000, time.UTC), "1.12"}, - {time.Date(1970, 1, 1, 0, 0, 1, 12000000, time.UTC), "1.012"}, - {time.Date(1970, 1, 1, 0, 0, 1, 12300000, time.UTC), "1.0123"}, - {time.Date(1970, 1, 1, 0, 0, 1, 1230000, time.UTC), "1.00123"}, - {time.Date(1970, 1, 1, 0, 0, 1, 1000, time.UTC), "1.000001"}, - {time.Date(1970, 1, 1, 0, 0, 1, 200, time.UTC), "1.0000002"}, - {time.Date(1970, 1, 1, 0, 0, 1, 30, time.UTC), "1.00000003"}, - {time.Date(1970, 1, 1, 0, 0, 1, 4, time.UTC), "1.000000004"}, - {time.Time{}, "123.aa"}, - {time.Time{}, "hh:00:00"}, - {time.Time{}, "00:mm:00"}, - {time.Time{}, "00:00:ss"}, - {time.Time{}, "hh:00"}, - {time.Time{}, "00:mm"}, - {time.Time{}, "ss"}, - {time.Time{}, "mm00"}, - {time.Time{}, "00ss"}, - {time.Time{}, "hh0000"}, - {time.Time{}, "00mm00"}, - {time.Time{}, "0000ss"}, + {time.Date(1970, 1, 1, 23, 30, 0, 500000000, time.UTC).Sub(epoch), "23:30.5"}, + {time.Date(1970, 2, 3, 8, 0, 1, 0, time.UTC).Sub(epoch), "800:0:1"}, + {time.Date(1969, 11, 28, 15, 59, 59, 0, time.UTC).Sub(epoch), "-800:0:1"}, + {time.Date(1969, 11, 28, 15, 59, 58, 900000000, time.UTC).Sub(epoch), "-800:0:1.1"}, + {time.Date(1970, 1, 1, 0, 0, 1, 0, time.UTC).Sub(epoch), "1."}, + {time.Date(1970, 1, 1, 0, 12, 34, 0, time.UTC).Sub(epoch), "1234"}, + {time.Date(1970, 1, 1, 3, 12, 34, 0, time.UTC).Sub(epoch), "31234"}, + {time.Date(1970, 1, 1, 0, 0, 1, 120000000, time.UTC).Sub(epoch), "1.12"}, + {time.Date(1970, 1, 1, 0, 0, 1, 12000000, time.UTC).Sub(epoch), "1.012"}, + {time.Date(1970, 1, 1, 0, 0, 1, 12300000, time.UTC).Sub(epoch), "1.0123"}, + {time.Date(1970, 1, 1, 0, 0, 1, 1230000, time.UTC).Sub(epoch), "1.00123"}, + {time.Date(1970, 1, 1, 0, 0, 1, 1000, time.UTC).Sub(epoch), "1.000001"}, + {time.Date(1970, 1, 1, 0, 0, 1, 200, time.UTC).Sub(epoch), "1.0000002"}, + {time.Date(1970, 1, 1, 0, 0, 1, 30, time.UTC).Sub(epoch), "1.00000003"}, + {time.Date(1970, 1, 1, 0, 0, 1, 4, time.UTC).Sub(epoch), "1.000000004"}, + {0, "123.aa"}, + {0, "hh:00:00"}, + {0, "00:mm:00"}, + {0, "00:00:ss"}, + {0, "hh:00"}, + {0, "00:mm"}, + {0, 
"ss"}, + {0, "mm00"}, + {0, "00ss"}, + {0, "hh0000"}, + {0, "00mm00"}, + {0, "0000ss"}, } { tm, err := processTime(ts.in) - if tm.IsZero() { + if tm == 0 { require.Error(t, err) } else { require.NoError(t, err) diff --git a/flow/connectors/mysql/schema.go b/flow/connectors/mysql/schema.go index 56a4d0aaba..a293e375cd 100644 --- a/flow/connectors/mysql/schema.go +++ b/flow/connectors/mysql/schema.go @@ -73,7 +73,7 @@ func (c *MySqlConnector) GetTablesInSchema( return &protos.SchemaTablesResponse{Tables: tables}, nil } -func (c *MySqlConnector) GetColumns(ctx context.Context, schema string, table string) (*protos.TableColumnsResponse, error) { +func (c *MySqlConnector) GetColumns(ctx context.Context, version uint32, schema string, table string) (*protos.TableColumnsResponse, error) { rs, err := c.Execute(ctx, `select column_name, column_type, column_key from information_schema.columns where table_schema = ? and table_name = ? order by column_name`, schema, table) @@ -95,7 +95,7 @@ func (c *MySqlConnector) GetColumns(ctx context.Context, schema string, table st if err != nil { return nil, err } - qkind, err := qkindFromMysqlColumnType(columnType) + qkind, err := mysql.QkindFromMysqlColumnType(columnType) if err != nil { return nil, err } diff --git a/flow/connectors/postgres/cdc.go b/flow/connectors/postgres/cdc.go index 4015275c80..2fea90e9c1 100644 --- a/flow/connectors/postgres/cdc.go +++ b/flow/connectors/postgres/cdc.go @@ -17,25 +17,27 @@ import ( "github.com/jackc/pgx/v5/pgconn" "github.com/jackc/pgx/v5/pgproto3" "github.com/jackc/pgx/v5/pgtype" - "github.com/lib/pq/oid" + "github.com/pgvector/pgvector-go" "go.temporal.io/sdk/log" connmetadata "github.com/PeerDB-io/peerdb/flow/connectors/external_metadata" "github.com/PeerDB-io/peerdb/flow/connectors/utils" "github.com/PeerDB-io/peerdb/flow/connectors/utils/monitoring" - geo "github.com/PeerDB-io/peerdb/flow/datatypes" "github.com/PeerDB-io/peerdb/flow/generated/protos" "github.com/PeerDB-io/peerdb/flow/internal" "github.com/PeerDB-io/peerdb/flow/model" - "github.com/PeerDB-io/peerdb/flow/model/qvalue" "github.com/PeerDB-io/peerdb/flow/otel_metrics" "github.com/PeerDB-io/peerdb/flow/shared" + geo "github.com/PeerDB-io/peerdb/flow/shared/datatypes" "github.com/PeerDB-io/peerdb/flow/shared/exceptions" + "github.com/PeerDB-io/peerdb/flow/shared/postgres" + "github.com/PeerDB-io/peerdb/flow/shared/types" ) type PostgresCDCSource struct { *PostgresConnector srcTableIDNameMapping map[uint32]string + schemaNameForRelID map[uint32]string tableNameMapping map[string]model.NameAndExclude tableNameSchemaMapping map[string]*protos.TableSchema relationMessageMapping model.RelationMessageMapping @@ -53,6 +55,7 @@ type PostgresCDCSource struct { hushWarnUnknownTableDetected map[uint32]struct{} flowJobName string handleInheritanceForNonPartitionedTables bool + internalVersion uint32 } type PostgresCDCConfig struct { @@ -66,6 +69,8 @@ type PostgresCDCConfig struct { Slot string Publication string HandleInheritanceForNonPartitionedTables bool + SourceSchemaAsDestinationColumn bool + InternalVersion uint32 } // Create a new PostgresCDCSource @@ -77,9 +82,15 @@ func (c *PostgresConnector) NewPostgresCDCSource(ctx context.Context, cdcConfig return nil, fmt.Errorf("error getting child to parent relid map: %w", err) } + var schemaNameForRelID map[uint32]string + if cdcConfig.SourceSchemaAsDestinationColumn { + schemaNameForRelID = make(map[uint32]string, len(cdcConfig.TableNameSchemaMapping)) + } + return &PostgresCDCSource{ PostgresConnector: c, 
srcTableIDNameMapping: cdcConfig.SrcTableIDNameMapping, + schemaNameForRelID: schemaNameForRelID, tableNameMapping: cdcConfig.TableNameMapping, tableNameSchemaMapping: cdcConfig.TableNameSchemaMapping, relationMessageMapping: cdcConfig.RelationMessageMapping, @@ -93,9 +104,25 @@ func (c *PostgresConnector) NewPostgresCDCSource(ctx context.Context, cdcConfig hushWarnUnknownTableDetected: make(map[uint32]struct{}), flowJobName: cdcConfig.FlowJobName, handleInheritanceForNonPartitionedTables: cdcConfig.HandleInheritanceForNonPartitionedTables, + internalVersion: cdcConfig.InternalVersion, }, nil } +func (p *PostgresCDCSource) getSourceSchemaForDestinationColumn(relID uint32, tableName string) (string, error) { + if p.schemaNameForRelID == nil { + return "", nil + } else if schema, ok := p.schemaNameForRelID[relID]; ok { + return schema, nil + } + + schemaTable, err := utils.ParseSchemaTable(tableName) + if err != nil { + return "", err + } + p.schemaNameForRelID[relID] = schemaTable.Schema + return schemaTable.Schema, nil +} + func getChildToParentRelIDMap(ctx context.Context, conn *pgx.Conn, parentTableOIDs []uint32, handleInheritanceForNonPartitionedTables bool, ) (map[uint32]uint32, error) { @@ -140,6 +167,8 @@ type replProcessor[Items model.Items] interface { col *pglogrepl.RelationMessageColumn, customTypeMapping map[uint32]shared.CustomDataType, ) error + + AddStringColumn(items Items, name string, value string) } type pgProcessor struct{} @@ -173,6 +202,10 @@ func (pgProcessor) Process( return nil } +func (pgProcessor) AddStringColumn(items model.PgItems, name string, value string) { + items.AddColumn(name, shared.UnsafeFastStringToReadOnlyBytes(value)) +} + type qProcessor struct{} func (qProcessor) NewItems(size int) model.RecordItems { @@ -188,10 +221,12 @@ func (qProcessor) Process( ) error { switch tuple.DataType { case 'n': // null - items.AddColumn(col.Name, qvalue.QValueNull(qvalue.QValueKindInvalid)) + items.AddColumn(col.Name, types.QValueNull(types.QValueKindInvalid)) case 't': // text // bytea also appears here as a hex - data, err := p.decodeColumnData(tuple.Data, col.DataType, pgtype.TextFormatCode, customTypeMapping) + data, err := p.decodeColumnData( + tuple.Data, col.DataType, col.TypeModifier, pgtype.TextFormatCode, customTypeMapping, p.internalVersion, + ) if err != nil { p.logger.Error("error decoding text column data", slog.Any("error", err), slog.String("columnName", col.Name), slog.Int64("dataType", int64(col.DataType))) @@ -199,7 +234,9 @@ func (qProcessor) Process( } items.AddColumn(col.Name, data) case 'b': // binary - data, err := p.decodeColumnData(tuple.Data, col.DataType, pgtype.BinaryFormatCode, customTypeMapping) + data, err := p.decodeColumnData( + tuple.Data, col.DataType, col.TypeModifier, pgtype.BinaryFormatCode, customTypeMapping, p.internalVersion, + ) if err != nil { return fmt.Errorf("error decoding binary column data: %w", err) } @@ -210,13 +247,18 @@ func (qProcessor) Process( return nil } +func (qProcessor) AddStringColumn(items model.RecordItems, name string, value string) { + items.AddColumn(name, types.QValueString{Val: value}) +} + func processTuple[Items model.Items]( processor replProcessor[Items], p *PostgresCDCSource, tuple *pglogrepl.TupleData, rel *pglogrepl.RelationMessage, - exclude map[string]struct{}, + nameAndExclude model.NameAndExclude, customTypeMapping map[uint32]shared.CustomDataType, + schemaName string, ) (Items, map[string]struct{}, error) { // if the tuple is nil, return an empty map if tuple == nil { @@ -228,7 +270,7 
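getSourceSchemaForDestinationColumn caches the schema per relation ID so ParseSchemaTable runs once per table, and a nil map doubles as the "feature disabled" state; processTuple later uses a non-empty result to append a _peerdb_source_schema column. A toy version of the same lazy-cache pattern (names and the string split are illustrative, not the connector's API):

package main

import (
	"fmt"
	"strings"
)

// schemaFor mirrors the memoization in the hunk above: nil cache means the
// source-schema column is disabled; otherwise the schema part of
// "schema.table" is computed once per relation ID and cached.
func schemaFor(cache map[uint32]string, relID uint32, table string) string {
	if cache == nil {
		return ""
	}
	if s, ok := cache[relID]; ok {
		return s
	}
	schema, _, _ := strings.Cut(table, ".")
	cache[relID] = schema
	return schema
}

func main() {
	cache := map[uint32]string{}
	fmt.Println(schemaFor(cache, 16385, "public.users")) // public
	fmt.Println(schemaFor(cache, 16385, "public.users")) // served from cache
	fmt.Println(schemaFor(nil, 16385, "public.users"))   // "" -> column not added
}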
@@ func processTuple[Items model.Items]( for idx, tcol := range tuple.Columns { rcol := rel.Columns[idx] - if _, ok := exclude[rcol.Name]; ok { + if _, ok := nameAndExclude.Exclude[rcol.Name]; ok { continue } if tcol.DataType == 'u' { @@ -241,74 +283,100 @@ func processTuple[Items model.Items]( return none, nil, err } } + + if schemaName != "" { + processor.AddStringColumn(items, "_peerdb_source_schema", schemaName) + } + return items, unchangedToastColumns, nil } func (p *PostgresCDCSource) decodeColumnData( - data []byte, dataType uint32, formatCode int16, customTypeMapping map[uint32]shared.CustomDataType, -) (qvalue.QValue, error) { + data []byte, dataType uint32, typmod int32, formatCode int16, customTypeMapping map[uint32]shared.CustomDataType, version uint32, +) (types.QValue, error) { var parsedData any var err error if dt, ok := p.typeMap.TypeForOID(dataType); ok { - dtOid := oid.Oid(dt.OID) - if dtOid == oid.T_cidr || dtOid == oid.T_inet || dtOid == oid.T_macaddr || dtOid == oid.T_xml { + dtOid := dt.OID + if dtOid == pgtype.CIDROID || dtOid == pgtype.InetOID || dtOid == pgtype.MacaddrOID || dtOid == pgtype.XMLOID { // below is required to decode above types to string parsedData, err = dt.Codec.DecodeDatabaseSQLValue(p.typeMap, dataType, formatCode, data) } else { parsedData, err = dt.Codec.DecodeValue(p.typeMap, dataType, formatCode, data) } if err != nil { - if dtOid == oid.T_time || dtOid == oid.T_timetz || - dtOid == oid.T_timestamp || dtOid == oid.T_timestamptz { + if dtOid == pgtype.TimeOID || dtOid == pgtype.TimetzOID || + dtOid == pgtype.TimestampOID || dtOid == pgtype.TimestamptzOID { // indicates year is more than 4 digits or something similar, - // which you can insert into postgres, - // but not representable by time.Time - p.logger.Warn(fmt.Sprintf("Invalidated and hence nulled %s data: %s", - dt.Name, string(data))) + // which you can insert into postgres, but not representable by time.Time + p.logger.Warn("Invalidate time for destination, nulled", slog.String("typeName", dt.Name), slog.String("value", string(data))) switch dtOid { - case oid.T_time: - return qvalue.QValueNull(qvalue.QValueKindTime), nil - case oid.T_timetz: - return qvalue.QValueNull(qvalue.QValueKindTimeTZ), nil - case oid.T_timestamp: - return qvalue.QValueNull(qvalue.QValueKindTimestamp), nil - case oid.T_timestamptz: - return qvalue.QValueNull(qvalue.QValueKindTimestampTZ), nil + case pgtype.TimeOID: + return types.QValueNull(types.QValueKindTime), nil + case pgtype.TimetzOID: + return types.QValueNull(types.QValueKindTimeTZ), nil + case pgtype.TimestampOID: + return types.QValueNull(types.QValueKindTimestamp), nil + case pgtype.TimestamptzOID: + return types.QValueNull(types.QValueKindTimestampTZ), nil } } return nil, err } - return p.parseFieldFromPostgresOID(dataType, parsedData, customTypeMapping) - } else if dataType == uint32(oid.T_timetz) { // ugly TIMETZ workaround for CDC decoding. - return p.parseFieldFromPostgresOID(dataType, string(data), customTypeMapping) + return p.parseFieldFromPostgresOID(dataType, typmod, parsedData, customTypeMapping, p.internalVersion) + } else if dataType == pgtype.TimetzOID { // ugly TIMETZ workaround for CDC decoding. 
+ return p.parseFieldFromPostgresOID(dataType, typmod, string(data), customTypeMapping, p.internalVersion) } else if typeData, ok := customTypeMapping[dataType]; ok { - customQKind := customTypeToQKind(typeData) + customQKind := postgres.CustomTypeToQKind(typeData, version) switch customQKind { - case qvalue.QValueKindGeography, qvalue.QValueKindGeometry: + case types.QValueKindGeography, types.QValueKindGeometry: wkt, err := geo.GeoValidate(string(data)) if err != nil { - return qvalue.QValueNull(customQKind), nil - } else if customQKind == qvalue.QValueKindGeography { - return qvalue.QValueGeography{Val: wkt}, nil + return types.QValueNull(customQKind), nil + } else if customQKind == types.QValueKindGeography { + return types.QValueGeography{Val: wkt}, nil } else { - return qvalue.QValueGeometry{Val: wkt}, nil + return types.QValueGeometry{Val: wkt}, nil + } + case types.QValueKindHStore: + return types.QValueHStore{Val: string(data)}, nil + case types.QValueKindString: + return types.QValueString{Val: string(data)}, nil + case types.QValueKindEnum: + return types.QValueEnum{Val: string(data)}, nil + case types.QValueKindArrayString: + return types.QValueArrayString{Val: shared.ParsePgArrayToStringSlice(data, typeData.Delim)}, nil + case types.QValueKindArrayFloat32: + switch typeData.Name { + case "vector": + var vector pgvector.Vector + if err := vector.Parse(string(data)); err != nil { + return nil, fmt.Errorf("[pg] failed to parse vector: %w", err) + } + return types.QValueArrayFloat32{Val: vector.Slice()}, nil + case "halfvec": + var halfvec pgvector.HalfVector + if err := halfvec.Parse(string(data)); err != nil { + return nil, fmt.Errorf("[pg] failed to parse halfvec: %w", err) + } + return types.QValueArrayFloat32{Val: halfvec.Slice()}, nil + case "sparsevec": + var sparsevec pgvector.SparseVector + if err := sparsevec.Parse(string(data)); err != nil { + return nil, fmt.Errorf("[pg] failed to parse sparsevec: %w", err) + } + return types.QValueArrayFloat32{Val: sparsevec.Slice()}, nil + default: + return nil, fmt.Errorf("unknown float array type %s", typeData.Name) } - case qvalue.QValueKindHStore: - return qvalue.QValueHStore{Val: string(data)}, nil - case qvalue.QValueKindString: - return qvalue.QValueString{Val: string(data)}, nil - case qvalue.QValueKindEnum: - return qvalue.QValueEnum{Val: string(data)}, nil - case qvalue.QValueKindArrayString: - return qvalue.QValueArrayString{Val: shared.ParsePgArrayToStringSlice(data, typeData.Delim)}, nil - case qvalue.QValueKindArrayEnum: - return qvalue.QValueArrayEnum{Val: shared.ParsePgArrayToStringSlice(data, typeData.Delim)}, nil + case types.QValueKindArrayEnum: + return types.QValueArrayEnum{Val: shared.ParsePgArrayToStringSlice(data, typeData.Delim)}, nil default: - return nil, fmt.Errorf("unknown custom qkind: %s", customQKind) + return nil, fmt.Errorf("unknown custom qkind for %s: %s", typeData.Name, customQKind) } } - return qvalue.QValueString{Val: string(data)}, nil + return types.QValueString{Val: string(data)}, nil } // PullCdcRecords pulls records from req's cdc stream @@ -612,8 +680,7 @@ func PullCdcRecords[Items model.Items]( // otherwise push to records so destination can ack once all previous messages processed if cdcRecordsStorage.IsEmpty() { if int64(clientXLogPos) > req.ConsumedOffset.Load() { - err := p.updateConsumedOffset(ctx, logger, req.FlowJobName, req.ConsumedOffset, clientXLogPos) - if err != nil { + if err := p.updateConsumedOffset(ctx, logger, req.FlowJobName, req.ConsumedOffset, clientXLogPos); err != 
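The vector, halfvec and sparsevec branches lean on pgvector-go's text parsers and then flatten to []float32. A hedged sketch of that conversion outside the connector, using the same pgvector-go calls the hunk relies on (the "[...]" text format shown is an assumption about the replicated representation):

package main

import (
	"fmt"

	"github.com/pgvector/pgvector-go"
)

func main() {
	var v pgvector.Vector
	if err := v.Parse("[1,2,3]"); err != nil {
		panic(err)
	}
	fmt.Println(v.Slice()) // [1 2 3]

	var h pgvector.HalfVector
	if err := h.Parse("[0.5,1.5]"); err != nil {
		panic(err)
	}
	fmt.Println(h.Slice()) // [0.5 1.5]
}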
nil { return err } } @@ -753,7 +820,12 @@ func processInsertMessage[Items model.Items]( return nil, fmt.Errorf("unknown relation id: %d", relID) } - items, _, err := processTuple(processor, p, msg.Tuple, rel, p.tableNameMapping[tableName].Exclude, customTypeMapping) + schemaName, err := p.getSourceSchemaForDestinationColumn(relID, tableName) + if err != nil { + return nil, err + } + + items, _, err := processTuple(processor, p, msg.Tuple, rel, p.tableNameMapping[tableName], customTypeMapping, schemaName) if err != nil { return nil, fmt.Errorf("error converting tuple to map: %w", err) } @@ -789,14 +861,18 @@ func processUpdateMessage[Items model.Items]( return nil, fmt.Errorf("unknown relation id: %d", relID) } - oldItems, _, err := processTuple(processor, p, msg.OldTuple, rel, - p.tableNameMapping[tableName].Exclude, customTypeMapping) + schemaName, err := p.getSourceSchemaForDestinationColumn(relID, tableName) + if err != nil { + return nil, err + } + + oldItems, _, err := processTuple(processor, p, msg.OldTuple, rel, p.tableNameMapping[tableName], customTypeMapping, "") if err != nil { return nil, fmt.Errorf("error converting old tuple to map: %w", err) } newItems, unchangedToastColumns, err := processTuple( - processor, p, msg.NewTuple, rel, p.tableNameMapping[tableName].Exclude, customTypeMapping) + processor, p, msg.NewTuple, rel, p.tableNameMapping[tableName], customTypeMapping, schemaName) if err != nil { return nil, fmt.Errorf("error converting new tuple to map: %w", err) } @@ -847,8 +923,12 @@ func processDeleteMessage[Items model.Items]( return nil, fmt.Errorf("unknown relation id: %d", relID) } - items, _, err := processTuple(processor, p, msg.OldTuple, rel, - p.tableNameMapping[tableName].Exclude, customTypeMapping) + schemaName, err := p.getSourceSchemaForDestinationColumn(relID, tableName) + if err != nil { + return nil, err + } + + items, _, err := processTuple(processor, p, msg.OldTuple, rel, p.tableNameMapping[tableName], customTypeMapping, schemaName) if err != nil { return nil, fmt.Errorf("error converting tuple to map: %w", err) } @@ -907,11 +987,11 @@ func processRelationMessage[Items model.Items]( for _, column := range currRel.Columns { switch prevSchema.System { case protos.TypeSystem_Q: - qKind := p.postgresOIDToQValueKind(column.DataType, customTypeMapping) - if qKind == qvalue.QValueKindInvalid { + qKind := p.postgresOIDToQValueKind(column.DataType, customTypeMapping, p.internalVersion) + if qKind == types.QValueKindInvalid { typeName, ok := customTypeMapping[column.DataType] if ok { - qKind = customTypeToQKind(typeName) + qKind = postgres.CustomTypeToQKind(typeName, p.internalVersion) } } currRelMap[column.Name] = string(qKind) diff --git a/flow/connectors/postgres/client.go b/flow/connectors/postgres/client.go index bfd7d92529..20fec33bec 100644 --- a/flow/connectors/postgres/client.go +++ b/flow/connectors/postgres/client.go @@ -13,13 +13,12 @@ import ( "github.com/jackc/pgx/v5" "github.com/jackc/pgx/v5/pgconn" "github.com/jackc/pgx/v5/pgtype" - "github.com/lib/pq/oid" "github.com/PeerDB-io/peerdb/flow/connectors/utils" - numeric "github.com/PeerDB-io/peerdb/flow/datatypes" "github.com/PeerDB-io/peerdb/flow/generated/protos" "github.com/PeerDB-io/peerdb/flow/model" "github.com/PeerDB-io/peerdb/flow/shared" + numeric "github.com/PeerDB-io/peerdb/flow/shared/datatypes" ) const ( @@ -152,7 +151,7 @@ func (c *PostgresConnector) getUniqueColumns( } // Find the primary key index OID, for replica identity 'd'/default or 'f'/full - var pkIndexOID oid.Oid + var 
pkIndexOID uint32 err := c.conn.QueryRow(ctx, `SELECT indexrelid FROM pg_index WHERE indrelid = $1 AND indisprimary`, relID).Scan(&pkIndexOID) @@ -173,7 +172,7 @@ func (c *PostgresConnector) getReplicaIdentityIndexColumns( relID uint32, schemaTable *utils.SchemaTable, ) ([]string, error) { - var indexRelID oid.Oid + var indexRelID uint32 // Fetch the OID of the index used as the replica identity err := c.conn.QueryRow(ctx, `SELECT indexrelid FROM pg_index WHERE indrelid=$1 AND indisreplident=true`, @@ -189,7 +188,7 @@ func (c *PostgresConnector) getReplicaIdentityIndexColumns( } // getColumnNamesForIndex returns the column names for a given index. -func (c *PostgresConnector) getColumnNamesForIndex(ctx context.Context, indexOID oid.Oid) ([]string, error) { +func (c *PostgresConnector) getColumnNamesForIndex(ctx context.Context, indexOID uint32) ([]string, error) { rows, err := c.conn.Query(ctx, `SELECT a.attname FROM pg_index i JOIN pg_attribute a ON a.attrelid = i.indrelid AND a.attnum = ANY(i.indkey) diff --git a/flow/connectors/postgres/normalize_stmt_generator.go b/flow/connectors/postgres/normalize_stmt_generator.go index 0b85dbfa46..d090ff01ba 100644 --- a/flow/connectors/postgres/normalize_stmt_generator.go +++ b/flow/connectors/postgres/normalize_stmt_generator.go @@ -10,8 +10,8 @@ import ( "github.com/PeerDB-io/peerdb/flow/connectors/utils" "github.com/PeerDB-io/peerdb/flow/generated/protos" - "github.com/PeerDB-io/peerdb/flow/model/qvalue" "github.com/PeerDB-io/peerdb/flow/shared" + "github.com/PeerDB-io/peerdb/flow/shared/types" ) type normalizeStmtGenerator struct { @@ -49,10 +49,10 @@ func (n *normalizeStmtGenerator) generateExpr( pgType string, ) string { if normalizedTableSchema.System == protos.TypeSystem_Q { - qkind := qvalue.QValueKind(genericColumnType) + qkind := types.QValueKind(genericColumnType) if qkind.IsArray() { return fmt.Sprintf("ARRAY(SELECT JSON_ARRAY_ELEMENTS_TEXT((_peerdb_data->>%s)::JSON))::%s", stringCol, pgType) - } else if qkind == qvalue.QValueKindBytes { + } else if qkind == types.QValueKindBytes { return fmt.Sprintf("decode(_peerdb_data->>%s, 'base64')::%s", stringCol, pgType) } } diff --git a/flow/connectors/postgres/postgres.go b/flow/connectors/postgres/postgres.go index 9d85334da0..d3b9051623 100644 --- a/flow/connectors/postgres/postgres.go +++ b/flow/connectors/postgres/postgres.go @@ -436,7 +436,11 @@ func pullCore[Items model.Items]( } handleInheritanceForNonPartitionedTables, err := internal.PeerDBPostgresCDCHandleInheritanceForNonPartitionedTables(ctx, req.Env) if err != nil { - return fmt.Errorf("failed to get get setting for handleInheritanceForNonPartitionedTables: %v", err) + return fmt.Errorf("failed to get get setting for handleInheritanceForNonPartitionedTables: %w", err) + } + sourceSchemaAsDestinationColumn, err := internal.PeerDBSourceSchemaAsDestinationColumn(ctx, req.Env) + if err != nil { + return fmt.Errorf("failed to get get setting for sourceSchemaAsDestinationColumn: %w", err) } cdc, err := c.NewPostgresCDCSource(ctx, &PostgresCDCConfig{ @@ -450,6 +454,8 @@ func pullCore[Items model.Items]( Slot: slotName, Publication: publicationName, HandleInheritanceForNonPartitionedTables: handleInheritanceForNonPartitionedTables, + SourceSchemaAsDestinationColumn: sourceSchemaAsDestinationColumn, + InternalVersion: req.InternalVersion, }) if err != nil { c.logger.Error("error creating cdc source", slog.Any("error", err)) @@ -775,13 +781,14 @@ func (c *PostgresConnector) CreateRawTable(ctx context.Context, req *protos.Crea func (c 
*PostgresConnector) GetTableSchema( ctx context.Context, env map[string]string, + version uint32, system protos.TypeSystem, tableMapping []*protos.TableMapping, ) (map[string]*protos.TableSchema, error) { res := make(map[string]*protos.TableSchema, len(tableMapping)) for _, tm := range tableMapping { - tableSchema, err := c.getTableSchemaForTable(ctx, env, tm, system) + tableSchema, err := c.getTableSchemaForTable(ctx, env, tm, system, version) if err != nil { c.logger.Info("error fetching schema", slog.String("table", tm.SourceTableIdentifier), slog.Any("error", err)) return nil, err @@ -843,6 +850,7 @@ func (c *PostgresConnector) getTableSchemaForTable( env map[string]string, tm *protos.TableMapping, system protos.TypeSystem, + version uint32, ) (*protos.TableSchema, error) { schemaTable, err := utils.ParseSchemaTable(tm.SourceTableIdentifier) if err != nil { @@ -915,7 +923,7 @@ func (c *PostgresConnector) getTableSchemaForTable( case protos.TypeSystem_PG: colType, err = c.postgresOIDToName(fieldDescription.DataTypeOID, customTypeMapping) case protos.TypeSystem_Q: - qColType := c.postgresOIDToQValueKind(fieldDescription.DataTypeOID, customTypeMapping) + qColType := c.postgresOIDToQValueKind(fieldDescription.DataTypeOID, customTypeMapping, version) colType = string(qColType) } if err != nil { diff --git a/flow/connectors/postgres/postgres_schema_delta_test.go b/flow/connectors/postgres/postgres_schema_delta_test.go index 4026dba2e3..1a5072e1b4 100644 --- a/flow/connectors/postgres/postgres_schema_delta_test.go +++ b/flow/connectors/postgres/postgres_schema_delta_test.go @@ -12,8 +12,8 @@ import ( "github.com/PeerDB-io/peerdb/flow/e2eshared" "github.com/PeerDB-io/peerdb/flow/generated/protos" "github.com/PeerDB-io/peerdb/flow/internal" - "github.com/PeerDB-io/peerdb/flow/model/qvalue" "github.com/PeerDB-io/peerdb/flow/shared" + "github.com/PeerDB-io/peerdb/flow/shared/types" ) type PostgresSchemaDeltaTestSuite struct { @@ -63,7 +63,7 @@ func (s PostgresSchemaDeltaTestSuite) TestSimpleAddColumn() { AddedColumns: []*protos.FieldDescription{ { Name: "hi", - Type: string(qvalue.QValueKindInt64), + Type: string(types.QValueKindInt64), TypeModifier: -1, Nullable: true, }, @@ -71,7 +71,7 @@ func (s PostgresSchemaDeltaTestSuite) TestSimpleAddColumn() { }}) require.NoError(s.t, err) - output, err := s.connector.GetTableSchema(s.t.Context(), nil, protos.TypeSystem_Q, + output, err := s.connector.GetTableSchema(s.t.Context(), nil, shared.InternalVersion_Latest, protos.TypeSystem_Q, []*protos.TableMapping{{SourceTableIdentifier: tableName}}) require.NoError(s.t, err) require.Equal(s.t, &protos.TableSchema{ @@ -81,12 +81,12 @@ func (s PostgresSchemaDeltaTestSuite) TestSimpleAddColumn() { Columns: []*protos.FieldDescription{ { Name: "id", - Type: string(qvalue.QValueKindInt32), + Type: string(types.QValueKindInt32), TypeModifier: -1, }, { Name: "hi", - Type: string(qvalue.QValueKindInt64), + Type: string(types.QValueKindInt64), TypeModifier: -1, Nullable: true, }, @@ -120,7 +120,7 @@ func (s PostgresSchemaDeltaTestSuite) TestAddAllColumnTypes() { }}) require.NoError(s.t, err) - output, err := s.connector.GetTableSchema(s.t.Context(), nil, protos.TypeSystem_Q, + output, err := s.connector.GetTableSchema(s.t.Context(), nil, shared.InternalVersion_Latest, protos.TypeSystem_Q, []*protos.TableMapping{{SourceTableIdentifier: tableName}}) require.NoError(s.t, err) require.Equal(s.t, expectedTableSchema, output[tableName]) @@ -152,7 +152,7 @@ func (s PostgresSchemaDeltaTestSuite) TestAddTrickyColumnNames() { 
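For the TypeSystem_Q path, generateExpr emits per-column SQL against the raw _peerdb_data JSON; the two special cases are arrays (unpacked via JSON_ARRAY_ELEMENTS_TEXT) and bytes (base64-decoded). A worked instantiation of those format strings, with made-up column and type names:

package main

import "fmt"

func main() {
	// "tags" and "payload" are illustrative column names; the pgType values
	// are likewise assumptions, since the caller supplies them.
	arrayExpr := fmt.Sprintf(
		"ARRAY(SELECT JSON_ARRAY_ELEMENTS_TEXT((_peerdb_data->>%s)::JSON))::%s",
		"'tags'", "TEXT[]")
	fmt.Println(arrayExpr)
	// ARRAY(SELECT JSON_ARRAY_ELEMENTS_TEXT((_peerdb_data->>'tags')::JSON))::TEXT[]

	bytesExpr := fmt.Sprintf("decode(_peerdb_data->>%s, 'base64')::%s", "'payload'", "BYTEA")
	fmt.Println(bytesExpr)
	// decode(_peerdb_data->>'payload', 'base64')::BYTEA
}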
}}) require.NoError(s.t, err) - output, err := s.connector.GetTableSchema(s.t.Context(), nil, protos.TypeSystem_Q, + output, err := s.connector.GetTableSchema(s.t.Context(), nil, shared.InternalVersion_Latest, protos.TypeSystem_Q, []*protos.TableMapping{{SourceTableIdentifier: tableName}}) require.NoError(s.t, err) require.Equal(s.t, expectedTableSchema, output[tableName]) @@ -184,7 +184,7 @@ func (s PostgresSchemaDeltaTestSuite) TestAddDropWhitespaceColumnNames() { }}) require.NoError(s.t, err) - output, err := s.connector.GetTableSchema(s.t.Context(), nil, protos.TypeSystem_Q, + output, err := s.connector.GetTableSchema(s.t.Context(), nil, shared.InternalVersion_Latest, protos.TypeSystem_Q, []*protos.TableMapping{{SourceTableIdentifier: tableName}}) require.NoError(s.t, err) require.Equal(s.t, expectedTableSchema, output[tableName]) diff --git a/flow/connectors/postgres/qrep.go b/flow/connectors/postgres/qrep.go index 9d397879fd..09e85faf86 100644 --- a/flow/connectors/postgres/qrep.go +++ b/flow/connectors/postgres/qrep.go @@ -313,7 +313,7 @@ func corePullQRepRecords( if partition.FullTablePartition { c.logger.Info("pulling full table partition", partitionIdLog) - executor, err := c.NewQRepQueryExecutorSnapshot(ctx, config.SnapshotName, + executor, err := c.NewQRepQueryExecutorSnapshot(ctx, config.Version, config.SnapshotName, config.FlowJobName, partition.PartitionId) if err != nil { return 0, 0, fmt.Errorf("failed to create query executor: %w", err) @@ -355,7 +355,7 @@ func corePullQRepRecords( return 0, 0, err } - executor, err := c.NewQRepQueryExecutorSnapshot(ctx, config.SnapshotName, config.FlowJobName, partition.PartitionId) + executor, err := c.NewQRepQueryExecutorSnapshot(ctx, config.Version, config.SnapshotName, config.FlowJobName, partition.PartitionId) if err != nil { return 0, 0, fmt.Errorf("failed to create query executor: %w", err) } @@ -374,7 +374,7 @@ func (c *PostgresConnector) SyncQRepRecords( config *protos.QRepConfig, partition *protos.QRepPartition, stream *model.QRecordStream, -) (int64, error) { +) (int64, shared.QRepWarnings, error) { return syncQRepRecords(c, ctx, config, partition, RecordStreamSink{ QRecordStream: stream, }) @@ -385,7 +385,7 @@ func (c *PostgresConnector) SyncPgQRepRecords( config *protos.QRepConfig, partition *protos.QRepPartition, pipe PgCopyReader, -) (int64, error) { +) (int64, shared.QRepWarnings, error) { return syncQRepRecords(c, ctx, config, partition, pipe) } @@ -395,19 +395,19 @@ func syncQRepRecords( config *protos.QRepConfig, partition *protos.QRepPartition, sink QRepSyncSink, -) (int64, error) { +) (int64, shared.QRepWarnings, error) { dstTable, err := utils.ParseSchemaTable(config.DestinationTableIdentifier) if err != nil { - return 0, fmt.Errorf("failed to parse destination table identifier: %w", err) + return 0, nil, fmt.Errorf("failed to parse destination table identifier: %w", err) } exists, err := c.tableExists(ctx, dstTable) if err != nil { - return 0, fmt.Errorf("failed to check if table exists: %w", err) + return 0, nil, fmt.Errorf("failed to check if table exists: %w", err) } if !exists { - return 0, fmt.Errorf("table %s does not exist, used schema: %s", dstTable.Table, dstTable.Schema) + return 0, nil, fmt.Errorf("table %s does not exist, used schema: %s", dstTable.Table, dstTable.Schema) } c.logger.Info("SyncRecords called and initial checks complete.") @@ -427,17 +427,17 @@ func syncQRepRecords( txConfig := c.conn.Config() txConn, err := pgx.ConnectConfig(ctx, txConfig) if err != nil { - return 0, fmt.Errorf("failed 
to create tx pool: %w", err) + return 0, nil, fmt.Errorf("failed to create tx pool: %w", err) } defer txConn.Close(ctx) - if err := shared.RegisterHStore(ctx, txConn); err != nil { - return 0, fmt.Errorf("failed to register hstore: %w", err) + if err := shared.RegisterExtensions(ctx, txConn, config.Version); err != nil { + return 0, nil, fmt.Errorf("failed to register extensions: %w", err) } tx, err := txConn.Begin(ctx) if err != nil { - return 0, fmt.Errorf("failed to begin transaction: %w", err) + return 0, nil, fmt.Errorf("failed to begin transaction: %w", err) } defer shared.RollbackTx(tx, c.logger) @@ -453,13 +453,13 @@ func syncQRepRecords( _, err = c.execWithLoggingTx(ctx, "TRUNCATE TABLE "+dstTable.String(), tx) if err != nil { - return -1, fmt.Errorf("failed to TRUNCATE table before copy: %w", err) + return -1, nil, fmt.Errorf("failed to TRUNCATE table before copy: %w", err) } } numRowsSynced, err = sink.CopyInto(ctx, c, tx, pgx.Identifier{dstTable.Schema, dstTable.Table}) if err != nil { - return -1, fmt.Errorf("failed to copy records into destination table: %w", err) + return -1, nil, fmt.Errorf("failed to copy records into destination table: %w", err) } if syncedAtCol != "" { @@ -470,7 +470,7 @@ func syncQRepRecords( utils.QuoteIdentifier(syncedAtCol), ) if _, err := tx.Exec(ctx, updateSyncedAtStmt); err != nil { - return -1, fmt.Errorf("failed to update synced_at column: %w", err) + return -1, nil, fmt.Errorf("failed to update synced_at column: %w", err) } } } else { @@ -482,7 +482,7 @@ func syncQRepRecords( // From PG docs: The cost of setting a large value in sessions that do not actually need many // temporary buffers is only a buffer descriptor, or about 64 bytes, per increment in temp_buffers. if _, err := tx.Exec(ctx, "SET temp_buffers = '4GB';"); err != nil { - return -1, fmt.Errorf("failed to set temp_buffers: %w", err) + return -1, nil, fmt.Errorf("failed to set temp_buffers: %w", err) } createStagingTableStmt := fmt.Sprintf( @@ -494,13 +494,13 @@ func syncQRepRecords( c.logger.Info(fmt.Sprintf("Creating staging table %s - '%s'", stagingTableName, createStagingTableStmt), syncLog) if _, err := c.execWithLoggingTx(ctx, createStagingTableStmt, tx); err != nil { - return -1, fmt.Errorf("failed to create staging table: %w", err) + return -1, nil, fmt.Errorf("failed to create staging table: %w", err) } // Step 2.2: Insert records into the staging table numRowsSynced, err = sink.CopyInto(ctx, c, tx, stagingTableIdentifier) if err != nil { - return -1, fmt.Errorf("failed to copy records into staging table: %w", err) + return -1, nil, fmt.Errorf("failed to copy records into staging table: %w", err) } // construct the SET clause for the upsert operation @@ -512,7 +512,7 @@ func syncQRepRecords( columnNames, err := sink.GetColumnNames() if err != nil { - return -1, fmt.Errorf("faild to get column names: %w", err) + return -1, nil, fmt.Errorf("faild to get column names: %w", err) } setClauseArray := make([]string, 0, len(upsertMatchColsList)+1) selectStrArray := make([]string, 0, len(columnNames)) @@ -542,7 +542,7 @@ func syncQRepRecords( ) c.logger.Info("Performing upsert operation", slog.String("upsertStmt", upsertStmt), syncLog) if _, err := tx.Exec(ctx, upsertStmt); err != nil { - return -1, fmt.Errorf("failed to perform upsert operation: %w", err) + return -1, nil, fmt.Errorf("failed to perform upsert operation: %w", err) } } @@ -551,7 +551,7 @@ func syncQRepRecords( // marshal the partition to json using protojson pbytes, err := protojson.Marshal(partition) if err != nil { 
- return -1, fmt.Errorf("failed to marshal partition to json: %w", err) + return -1, nil, fmt.Errorf("failed to marshal partition to json: %w", err) } metadataTableIdentifier := pgx.Identifier{c.metadataSchema, qRepMetadataTableName} @@ -569,15 +569,15 @@ func syncQRepRecords( startTime, time.Now(), ); err != nil { - return -1, fmt.Errorf("failed to execute statements in a transaction: %w", err) + return -1, nil, fmt.Errorf("failed to execute statements in a transaction: %w", err) } if err := tx.Commit(ctx); err != nil { - return -1, fmt.Errorf("failed to commit transaction: %w", err) + return -1, nil, fmt.Errorf("failed to commit transaction: %w", err) } c.logger.Info(fmt.Sprintf("pushed %d records to %s", numRowsSynced, dstTable), syncLog) - return numRowsSynced, nil + return numRowsSynced, nil, nil } // SetupQRepMetadataTables function for postgres connector @@ -637,7 +637,7 @@ func pullXminRecordStream( queryArgs = []any{strconv.FormatInt(partition.Range.Range.(*protos.PartitionRange_IntRange).IntRange.Start&0xffffffff, 10)} } - executor, err := c.NewQRepQueryExecutorSnapshot(ctx, config.SnapshotName, + executor, err := c.NewQRepQueryExecutorSnapshot(ctx, config.Version, config.SnapshotName, config.FlowJobName, partition.PartitionId) if err != nil { return 0, 0, 0, fmt.Errorf("failed to create query executor: %w", err) diff --git a/flow/connectors/postgres/qrep_bench_test.go b/flow/connectors/postgres/qrep_bench_test.go index f0ce749322..9d3bd286ec 100644 --- a/flow/connectors/postgres/qrep_bench_test.go +++ b/flow/connectors/postgres/qrep_bench_test.go @@ -6,6 +6,7 @@ import ( "github.com/stretchr/testify/require" "github.com/PeerDB-io/peerdb/flow/internal" + "github.com/PeerDB-io/peerdb/flow/shared" ) func BenchmarkQRepQueryExecutor(b *testing.B) { @@ -17,7 +18,7 @@ func BenchmarkQRepQueryExecutor(b *testing.B) { defer connector.Close() // Create a new QRepQueryExecutor instance - qe, err := connector.NewQRepQueryExecutor(ctx, "test flow", "test part") + qe, err := connector.NewQRepQueryExecutor(ctx, shared.InternalVersion_Latest, "test flow", "test part") require.NoError(b, err, "error while creating QRepQueryExecutor") // Run the benchmark diff --git a/flow/connectors/postgres/qrep_partition_test.go b/flow/connectors/postgres/qrep_partition_test.go index 5b50979912..6de413e509 100644 --- a/flow/connectors/postgres/qrep_partition_test.go +++ b/flow/connectors/postgres/qrep_partition_test.go @@ -65,6 +65,7 @@ func newTestCaseForCTID(schema string, name string, rows uint32, expectedNum int } func TestGetQRepPartitions(t *testing.T) { + t.Parallel() connStr := internal.GetCatalogConnectionStringFromEnv(t.Context()) // Setup the DB @@ -203,8 +204,7 @@ func TestGetQRepPartitions(t *testing.T) { } // Drop the schema at the end - _, err = conn.Exec(t.Context(), fmt.Sprintf(`DROP SCHEMA %s CASCADE;`, schemaName)) - if err != nil { + if _, err := conn.Exec(t.Context(), fmt.Sprintf(`DROP SCHEMA %s CASCADE;`, schemaName)); err != nil { t.Fatalf("Failed to drop schema: %v", err) } } diff --git a/flow/connectors/postgres/qrep_query_executor.go b/flow/connectors/postgres/qrep_query_executor.go index 97244c4e56..782f07f8c8 100644 --- a/flow/connectors/postgres/qrep_query_executor.go +++ b/flow/connectors/postgres/qrep_query_executor.go @@ -10,10 +10,10 @@ import ( "github.com/jackc/pgx/v5/pgtype" "go.temporal.io/sdk/log" - "github.com/PeerDB-io/peerdb/flow/datatypes" "github.com/PeerDB-io/peerdb/flow/model" - "github.com/PeerDB-io/peerdb/flow/model/qvalue" "github.com/PeerDB-io/peerdb/flow/shared" 
+ "github.com/PeerDB-io/peerdb/flow/shared/datatypes" + "github.com/PeerDB-io/peerdb/flow/shared/types" ) type QRepQueryExecutor struct { @@ -22,15 +22,16 @@ type QRepQueryExecutor struct { snapshot string flowJobName string partitionID string + version uint32 } -func (c *PostgresConnector) NewQRepQueryExecutor(ctx context.Context, +func (c *PostgresConnector) NewQRepQueryExecutor(ctx context.Context, version uint32, flowJobName string, partitionID string, ) (*QRepQueryExecutor, error) { - return c.NewQRepQueryExecutorSnapshot(ctx, "", flowJobName, partitionID) + return c.NewQRepQueryExecutorSnapshot(ctx, version, "", flowJobName, partitionID) } -func (c *PostgresConnector) NewQRepQueryExecutorSnapshot(ctx context.Context, +func (c *PostgresConnector) NewQRepQueryExecutorSnapshot(ctx context.Context, version uint32, snapshot string, flowJobName string, partitionID string, ) (*QRepQueryExecutor, error) { _, err := c.fetchCustomTypeMapping(ctx) @@ -44,6 +45,7 @@ func (c *PostgresConnector) NewQRepQueryExecutorSnapshot(ctx context.Context, flowJobName: flowJobName, partitionID: partitionID, logger: log.With(c.logger, slog.String(string(shared.PartitionIDKey), partitionID)), + version: version, }, nil } @@ -70,14 +72,14 @@ func (qe *QRepQueryExecutor) executeQueryInTx(ctx context.Context, tx pgx.Tx, cu } // FieldDescriptionsToSchema converts a slice of pgconn.FieldDescription to a QRecordSchema. -func (qe *QRepQueryExecutor) fieldDescriptionsToSchema(fds []pgconn.FieldDescription) qvalue.QRecordSchema { - qfields := make([]qvalue.QField, len(fds)) +func (qe *QRepQueryExecutor) fieldDescriptionsToSchema(fds []pgconn.FieldDescription) types.QRecordSchema { + qfields := make([]types.QField, len(fds)) for i, fd := range fds { - ctype := qe.postgresOIDToQValueKind(fd.DataTypeOID, qe.customTypeMapping) + ctype := qe.postgresOIDToQValueKind(fd.DataTypeOID, qe.customTypeMapping, qe.version) // there isn't a way to know if a column is nullable or not - if ctype == qvalue.QValueKindNumeric { + if ctype == types.QValueKindNumeric || ctype == types.QValueKindArrayNumeric { precision, scale := datatypes.ParseNumericTypmod(fd.TypeModifier) - qfields[i] = qvalue.QField{ + qfields[i] = types.QField{ Name: fd.Name, Type: ctype, Nullable: true, @@ -85,14 +87,14 @@ func (qe *QRepQueryExecutor) fieldDescriptionsToSchema(fds []pgconn.FieldDescrip Scale: scale, } } else { - qfields[i] = qvalue.QField{ + qfields[i] = types.QField{ Name: fd.Name, Type: ctype, Nullable: true, } } } - return qvalue.NewQRecordSchema(qfields) + return types.NewQRecordSchema(qfields) } func (qe *QRepQueryExecutor) processRowsStream( @@ -308,9 +310,9 @@ func (qe *QRepQueryExecutor) ExecuteQueryIntoSinkGettingCurrentSnapshotXmin( func (qe *QRepQueryExecutor) mapRowToQRecord( row pgx.Rows, fds []pgconn.FieldDescription, -) ([]qvalue.QValue, error) { +) ([]types.QValue, error) { // make vals an empty array of QValue of size len(fds) - record := make([]qvalue.QValue, len(fds)) + record := make([]types.QValue, len(fds)) values, err := row.Values() if err != nil { @@ -319,7 +321,7 @@ func (qe *QRepQueryExecutor) mapRowToQRecord( } for i, fd := range fds { - tmp, err := qe.parseFieldFromPostgresOID(fd.DataTypeOID, values[i], qe.customTypeMapping) + tmp, err := qe.parseFieldFromPostgresOID(fd.DataTypeOID, fd.TypeModifier, values[i], qe.customTypeMapping, qe.version) if err != nil { qe.logger.Error("[pg_query_executor] failed to parse field", slog.Any("error", err)) return nil, fmt.Errorf("failed to parse field: %w", err) diff --git 
a/flow/connectors/postgres/qrep_query_executor_test.go b/flow/connectors/postgres/qrep_query_executor_test.go index 10cf68eba3..aae5d0db9a 100644 --- a/flow/connectors/postgres/qrep_query_executor_test.go +++ b/flow/connectors/postgres/qrep_query_executor_test.go @@ -12,9 +12,10 @@ import ( "github.com/PeerDB-io/peerdb/flow/connectors/utils" "github.com/PeerDB-io/peerdb/flow/internal" + "github.com/PeerDB-io/peerdb/flow/shared" ) -func setupDB(t *testing.T) (*PostgresConnector, string) { +func setupDB(t *testing.T, testName string) (*PostgresConnector, string) { t.Helper() connector, err := NewPostgresConnector(t.Context(), @@ -22,7 +23,7 @@ func setupDB(t *testing.T) (*PostgresConnector, string) { require.NoError(t, err, "error while creating connector") // Create unique schema name using current time - schemaName := fmt.Sprintf("qrep_query_executor_%d", time.Now().Unix()) + schemaName := fmt.Sprintf("qrep_query_executor_%s_%d", testName, time.Now().Unix()) // Create the schema _, err = connector.conn.Exec(t.Context(), @@ -41,8 +42,9 @@ func teardownDB(t *testing.T, conn *pgx.Conn, schemaName string) { } func TestExecuteAndProcessQuery(t *testing.T) { + t.Parallel() ctx := t.Context() - connector, schemaName := setupDB(t) + connector, schemaName := setupDB(t, "query") conn := connector.conn defer connector.Close() defer teardownDB(t, conn, schemaName) @@ -55,7 +57,7 @@ func TestExecuteAndProcessQuery(t *testing.T) { fmt.Sprintf("INSERT INTO %s.test(data) VALUES ('testdata')", utils.QuoteIdentifier(schemaName))) require.NoError(t, err, "error while inserting data") - qe, err := connector.NewQRepQueryExecutor(ctx, "test flow", "test part") + qe, err := connector.NewQRepQueryExecutor(ctx, shared.InternalVersion_Latest, "test flow", "test part") require.NoError(t, err, "error while creating QRepQueryExecutor") batch, err := qe.ExecuteAndProcessQuery(t.Context(), fmt.Sprintf("SELECT * FROM %s.test", utils.QuoteIdentifier(schemaName))) @@ -64,9 +66,10 @@ func TestExecuteAndProcessQuery(t *testing.T) { require.Equal(t, "testdata", batch.Records[0][1].Value(), "expected 'testdata'") } -func TestAllDataTypes(t *testing.T) { +func TestSupportedDataTypes(t *testing.T) { + t.Parallel() ctx := t.Context() - connector, schemaName := setupDB(t) + connector, schemaName := setupDB(t, "datatypes") conn := connector.conn defer conn.Close(ctx) defer teardownDB(t, conn, schemaName) @@ -141,7 +144,7 @@ func TestAllDataTypes(t *testing.T) { ) require.NoError(t, err, "error while inserting into test table") - qe, err := connector.NewQRepQueryExecutor(ctx, "test flow", "test part") + qe, err := connector.NewQRepQueryExecutor(ctx, shared.InternalVersion_Latest, "test flow", "test part") require.NoError(t, err, "error while creating QRepQueryExecutor") // Select the row back out of the table batch, err := qe.ExecuteAndProcessQuery(t.Context(), @@ -189,3 +192,434 @@ func TestAllDataTypes(t *testing.T) { actualNumeric := record[10].Value().(decimal.Decimal).String() require.Equal(t, expectedNumeric, actualNumeric, "expected 123.456") } + +func TestStringDataTypes(t *testing.T) { + t.Parallel() + ctx := t.Context() + + tests := []struct { + Type string + Literal string // skipped if empty + Expected string // skipped if empty + ArrayLiteral string // skipped if empty + ArrayExpected []string // skipped if empty + }{ + { + Type: "text", + Literal: "'abc'", + Expected: "abc", + ArrayLiteral: "ARRAY['abc', 'def', NULL]", + ArrayExpected: []string{"abc", "def", ""}, + }, + { + Type: "bytea", + Literal: "", + Expected: "", 
+ ArrayLiteral: `'{"\\x012345", "\\x6789ab", NULL}'::bytea[]`, + ArrayExpected: []string{"\x01\x23\x45", "\x67\x89\xab", ""}, + }, + { + Type: "bit(3)", + Literal: "b'101'", + Expected: "101", + ArrayLiteral: "ARRAY[b'101', b'111', NULL]", + ArrayExpected: []string{"101", "111", ""}, + }, + { + Type: "varbit", + Literal: "b'101'", + Expected: "101", + ArrayLiteral: "ARRAY[b'1', b'101', NULL]", + ArrayExpected: []string{"1", "101", ""}, + }, + { + Type: "xml", + Literal: "'data'::xml", + Expected: "data", + ArrayLiteral: `'{"value", "data", NULL}'::xml[]`, + ArrayExpected: []string{"value", "data", ""}, + }, + { + Type: "time", + Literal: "", + Expected: "", + ArrayLiteral: `'{"12:30:45", "18:15:30", NULL}'::time[]`, + ArrayExpected: []string{"12:30:45.000000", "18:15:30.000000", ""}, + }, + { + Type: "timetz", + Literal: "", + Expected: "", + ArrayLiteral: `'{"12:30:45+05", "18:15:30-08", NULL}'::timetz[]`, + ArrayExpected: []string{"12:30:45+05", "18:15:30-08", ""}, + }, + { + Type: "interval", + Literal: "'5 years 2 months 29 days 1 minute 2 seconds 200 milliseconds 20000 microseconds'::interval", + Expected: "{\"minutes\":1,\"seconds\":2.22,\"days\":29,\"months\":2,\"years\":5,\"valid\":true}", + ArrayLiteral: `'{"1 day", "2 hours 30 minutes", NULL}'::interval[]`, + ArrayExpected: []string{"{\"days\":1,\"valid\":true}", "{\"hours\":2,\"minutes\":30,\"valid\":true}", ""}, + }, + { + Type: "point", + Literal: "", + Expected: "", + ArrayLiteral: `'{"(1,2)", "(3,4)", NULL}'::point[]`, + ArrayExpected: []string{"(1,2)", "(3,4)", ""}, + }, + { + Type: "line", + Literal: "'{1,-1,0}'::line", + Expected: "{1,-1,0}", + ArrayLiteral: `'{"{1,-1,0}", "{2,-1,3}", NULL}'::line[]`, + ArrayExpected: []string{"{1,-1,0}", "{2,-1,3}", ""}, + }, + { + Type: "lseg", + Literal: "'[(1,1),(2,2)]'::lseg", + Expected: "[(1,1),(2,2)]", + ArrayLiteral: `'{"[(1,1),(2,2)]", "[(3,3),(4,4)]", NULL}'::lseg[]`, + ArrayExpected: []string{"[(1,1),(2,2)]", "[(3,3),(4,4)]", ""}, + }, + { + Type: "path", + Literal: "'((1,1),(2,2),(3,1))'::path", + Expected: "((1,1),(2,2),(3,1))", + ArrayLiteral: `'{"((1,1),(2,2),(3,1))", "((4,4),(5,5),(6,4))", NULL}'::path[]`, + ArrayExpected: []string{"((1,1),(2,2),(3,1))", "((4,4),(5,5),(6,4))", ""}, + }, + { + Type: "box", + Literal: "'((1,1),(3,3))'::box", + Expected: "(3,3),(1,1)", + ArrayLiteral: `array['((1,1),(3,3))','((4,4),(6,6))', NULL]::box[]`, + ArrayExpected: []string{"(3,3),(1,1)", "(6,6),(4,4)", ""}, + }, + { + Type: "polygon", + Literal: "'((1,1),(2,2),(3,1))'::polygon", + Expected: "((1,1),(2,2),(3,1))", + ArrayLiteral: `'{"((1,1),(2,2),(3,1))", "((4,4),(5,5),(6,4))", NULL}'::polygon[]`, + ArrayExpected: []string{"((1,1),(2,2),(3,1))", "((4,4),(5,5),(6,4))", ""}, + }, + { + Type: "circle", + Literal: "'<(1,-1),2>'::circle", + Expected: "<(1,-1),2>", + ArrayLiteral: `'{"<(1,-1),2>", "<(2,-1),3>", NULL}'::circle[]`, + ArrayExpected: []string{"<(1,-1),2>", "<(2,-1),3>", ""}, + }, + { + Type: "macaddr", + Literal: "", + Expected: "", + ArrayLiteral: `'{"08:00:2b:01:02:03", "08:00:2b:01:02:04", NULL}'::macaddr[]`, + ArrayExpected: []string{"08:00:2b:01:02:03", "08:00:2b:01:02:04", ""}, + }, + { + Type: "cidr", + Literal: "", + Expected: "", + ArrayLiteral: `'{"192.168.1.0/24", "10.0.0.0/8", NULL}'::cidr[]`, + ArrayExpected: []string{"192.168.1.0/24", "10.0.0.0/8", ""}, + }, + { + Type: "inet", + Literal: "", + Expected: "", + ArrayLiteral: `'{"192.168.1.1/32", "10.0.0.1/32", NULL}'::inet[]`, + ArrayExpected: []string{"192.168.1.1/32", "10.0.0.1/32", ""}, + }, + { + Type: 
"int4range", + Literal: "'[1,100]'::int4range", + Expected: "[1,101)", + ArrayLiteral: `'{"[1,100]", "[200,300]", NULL}'::int4range[]`, + ArrayExpected: []string{"[1,101)", "[200,301)", ""}, + }, + { + Type: "int4range", + Literal: "'(,)'::int4range", + Expected: "(,)", + ArrayLiteral: `'{"(,)", "(,)"}'::int4range[]`, + ArrayExpected: []string{"(,)", "(,)"}, + }, + { + Type: "int8range", + Literal: "'[1,10000000000]'::int8range", + Expected: "[1,10000000001)", + ArrayLiteral: `'{"[1,10000000000]", "[20000000000,30000000000]", NULL}'::int8range[]`, + ArrayExpected: []string{"[1,10000000001)", "[20000000000,30000000001)", ""}, + }, + { + Type: "int8range", + Literal: "'(,)'::int8range", + Expected: "(,)", + ArrayLiteral: `'{"(,)", "(,)"}'::int8range[]`, + ArrayExpected: []string{"(,)", "(,)"}, + }, + { + Type: "numrange", + Literal: "'[1.5,99.9]'::numrange", + Expected: "[1.5,99.9]", + ArrayLiteral: `'{"[1.5,99.9]", "[200.1,300.8]", NULL}'::numrange[]`, + ArrayExpected: []string{"[1.5,99.9]", "[200.1,300.8]", ""}, + }, + { + Type: "numrange", + Literal: "'(,)'::numrange", + Expected: "(,)", + ArrayLiteral: `'{"(,)", "(,)"}'::numrange[]`, + ArrayExpected: []string{"(,)", "(,)"}, + }, + { + Type: "tsrange", + Literal: "'[2023-01-01 00:00:00,2023-12-31 23:59:59]'::tsrange", + Expected: "[2023-01-01 00:00:00,2023-12-31 23:59:59]", + ArrayLiteral: `'{"[2023-01-01 00:00:00,2023-12-31 23:59:59]", "[2024-01-01 00:00:00,2024-12-31 23:59:59]", NULL}'::tsrange[]`, + ArrayExpected: []string{"[2023-01-01 00:00:00,2023-12-31 23:59:59]", "[2024-01-01 00:00:00,2024-12-31 23:59:59]", ""}, + }, + { + Type: "tsrange", + Literal: "'(,)'::tsrange", + Expected: "(,)", + ArrayLiteral: `'{"(,)", "(,)"}'::tsrange[]`, + ArrayExpected: []string{"(,)", "(,)"}, + }, + { + Type: "tstzrange", + Literal: "'[2023-01-01 00:00:00-02,2023-12-31 23:59:59+00]'::tstzrange", + Expected: "[2023-01-01 02:00:00Z,2023-12-31 23:59:59Z]", + ArrayLiteral: `'{` + + `"[2023-01-01 00:00:00-02,2023-12-31 23:59:59+00]",` + + `"[2024-01-01 00:00:00-02,2024-12-31 23:59:59+00]",` + + `NULL` + + `}'::tstzrange[]`, + ArrayExpected: []string{ + "[2023-01-01 02:00:00Z,2023-12-31 23:59:59Z]", + "[2024-01-01 02:00:00Z,2024-12-31 23:59:59Z]", + "", + }, + }, + { + Type: "tstzrange", + Literal: "'(,)'::tstzrange", + Expected: "(,)", + ArrayLiteral: `'{"(,)", "(,)"}'::tstzrange[]`, + ArrayExpected: []string{"(,)", "(,)"}, + }, + { + Type: "daterange", + Literal: "'[2023-01-01,2023-12-31]'::daterange", + Expected: "[2023-01-01,2024-01-01)", + ArrayLiteral: `'{"[2023-01-01,2023-12-31]", "[2024-01-01,2024-12-31]", NULL}'::daterange[]`, + ArrayExpected: []string{"[2023-01-01,2024-01-01)", "[2024-01-01,2025-01-01)", ""}, + }, + { + Type: "daterange", + Literal: "'(,)'::daterange", + Expected: "(,)", + ArrayLiteral: `'{"(,)", "(,)"}'::daterange[]`, + ArrayExpected: []string{"(,)", "(,)"}, + }, + { + Type: "int4multirange", + Literal: "'{[1,10],[20,30]}'::int4multirange", + Expected: "{[1,11),[20,31)}", + ArrayLiteral: `'{"{[1,10],[20,30]}", "{[100,110],[120,130]}", NULL}'::int4multirange[]`, + ArrayExpected: []string{"{[1,11),[20,31)}", "{[100,111),[120,131)}", ""}, + }, + { + Type: "int4multirange", + Literal: "'{(,10],[20,)}'::int4multirange", + Expected: "{(,11),[20,)}", + ArrayLiteral: `'{"{(,10],[20,)}", "{(,110],[120,)}"}'::int4multirange[]`, + ArrayExpected: []string{"{(,11),[20,)}", "{(,111),[120,)}"}, + }, + { + Type: "int8multirange", + Literal: "'{[1,10000000000],[20000000000,30000000000]}'::int8multirange", + Expected: 
"{[1,10000000001),[20000000000,30000000001)}", + ArrayLiteral: `'{` + + `"{[1,10000000000],[20000000000,30000000000]}",` + + `"{[40000000000,50000000000],[60000000000,70000000000]}",` + + `NULL` + + `}'::int8multirange[]`, + ArrayExpected: []string{ + "{[1,10000000001),[20000000000,30000000001)}", + "{[40000000000,50000000001),[60000000000,70000000001)}", + "", + }, + }, + { + Type: "int8multirange", + Literal: "'{(,10000000000],[20000000000,)}'::int8multirange", + Expected: "{(,10000000001),[20000000000,)}", + ArrayLiteral: `'{"{(,10000000000],[20000000000,)}", "{(,50000000000],[60000000000,)}"}'::int8multirange[]`, + ArrayExpected: []string{"{(,10000000001),[20000000000,)}", "{(,50000000001),[60000000000,)}"}, + }, + { + Type: "nummultirange", + Literal: "'{[1.1,10.9],[20.1,30.9]}'::nummultirange", + Expected: "{[1.1,10.9],[20.1,30.9]}", + ArrayLiteral: `'{` + + `"{[1.1,10.9],[20.1,30.9]}",` + + `"{[100.1,110.9],[120.1,130.9]}",` + + `NULL` + + `}'::nummultirange[]`, + ArrayExpected: []string{ + "{[1.1,10.9],[20.1,30.9]}", + "{[100.1,110.9],[120.1,130.9]}", + "", + }, + }, + { + Type: "nummultirange", + Literal: "'{(,10.9],[20.1,)}'::nummultirange", + Expected: "{(,10.9],[20.1,)}", + ArrayLiteral: `'{"{(,10.9],[20.1,)}", "{(,110.9],[120.1,)}"}'::nummultirange[]`, + ArrayExpected: []string{"{(,10.9],[20.1,)}", "{(,110.9],[120.1,)}"}, + }, + { + Type: "tsmultirange", + Literal: "'{[2023-01-01 00:00:00,2023-01-31 23:59:59],[2023-03-01 00:00:00,2023-03-31 23:59:59]}'::tsmultirange", + Expected: "{[2023-01-01 00:00:00,2023-01-31 23:59:59],[2023-03-01 00:00:00,2023-03-31 23:59:59]}", + ArrayLiteral: `'{` + + `"{[2023-01-01 00:00:00,2023-01-31 23:59:59],[2023-03-01 00:00:00,2023-03-31 23:59:59]}",` + + `"{[2024-01-01 00:00:00,2024-01-31 23:59:59],[2024-03-01 00:00:00,2024-03-31 23:59:59]}",` + + `NULL` + + `}'::tsmultirange[]`, + ArrayExpected: []string{ + "{[\"2023-01-01 00:00:00\",\"2023-01-31 23:59:59\"],[\"2023-03-01 00:00:00\",\"2023-03-31 23:59:59\"]}", + "{[\"2024-01-01 00:00:00\",\"2024-01-31 23:59:59\"],[\"2024-03-01 00:00:00\",\"2024-03-31 23:59:59\"]}", + "", + }, + }, + { + Type: "tsmultirange", + Literal: "'{(,2023-01-31 23:59:59],[2023-03-01 00:00:00,)}'::tsmultirange", + Expected: "{(,2023-01-31 23:59:59],[2023-03-01 00:00:00,)}", + ArrayLiteral: `'{` + + `"{(,2023-01-31 23:59:59],[2023-03-01 00:00:00,)}",` + + `"{(,2024-01-31 23:59:59],[2024-03-01 00:00:00,)}"` + + `}'::tsmultirange[]`, + ArrayExpected: []string{ + "{(,\"2023-01-31 23:59:59\"],[\"2023-03-01 00:00:00\",)}", + "{(,\"2024-01-31 23:59:59\"],[\"2024-03-01 00:00:00\",)}", + }, + }, + { + Type: "tstzmultirange", + Literal: "'{[2023-01-01 00:00:00-02,2023-01-31 23:59:59+00],[2023-03-01 00:00:00-02,2023-03-31 23:59:59+00]}'::tstzmultirange", + Expected: "{[2023-01-01 02:00:00Z,2023-01-31 23:59:59Z],[2023-03-01 02:00:00Z,2023-03-31 23:59:59Z]}", + ArrayLiteral: `'{"{[2023-01-01 00:00:00-02,2023-01-31 23:59:59+00],[2023-03-01 00:00:00-02,2023-03-31 23:59:59+00]}",` + + `"{[2024-01-01 00:00:00-02,2024-01-31 23:59:59+00],[2024-03-01 00:00:00-02,2024-03-31 23:59:59+00]}",` + + `NULL` + + `}'::tstzmultirange[]`, + ArrayExpected: []string{ + "{[\"2023-01-01 02:00:00+00\",\"2023-01-31 23:59:59+00\"],[\"2023-03-01 02:00:00+00\",\"2023-03-31 23:59:59+00\"]}", + "{[\"2024-01-01 02:00:00+00\",\"2024-01-31 23:59:59+00\"],[\"2024-03-01 02:00:00+00\",\"2024-03-31 23:59:59+00\"]}", + "", + }, + }, + { + Type: "tstzmultirange", + Literal: "'{(,2023-01-31 23:59:59+00],[2023-03-01 00:00:00-02,)}'::tstzmultirange", + Expected: 
"{(,2023-01-31 23:59:59Z],[2023-03-01 02:00:00Z,)}", + ArrayLiteral: `'{` + + `"{(,2023-01-31 23:59:59+00],[2023-03-01 00:00:00-02,)}",` + + `"{(,2024-01-31 23:59:59+00],[2024-03-01 00:00:00-02,)}"` + + `}'::tstzmultirange[]`, + ArrayExpected: []string{ + "{(,\"2023-01-31 23:59:59+00\"],[\"2023-03-01 02:00:00+00\",)}", + "{(,\"2024-01-31 23:59:59+00\"],[\"2024-03-01 02:00:00+00\",)}", + }, + }, + { + Type: "datemultirange", + Literal: "'{[2023-01-01,2023-01-31],[2023-03-01,2023-03-31]}'::datemultirange", + Expected: "{[2023-01-01,2023-02-01),[2023-03-01,2023-04-01)}", + ArrayLiteral: `'{` + + `"{[2023-01-01,2023-01-31],[2023-03-01,2023-03-31]}",` + + `"{[2024-01-01,2024-01-31],[2024-03-01,2024-03-31]}",` + + `NULL` + + `}'::datemultirange[]`, + ArrayExpected: []string{ + "{[2023-01-01,2023-02-01),[2023-03-01,2023-04-01)}", + "{[2024-01-01,2024-02-01),[2024-03-01,2024-04-01)}", + "", + }, + }, + { + Type: "datemultirange", + Literal: "'{(,2023-01-31],[2023-03-01,)}'::datemultirange", + Expected: "{(,2023-02-01),[2023-03-01,)}", + ArrayLiteral: `'{"{(,2023-01-31],[2023-03-01,)}","{(,2024-01-31],[2024-03-01,)}"}'::datemultirange[]`, + ArrayExpected: []string{"{(,2023-02-01),[2023-03-01,)}", "{(,2024-02-01),[2024-03-01,)}"}, + }, + } + + connector, schemaName := setupDB(t, "string") + conn := connector.conn + defer conn.Close(ctx) + defer teardownDB(t, conn, schemaName) + + for _, tc := range tests { + t.Run(tc.Type, func(t *testing.T) { + query := fmt.Sprintf( + "CREATE TABLE %s.test_strings(col %s, col_arr %s[])", + utils.QuoteIdentifier(schemaName), tc.Type, tc.Type, + ) + _, err := conn.Exec(ctx, query) + require.NoError(t, err) + + defer func() { + query := fmt.Sprintf( + "DROP TABLE %s.test_strings", utils.QuoteIdentifier(schemaName), + ) + _, err := conn.Exec(ctx, query) + require.NoError(t, err) + }() + + literal := tc.Literal + if literal == "" { + literal = "null" + } + arrayLiteral := tc.ArrayLiteral + if arrayLiteral == "" { + arrayLiteral = "null" + } + query = fmt.Sprintf( + "INSERT INTO %s.test_strings(col, col_arr) VALUES (%s, %s)", + utils.QuoteIdentifier(schemaName), literal, arrayLiteral, + ) + _, err = conn.Exec(ctx, query) + require.NoError(t, err) + + qe, err := connector.NewQRepQueryExecutor(ctx, shared.InternalVersion_Latest, "test flow", "test part") + require.NoError(t, err) + // Select the row back out of the table + batch, err := qe.ExecuteAndProcessQuery(t.Context(), + fmt.Sprintf("SELECT * FROM %s.test_strings", utils.QuoteIdentifier(schemaName))) + require.NoError(t, err) + require.Len(t, batch.Records, 1) + + // Retrieve the results. 
+ record := batch.Records[0] + + if tc.Expected != "" { + str, ok := record[0].Value().(string) + require.True(t, ok) + require.Equal(t, tc.Expected, str) + } + if tc.ArrayExpected != nil { + strs, ok := record[1].Value().([]string) + require.True(t, ok) + require.Len(t, strs, len(tc.ArrayExpected)) + for i, expected := range tc.ArrayExpected { + require.Equal(t, expected, strs[i]) + } + } + }) + } +} diff --git a/flow/connectors/postgres/qvalue_convert.go b/flow/connectors/postgres/qvalue_convert.go index 1b0a467b9c..ecbf31f241 100644 --- a/flow/connectors/postgres/qvalue_convert.go +++ b/flow/connectors/postgres/qvalue_convert.go @@ -12,12 +12,13 @@ import ( "github.com/google/uuid" "github.com/jackc/pgx/v5/pgtype" - "github.com/lib/pq/oid" + "github.com/pgvector/pgvector-go" "github.com/shopspring/decimal" - datatypes "github.com/PeerDB-io/peerdb/flow/datatypes" - "github.com/PeerDB-io/peerdb/flow/model/qvalue" "github.com/PeerDB-io/peerdb/flow/shared" + "github.com/PeerDB-io/peerdb/flow/shared/datatypes" + "github.com/PeerDB-io/peerdb/flow/shared/postgres" + "github.com/PeerDB-io/peerdb/flow/shared/types" ) func (c *PostgresConnector) postgresOIDToName(recvOID uint32, customTypeMapping map[uint32]shared.CustomDataType) (string, error) { @@ -25,18 +26,18 @@ func (c *PostgresConnector) postgresOIDToName(recvOID uint32, customTypeMapping return ty.Name, nil } // workaround for some types not being defined by pgtype - switch oid.Oid(recvOID) { - case oid.T_timetz: + switch recvOID { + case pgtype.TimetzOID: return "timetz", nil - case oid.T_xml: + case pgtype.XMLOID: return "xml", nil - case oid.T_money: + case shared.MoneyOID: return "money", nil - case oid.T_txid_snapshot: + case shared.TxidSnapshotOID: return "txid_snapshot", nil - case oid.T_tsvector: + case shared.TsvectorOID: return "tsvector", nil - case oid.T_tsquery: + case shared.TsqueryOID: return "tsquery", nil default: typeData, ok := customTypeMapping[recvOID] @@ -50,198 +51,115 @@ func (c *PostgresConnector) postgresOIDToName(recvOID uint32, customTypeMapping func (c *PostgresConnector) postgresOIDToQValueKind( recvOID uint32, customTypeMapping map[uint32]shared.CustomDataType, -) qvalue.QValueKind { - switch recvOID { - case pgtype.BoolOID: - return qvalue.QValueKindBoolean - case pgtype.Int2OID: - return qvalue.QValueKindInt16 - case pgtype.Int4OID: - return qvalue.QValueKindInt32 - case pgtype.Int8OID: - return qvalue.QValueKindInt64 - case pgtype.Float4OID: - return qvalue.QValueKindFloat32 - case pgtype.Float8OID: - return qvalue.QValueKindFloat64 - case pgtype.QCharOID: - return qvalue.QValueKindQChar - case pgtype.TextOID, pgtype.VarcharOID, pgtype.BPCharOID: - return qvalue.QValueKindString - case pgtype.ByteaOID: - return qvalue.QValueKindBytes - case pgtype.JSONOID: - return qvalue.QValueKindJSON - case pgtype.JSONBOID: - return qvalue.QValueKindJSONB - case pgtype.UUIDOID: - return qvalue.QValueKindUUID - case pgtype.TimeOID: - return qvalue.QValueKindTime - case pgtype.DateOID: - return qvalue.QValueKindDate - case pgtype.CIDROID: - return qvalue.QValueKindCIDR - case pgtype.MacaddrOID: - return qvalue.QValueKindMacaddr - case pgtype.InetOID: - return qvalue.QValueKindINET - case pgtype.TimestampOID: - return qvalue.QValueKindTimestamp - case pgtype.TimestamptzOID: - return qvalue.QValueKindTimestampTZ - case pgtype.NumericOID: - return qvalue.QValueKindNumeric - case pgtype.Int2ArrayOID: - return qvalue.QValueKindArrayInt16 - case pgtype.Int4ArrayOID: - return qvalue.QValueKindArrayInt32 - case 
pgtype.Int8ArrayOID: - return qvalue.QValueKindArrayInt64 - case pgtype.PointOID: - return qvalue.QValueKindPoint - case pgtype.Float4ArrayOID: - return qvalue.QValueKindArrayFloat32 - case pgtype.Float8ArrayOID: - return qvalue.QValueKindArrayFloat64 - case pgtype.BoolArrayOID: - return qvalue.QValueKindArrayBoolean - case pgtype.DateArrayOID: - return qvalue.QValueKindArrayDate - case pgtype.TimestampArrayOID: - return qvalue.QValueKindArrayTimestamp - case pgtype.TimestamptzArrayOID: - return qvalue.QValueKindArrayTimestampTZ - case pgtype.UUIDArrayOID: - return qvalue.QValueKindArrayUUID - case pgtype.TextArrayOID, pgtype.VarcharArrayOID, pgtype.BPCharArrayOID: - return qvalue.QValueKindArrayString - case pgtype.JSONArrayOID: - return qvalue.QValueKindArrayJSON - case pgtype.JSONBArrayOID: - return qvalue.QValueKindArrayJSONB - case pgtype.IntervalOID: - return qvalue.QValueKindInterval - case pgtype.TstzrangeOID: - return qvalue.QValueKindTSTZRange - default: - if typeName, ok := c.typeMap.TypeForOID(recvOID); ok { - colType := qvalue.QValueKindString - if typeData, ok := customTypeMapping[recvOID]; ok { - colType = customTypeToQKind(typeData) - } - if _, warned := c.hushWarnOID[recvOID]; !warned { - c.logger.Warn("unsupported field type", - slog.Int64("oid", int64(recvOID)), slog.String("typeName", typeName.Name), slog.String("mapping", string(colType))) - c.hushWarnOID[recvOID] = struct{}{} - } - return colType - } else { - // workaround for some types not being defined by pgtype - switch oid.Oid(recvOID) { - case oid.T_timetz: - return qvalue.QValueKindTimeTZ - case oid.T_point: - return qvalue.QValueKindPoint - default: - if typeData, ok := customTypeMapping[recvOID]; ok { - return customTypeToQKind(typeData) - } - return qvalue.QValueKindString - } + version uint32, +) types.QValueKind { + colType, err := postgres.PostgresOIDToQValueKind(recvOID, customTypeMapping, c.typeMap, version) + if err != nil { + if _, warned := c.hushWarnOID[recvOID]; !warned { + c.logger.Warn( + "unsupported field type", + slog.Int64("oid", int64(recvOID)), + slog.String("typeName", err.Error()), + slog.String("mapping", string(colType))) + c.hushWarnOID[recvOID] = struct{}{} } } + return colType } func qValueKindToPostgresType(colTypeStr string) string { - switch qvalue.QValueKind(colTypeStr) { - case qvalue.QValueKindBoolean: + switch types.QValueKind(colTypeStr) { + case types.QValueKindBoolean: return "BOOLEAN" - case qvalue.QValueKindInt16, qvalue.QValueKindUInt16, qvalue.QValueKindInt8, qvalue.QValueKindUInt8: + case types.QValueKindInt16, types.QValueKindUInt16, types.QValueKindInt8, types.QValueKindUInt8: return "SMALLINT" - case qvalue.QValueKindInt32, qvalue.QValueKindUInt32: + case types.QValueKindInt32, types.QValueKindUInt32: return "INTEGER" - case qvalue.QValueKindInt64, qvalue.QValueKindUInt64: + case types.QValueKindInt64, types.QValueKindUInt64: return "BIGINT" - case qvalue.QValueKindFloat32: + case types.QValueKindFloat32: return "REAL" - case qvalue.QValueKindFloat64: + case types.QValueKindFloat64: return "DOUBLE PRECISION" - case qvalue.QValueKindQChar: + case types.QValueKindQChar: return "\"char\"" - case qvalue.QValueKindString, qvalue.QValueKindEnum: + case types.QValueKindString, types.QValueKindEnum: return "TEXT" - case qvalue.QValueKindBytes: + case types.QValueKindBytes: return "BYTEA" - case qvalue.QValueKindJSON: + case types.QValueKindJSON: return "JSON" - case qvalue.QValueKindJSONB: + case types.QValueKindJSONB: return "JSONB" - case qvalue.QValueKindHStore: + case 
types.QValueKindHStore: return "HSTORE" - case qvalue.QValueKindUUID: + case types.QValueKindUUID: return "UUID" - case qvalue.QValueKindArrayUUID: + case types.QValueKindArrayUUID: return "UUID[]" - case qvalue.QValueKindTime: + case types.QValueKindTime: return "TIME" - case qvalue.QValueKindTimeTZ: + case types.QValueKindTimeTZ: return "TIMETZ" - case qvalue.QValueKindDate: + case types.QValueKindDate: return "DATE" - case qvalue.QValueKindTimestamp: + case types.QValueKindTimestamp: return "TIMESTAMP" - case qvalue.QValueKindTimestampTZ: + case types.QValueKindTimestampTZ: return "TIMESTAMPTZ" - case qvalue.QValueKindNumeric: + case types.QValueKindNumeric: return "NUMERIC" - case qvalue.QValueKindINET: + case types.QValueKindINET: return "INET" - case qvalue.QValueKindCIDR: + case types.QValueKindCIDR: return "CIDR" - case qvalue.QValueKindMacaddr: + case types.QValueKindMacaddr: return "MACADDR" - case qvalue.QValueKindArrayInt16: + case types.QValueKindArrayInt16: return "SMALLINT[]" - case qvalue.QValueKindArrayInt32: + case types.QValueKindArrayInt32: return "INTEGER[]" - case qvalue.QValueKindArrayInt64: + case types.QValueKindArrayInt64: return "BIGINT[]" - case qvalue.QValueKindArrayFloat32: + case types.QValueKindArrayFloat32: return "REAL[]" - case qvalue.QValueKindArrayFloat64: + case types.QValueKindArrayFloat64: return "DOUBLE PRECISION[]" - case qvalue.QValueKindArrayDate: + case types.QValueKindArrayDate: return "DATE[]" - case qvalue.QValueKindArrayTimestamp: + case types.QValueKindArrayInterval: + return "TEXT[]" + case types.QValueKindArrayTimestamp: return "TIMESTAMP[]" - case qvalue.QValueKindArrayTimestampTZ: + case types.QValueKindArrayTimestampTZ: return "TIMESTAMPTZ[]" - case qvalue.QValueKindArrayBoolean: + case types.QValueKindArrayBoolean: return "BOOLEAN[]" - case qvalue.QValueKindArrayString, qvalue.QValueKindArrayEnum: + case types.QValueKindArrayString, types.QValueKindArrayEnum: return "TEXT[]" - case qvalue.QValueKindArrayJSON: + case types.QValueKindArrayJSON: return "JSON[]" - case qvalue.QValueKindArrayJSONB: + case types.QValueKindArrayJSONB: return "JSONB[]" - case qvalue.QValueKindGeography: + case types.QValueKindArrayNumeric: + return "NUMERIC[]" + case types.QValueKindGeography: return "GEOGRAPHY" - case qvalue.QValueKindGeometry: + case types.QValueKindGeometry: return "GEOMETRY" - case qvalue.QValueKindPoint: + case types.QValueKindPoint: return "POINT" default: return "TEXT" } } -func parseJSON(value any, isArray bool) (qvalue.QValue, error) { +func parseJSON(value any, isArray bool) (types.QValue, error) { jsonVal, err := json.Marshal(value) if err != nil { return nil, fmt.Errorf("failed to parse JSON: %w", err) } - return qvalue.QValueJSON{Val: string(jsonVal), IsArray: isArray}, nil + return types.QValueJSON{Val: string(jsonVal), IsArray: isArray}, nil } func parseUUID(value any) (uuid.UUID, error) { @@ -259,7 +177,7 @@ func parseUUID(value any) (uuid.UUID, error) { } } -func parseUUIDArray(value any) (qvalue.QValue, error) { +func parseUUIDArray(value any) (types.QValue, error) { switch v := value.(type) { case []string: uuids := make([]uuid.UUID, 0, len(v)) @@ -270,15 +188,15 @@ func parseUUIDArray(value any) (qvalue.QValue, error) { } uuids = append(uuids, id) } - return qvalue.QValueArrayUUID{Val: uuids}, nil + return types.QValueArrayUUID{Val: uuids}, nil case [][16]byte: uuids := make([]uuid.UUID, 0, len(v)) for _, v := range v { uuids = append(uuids, uuid.UUID(v)) } - return qvalue.QValueArrayUUID{Val: uuids}, nil + return 
types.QValueArrayUUID{Val: uuids}, nil case []uuid.UUID: - return qvalue.QValueArrayUUID{Val: v}, nil + return types.QValueArrayUUID{Val: v}, nil case []any: uuids := make([]uuid.UUID, 0, len(v)) for _, v := range v { @@ -288,13 +206,178 @@ func parseUUIDArray(value any) (qvalue.QValue, error) { } uuids = append(uuids, id) } - return qvalue.QValueArrayUUID{Val: uuids}, nil + return types.QValueArrayUUID{Val: uuids}, nil default: return nil, fmt.Errorf("unsupported type for UUID array: %T", value) } } -func convertToArray[T any](kind qvalue.QValueKind, value any) ([]T, error) { +func intervalToString(intervalObject pgtype.Interval) (string, error) { + var interval datatypes.PeerDBInterval + interval.Hours = int(intervalObject.Microseconds / 3600000000) + interval.Minutes = int((intervalObject.Microseconds % 3600000000) / 60000000) + interval.Seconds = float64(intervalObject.Microseconds%60000000) / 1000000.0 + interval.Days = int(intervalObject.Days) + interval.Years = int(intervalObject.Months / 12) + interval.Months = int(intervalObject.Months % 12) + interval.Valid = intervalObject.Valid + + intervalJSON, err := json.Marshal(interval) + if err != nil { + return "", fmt.Errorf("failed to parse interval: %w", err) + } + + if !interval.Valid { + return "", fmt.Errorf("invalid interval: %v", intervalObject) + } + + return string(intervalJSON), nil +} + +var ErrMismatchingRangeType = errors.New("mismatching range type") + +func rangeToTyped[T any](r pgtype.Range[any]) (pgtype.Range[*T], error) { + var lower, upper *T + if r.Lower != nil { + lowerVal, ok := r.Lower.(T) + if !ok { + return pgtype.Range[*T]{}, ErrMismatchingRangeType + } + lower = &lowerVal + } + if r.Upper != nil { + upperVal, ok := r.Upper.(T) + if !ok { + return pgtype.Range[*T]{}, ErrMismatchingRangeType + } + upper = &upperVal + } + return pgtype.Range[*T]{ + Lower: lower, + Upper: upper, + LowerType: r.LowerType, + UpperType: r.UpperType, + Valid: r.Valid, + }, nil +} + +func multirangeToTyped[T any](multirange pgtype.Multirange[pgtype.Range[any]]) (pgtype.Multirange[pgtype.Range[*T]], error) { + ranges := make([]pgtype.Range[*T], 0, multirange.Len()) + for _, anyR := range multirange { + r, err := rangeToTyped[T](anyR) + if err != nil { + return nil, err + } + ranges = append(ranges, r) + } + return pgtype.Multirange[pgtype.Range[*T]](ranges), nil +} + +func (c *PostgresConnector) convertToString(oid uint32, value any) string { + if value == nil { + return "" + } + if buf, err := c.typeMap.Encode(oid, pgtype.TextFormatCode, value, nil); err == nil { + return shared.UnsafeFastReadOnlyBytesToString(buf) + } + // pgx returns us type-erased ranges that it doesn't know how to encode + // but if we bring the types back it becomes able to + if r, ok := value.(pgtype.Range[any]); ok { + var typedR any + var err error + switch oid { + case pgtype.Int4rangeOID: + typedR, err = rangeToTyped[int32](r) + case pgtype.Int8rangeOID: + typedR, err = rangeToTyped[int64](r) + case pgtype.NumrangeOID: + typedR, err = rangeToTyped[pgtype.Numeric](r) + case pgtype.DaterangeOID, pgtype.TsrangeOID, pgtype.TstzrangeOID: + // It might seem like tstzrange needs special handling + // but it's actually all UTC under the hood + typedR, err = rangeToTyped[time.Time](r) + default: + err = errors.ErrUnsupported + } + if err == nil { + var buf []byte + buf, err = c.typeMap.Encode(oid, pgtype.TextFormatCode, typedR, nil) + if err == nil { + return shared.UnsafeFastReadOnlyBytesToString(buf) + } + } + c.logger.Warn(fmt.Sprintf( + "couldn't encode range %v 
(%T, oid %d): %v", value, value, oid, err, + )) + } + if multirange, ok := value.(pgtype.Multirange[pgtype.Range[any]]); ok { + var typedM any + var err error + switch oid { + case pgtype.Int4multirangeOID: + typedM, err = multirangeToTyped[int32](multirange) + case pgtype.Int8multirangeOID: + typedM, err = multirangeToTyped[int64](multirange) + case pgtype.NummultirangeOID: + typedM, err = multirangeToTyped[pgtype.Numeric](multirange) + case pgtype.DatemultirangeOID, pgtype.TsmultirangeOID, pgtype.TstzmultirangeOID: + typedM, err = multirangeToTyped[time.Time](multirange) + default: + err = errors.ErrUnsupported + } + if err == nil { + var buf []byte + buf, err = c.typeMap.Encode(oid, pgtype.TextFormatCode, typedM, nil) + if err == nil { + return shared.UnsafeFastReadOnlyBytesToString(buf) + } + } + c.logger.Warn(fmt.Sprintf( + "couldn't encode multirange %v (%T, oid %d): %v", value, value, oid, err, + )) + } + return fmt.Sprint(value) +} + +var arrayOidToRangeOid = map[uint32]uint32{ + pgtype.Int4rangeArrayOID: pgtype.Int4rangeOID, + pgtype.Int8rangeArrayOID: pgtype.Int8rangeOID, + pgtype.NumrangeArrayOID: pgtype.NumrangeOID, + pgtype.DaterangeArrayOID: pgtype.DaterangeOID, + pgtype.TsrangeArrayOID: pgtype.TsrangeOID, + pgtype.TstzrangeArrayOID: pgtype.TstzrangeOID, + pgtype.Int4multirangeArrayOID: pgtype.Int4multirangeOID, + pgtype.Int8multirangeArrayOID: pgtype.Int8multirangeOID, + pgtype.NummultirangeArrayOID: pgtype.NummultirangeOID, + pgtype.DatemultirangeArrayOID: pgtype.DatemultirangeOID, + pgtype.TsmultirangeArrayOID: pgtype.TsmultirangeOID, + pgtype.TstzmultirangeArrayOID: pgtype.TstzmultirangeOID, +} + +func (c *PostgresConnector) convertToStringArray(kind types.QValueKind, oid uint32, value any) ([]string, error) { + switch v := value.(type) { + case pgtype.Array[string]: + if v.Valid { + return v.Elements, nil + } + case []string: + return v, nil + case []any: + itemOid := oid + if rangeOid, ok := arrayOidToRangeOid[oid]; ok { + itemOid = rangeOid + } + res := make([]string, 0, len(v)) + for _, item := range v { + str := c.convertToString(itemOid, item) + res = append(res, str) + } + return res, nil + } + return nil, fmt.Errorf("failed to parse array %s from %T: %v", kind, value, value) +} + +func convertToArray[T any](kind types.QValueKind, value any) ([]T, error) { switch v := value.(type) { case pgtype.Array[T]: if v.Valid { @@ -309,88 +392,71 @@ func convertToArray[T any](kind qvalue.QValueKind, value any) ([]T, error) { } func (c *PostgresConnector) parseFieldFromPostgresOID( - oid uint32, value any, customTypeMapping map[uint32]shared.CustomDataType, -) (qvalue.QValue, error) { - qvalueKind := c.postgresOIDToQValueKind(oid, customTypeMapping) + oid uint32, typmod int32, value any, customTypeMapping map[uint32]shared.CustomDataType, version uint32, +) (types.QValue, error) { + qvalueKind := c.postgresOIDToQValueKind(oid, customTypeMapping, version) if value == nil { - return qvalue.QValueNull(qvalueKind), nil + return types.QValueNull(qvalueKind), nil } switch qvalueKind { - case qvalue.QValueKindTimestamp: + case types.QValueKindTimestamp: switch val := value.(type) { case time.Time: - return qvalue.QValueTimestamp{Val: val}, nil + return types.QValueTimestamp{Val: val}, nil case pgtype.InfinityModifier: - return qvalue.QValueNull(qvalueKind), nil + return types.QValueNull(qvalueKind), nil } - case qvalue.QValueKindTimestampTZ: + case types.QValueKindTimestampTZ: switch val := value.(type) { case time.Time: - return qvalue.QValueTimestampTZ{Val: val}, nil + return 
types.QValueTimestampTZ{Val: val}, nil case pgtype.InfinityModifier: - return qvalue.QValueNull(qvalueKind), nil - } - case qvalue.QValueKindInterval: - intervalObject := value.(pgtype.Interval) - var interval datatypes.PeerDBInterval - interval.Hours = int(intervalObject.Microseconds / 3600000000) - interval.Minutes = int((intervalObject.Microseconds % 3600000000) / 60000000) - interval.Seconds = float64(intervalObject.Microseconds%60000000) / 1000000.0 - interval.Days = int(intervalObject.Days) - interval.Years = int(intervalObject.Months / 12) - interval.Months = int(intervalObject.Months % 12) - interval.Valid = intervalObject.Valid - - intervalJSON, err := json.Marshal(interval) - if err != nil { - return nil, fmt.Errorf("failed to parse interval: %w", err) + return types.QValueNull(qvalueKind), nil } - - if !interval.Valid { - return nil, fmt.Errorf("invalid interval: %v", value) - } - - return qvalue.QValueString{Val: string(intervalJSON)}, nil - case qvalue.QValueKindTSTZRange: - tstzrangeObject := value.(pgtype.Range[any]) - lowerBoundType := tstzrangeObject.LowerType - upperBoundType := tstzrangeObject.UpperType - lowerTime, err := convertTimeRangeBound(tstzrangeObject.Lower) - if err != nil { - return nil, fmt.Errorf("[tstzrange]error for lower time bound: %w", err) - } - - upperTime, err := convertTimeRangeBound(tstzrangeObject.Upper) - if err != nil { - return nil, fmt.Errorf("[tstzrange]error for upper time bound: %w", err) + case types.QValueKindInterval: + if interval, ok := value.(pgtype.Interval); ok { + str, err := intervalToString(interval) + if err != nil { + return nil, err + } + return types.QValueInterval{Val: str}, nil + } + case types.QValueKindArrayInterval: + if arr, ok := value.([]any); ok { + success := true + strs := make([]string, 0, len(arr)) + for _, item := range arr { + if item == nil { + strs = append(strs, "") + } else if interval, ok := item.(pgtype.Interval); ok { + str, err := intervalToString(interval) + if err != nil { + return nil, fmt.Errorf("failed to parse interval array: %w", err) + } + strs = append(strs, str) + } else { + success = false + break + } + } + if success { + return types.QValueArrayInterval{Val: strs}, nil + } } - - lowerBracket := "[" - if lowerBoundType == pgtype.Exclusive { - lowerBracket = "(" - } - upperBracket := "]" - if upperBoundType == pgtype.Exclusive { - upperBracket = ")" - } - tstzrangeStr := fmt.Sprintf("%s%v,%v%s", - lowerBracket, lowerTime, upperTime, upperBracket) - return qvalue.QValueTSTZRange{Val: tstzrangeStr}, nil - case qvalue.QValueKindDate: + case types.QValueKindDate: switch val := value.(type) { case time.Time: - return qvalue.QValueDate{Val: val}, nil + return types.QValueDate{Val: val}, nil case pgtype.InfinityModifier: - return qvalue.QValueNull(qvalueKind), nil + return types.QValueNull(qvalueKind), nil } - case qvalue.QValueKindTime: + case types.QValueKindTime: timeVal := value.(pgtype.Time) if timeVal.Valid { - // 86399999999 to prevent 24:00:00 - return qvalue.QValueTime{Val: time.UnixMicro(min(timeVal.Microseconds, 86399999999))}, nil + return types.QValueTime{Val: time.Duration(timeVal.Microseconds) * time.Microsecond}, nil } - case qvalue.QValueKindTimeTZ: + case types.QValueKindTimeTZ: timeVal := value.(string) // edge case, Postgres supports this extreme value for time timeVal = strings.Replace(timeVal, "24:00:00.000000", "23:59:59.999999", 1) @@ -411,252 +477,274 @@ func (c *PostgresConnector) parseFieldFromPostgresOID( if err != nil { return nil, fmt.Errorf("failed to parse time: %w", 
err) } - return qvalue.QValueTimeTZ{Val: t.AddDate(1970, 0, 0)}, nil - case qvalue.QValueKindBoolean: + return types.QValueTimeTZ{Val: t.UTC().Sub(shared.Year0000)}, nil + case types.QValueKindBoolean: boolVal := value.(bool) - return qvalue.QValueBoolean{Val: boolVal}, nil - case qvalue.QValueKindJSON, qvalue.QValueKindJSONB: + return types.QValueBoolean{Val: boolVal}, nil + case types.QValueKindJSON, types.QValueKindJSONB: tmp, err := parseJSON(value, false) if err != nil { return nil, fmt.Errorf("failed to parse JSON: %w", err) } return tmp, nil - case qvalue.QValueKindArrayJSON, qvalue.QValueKindArrayJSONB: + case types.QValueKindArrayJSON, types.QValueKindArrayJSONB: tmp, err := parseJSON(value, true) if err != nil { return nil, fmt.Errorf("failed to parse JSON Array: %w", err) } return tmp, nil - case qvalue.QValueKindInt16: + case types.QValueKindInt16: intVal := value.(int16) - return qvalue.QValueInt16{Val: intVal}, nil - case qvalue.QValueKindInt32: + return types.QValueInt16{Val: intVal}, nil + case types.QValueKindInt32: intVal := value.(int32) - return qvalue.QValueInt32{Val: intVal}, nil - case qvalue.QValueKindInt64: + return types.QValueInt32{Val: intVal}, nil + case types.QValueKindInt64: intVal := value.(int64) - return qvalue.QValueInt64{Val: intVal}, nil - case qvalue.QValueKindFloat32: + return types.QValueInt64{Val: intVal}, nil + case types.QValueKindFloat32: floatVal := value.(float32) - return qvalue.QValueFloat32{Val: floatVal}, nil - case qvalue.QValueKindFloat64: + return types.QValueFloat32{Val: floatVal}, nil + case types.QValueKindFloat64: floatVal := value.(float64) - return qvalue.QValueFloat64{Val: floatVal}, nil - case qvalue.QValueKindQChar: - return qvalue.QValueQChar{Val: uint8(value.(rune))}, nil - case qvalue.QValueKindString: + return types.QValueFloat64{Val: floatVal}, nil + case types.QValueKindQChar: + return types.QValueQChar{Val: uint8(value.(rune))}, nil + case types.QValueKindString: // handling all unsupported types with strings as well for now. 
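// Illustrative sketch, not part of the diff: the string fallback below calls
// c.convertToString, which renders values through pgtype.Map.Encode in Postgres
// text format after re-typing pgx's type-erased pgtype.Range[any]. This is a
// standalone approximation of that mechanism (assumes pgx v5), not the
// connector code itself.
package main

import (
	"fmt"

	"github.com/jackc/pgx/v5/pgtype"
)

func main() {
	m := pgtype.NewMap()
	lower, upper := int32(1), int32(101)
	// A typed range the int4range codec knows how to encode.
	r := pgtype.Range[*int32]{
		Lower:     &lower,
		Upper:     &upper,
		LowerType: pgtype.Inclusive,
		UpperType: pgtype.Exclusive,
		Valid:     true,
	}
	buf, err := m.Encode(pgtype.Int4rangeOID, pgtype.TextFormatCode, r, nil)
	if err != nil {
		panic(err)
	}
	fmt.Println(string(buf)) // [1,101)
}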
- return qvalue.QValueString{Val: fmt.Sprint(value)}, nil - case qvalue.QValueKindEnum: - return qvalue.QValueEnum{Val: fmt.Sprint(value)}, nil - case qvalue.QValueKindUUID: + str := c.convertToString(oid, value) + return types.QValueString{Val: str}, nil + case types.QValueKindEnum: + return types.QValueEnum{Val: fmt.Sprint(value)}, nil + case types.QValueKindUUID: tmp, err := parseUUID(value) if err != nil { return nil, fmt.Errorf("failed to parse UUID: %w", err) } - return qvalue.QValueUUID{Val: tmp}, nil - case qvalue.QValueKindArrayUUID: + return types.QValueUUID{Val: tmp}, nil + case types.QValueKindArrayUUID: tmp, err := parseUUIDArray(value) if err != nil { return nil, fmt.Errorf("failed to parse UUID array: %w", err) } return tmp, nil - case qvalue.QValueKindINET: + case types.QValueKindINET: switch v := value.(type) { case string: - return qvalue.QValueINET{Val: v}, nil + return types.QValueINET{Val: v}, nil case netip.Prefix: - return qvalue.QValueINET{Val: v.String()}, nil + return types.QValueINET{Val: v.String()}, nil default: return nil, fmt.Errorf("failed to parse INET: %v", v) } - case qvalue.QValueKindCIDR: + case types.QValueKindCIDR: switch v := value.(type) { case string: - return qvalue.QValueCIDR{Val: v}, nil + return types.QValueCIDR{Val: v}, nil case netip.Prefix: - return qvalue.QValueCIDR{Val: v.String()}, nil + return types.QValueCIDR{Val: v.String()}, nil default: return nil, fmt.Errorf("failed to parse CIDR: %v", value) } - case qvalue.QValueKindMacaddr: + case types.QValueKindMacaddr: switch v := value.(type) { case string: - return qvalue.QValueMacaddr{Val: v}, nil + return types.QValueMacaddr{Val: v}, nil case net.HardwareAddr: - return qvalue.QValueMacaddr{Val: v.String()}, nil + return types.QValueMacaddr{Val: v.String()}, nil default: return nil, fmt.Errorf("failed to parse MACADDR: %v %T", value, v) } - case qvalue.QValueKindBytes: + case types.QValueKindBytes: rawBytes := value.([]byte) - return qvalue.QValueBytes{Val: rawBytes}, nil - case qvalue.QValueKindNumeric: + return types.QValueBytes{Val: rawBytes}, nil + case types.QValueKindNumeric: numVal := value.(pgtype.Numeric) if numVal.Valid { - num, err := numericToDecimal(numVal) + num, ok := validNumericToDecimal(numVal) + if !ok { + return types.QValueNull(types.QValueKindNumeric), nil + } + precision, scale := datatypes.ParseNumericTypmod(typmod) + return types.QValueNumeric{ + Val: num, + Precision: precision, + Scale: scale, + }, nil + } + case types.QValueKindArrayFloat32: + switch value := value.(type) { + case string: + typeData := customTypeMapping[oid] + switch typeData.Name { + case "vector": + var vector pgvector.Vector + if err := vector.Parse(value); err != nil { + return nil, fmt.Errorf("[pg] failed to parse vector: %w", err) + } + return types.QValueArrayFloat32{Val: vector.Slice()}, nil + case "halfvec": + var halfvec pgvector.HalfVector + if err := halfvec.Parse(value); err != nil { + return nil, fmt.Errorf("[pg] failed to parse halfvec: %w", err) + } + return types.QValueArrayFloat32{Val: halfvec.Slice()}, nil + case "sparsevec": + var sparsevec pgvector.SparseVector + if err := sparsevec.Parse(value); err != nil { + return nil, fmt.Errorf("[pg] failed to parse sparsevec: %w", err) + } + return types.QValueArrayFloat32{Val: sparsevec.Slice()}, nil + default: + return nil, fmt.Errorf("unknown float array type %s", typeData.Name) + } + case interface{ Slice() []float32 }: + return types.QValueArrayFloat32{Val: value.Slice()}, nil + default: + a, err := 
convertToArray[float32](qvalueKind, value) if err != nil { - return nil, fmt.Errorf("failed to convert numeric [%v] to decimal: %w", value, err) + return nil, err } - return num, nil - } - case qvalue.QValueKindArrayFloat32: - a, err := convertToArray[float32](qvalueKind, value) - if err != nil { - return nil, err + return types.QValueArrayFloat32{Val: a}, nil } - return qvalue.QValueArrayFloat32{Val: a}, nil - case qvalue.QValueKindArrayFloat64: + case types.QValueKindArrayFloat64: a, err := convertToArray[float64](qvalueKind, value) if err != nil { return nil, err } - return qvalue.QValueArrayFloat64{Val: a}, nil - case qvalue.QValueKindArrayInt16: + return types.QValueArrayFloat64{Val: a}, nil + case types.QValueKindArrayInt16: a, err := convertToArray[int16](qvalueKind, value) if err != nil { return nil, err } - return qvalue.QValueArrayInt16{Val: a}, nil - case qvalue.QValueKindArrayInt32: + return types.QValueArrayInt16{Val: a}, nil + case types.QValueKindArrayInt32: a, err := convertToArray[int32](qvalueKind, value) if err != nil { return nil, err } - return qvalue.QValueArrayInt32{Val: a}, nil - case qvalue.QValueKindArrayInt64: + return types.QValueArrayInt32{Val: a}, nil + case types.QValueKindArrayInt64: a, err := convertToArray[int64](qvalueKind, value) if err != nil { return nil, err } - return qvalue.QValueArrayInt64{Val: a}, nil - case qvalue.QValueKindArrayDate, qvalue.QValueKindArrayTimestamp, qvalue.QValueKindArrayTimestampTZ: + return types.QValueArrayInt64{Val: a}, nil + case types.QValueKindArrayDate, types.QValueKindArrayTimestamp, types.QValueKindArrayTimestampTZ: a, err := convertToArray[time.Time](qvalueKind, value) if err != nil { return nil, err } switch qvalueKind { - case qvalue.QValueKindArrayDate: - return qvalue.QValueArrayDate{Val: a}, nil - case qvalue.QValueKindArrayTimestamp: - return qvalue.QValueArrayTimestamp{Val: a}, nil - case qvalue.QValueKindArrayTimestampTZ: - return qvalue.QValueArrayTimestampTZ{Val: a}, nil - } - case qvalue.QValueKindArrayBoolean: + case types.QValueKindArrayDate: + return types.QValueArrayDate{Val: a}, nil + case types.QValueKindArrayTimestamp: + return types.QValueArrayTimestamp{Val: a}, nil + case types.QValueKindArrayTimestampTZ: + return types.QValueArrayTimestampTZ{Val: a}, nil + } + case types.QValueKindArrayBoolean: a, err := convertToArray[bool](qvalueKind, value) if err != nil { return nil, err } - return qvalue.QValueArrayBoolean{Val: a}, nil - case qvalue.QValueKindArrayString: + return types.QValueArrayBoolean{Val: a}, nil + case types.QValueKindArrayString: if str, ok := value.(string); ok { delim := byte(',') if typeData, ok := customTypeMapping[oid]; ok { delim = typeData.Delim } - return qvalue.QValueArrayString{Val: shared.ParsePgArrayStringToStringSlice(str, delim)}, nil + arr := shared.ParsePgArrayStringToStringSlice(str, delim) + return types.QValueArrayString{Val: arr}, nil } else { - a, err := convertToArray[string](qvalueKind, value) + // Arrays of unsupported types become string arrays too + arr, err := c.convertToStringArray(qvalueKind, oid, value) if err != nil { return nil, err } - return qvalue.QValueArrayString{Val: a}, nil + return types.QValueArrayString{Val: arr}, nil } - case qvalue.QValueKindArrayEnum: + case types.QValueKindArrayEnum: if str, ok := value.(string); ok { delim := byte(',') if typeData, ok := customTypeMapping[oid]; ok { delim = typeData.Delim } - return qvalue.QValueArrayEnum{Val: shared.ParsePgArrayStringToStringSlice(str, delim)}, nil + return types.QValueArrayEnum{Val: 
shared.ParsePgArrayStringToStringSlice(str, delim)}, nil } else { a, err := convertToArray[string](qvalueKind, value) if err != nil { return nil, err } - return qvalue.QValueArrayEnum{Val: a}, nil + return types.QValueArrayEnum{Val: a}, nil + } + case types.QValueKindArrayNumeric: + if v, ok := value.([]any); ok { + numArr := make([]decimal.Decimal, 0, len(v)) + allValid := true + for _, anyVal := range v { + if anyVal == nil { + numArr = append(numArr, decimal.Decimal{}) + continue + } + numVal, ok := anyVal.(pgtype.Numeric) + if !ok { + return nil, fmt.Errorf("failed to cast ArrayNumeric as []pgtype.Numeric: got %T", anyVal) + } + if !numVal.Valid { + allValid = false + break + } + num, ok := validNumericToDecimal(numVal) + if !ok { + numArr = append(numArr, decimal.Decimal{}) + } else { + numArr = append(numArr, num) + } + } + if allValid { + precision, scale := datatypes.ParseNumericTypmod(typmod) + return types.QValueArrayNumeric{ + Val: numArr, + Precision: precision, + Scale: scale, + }, nil + } } - case qvalue.QValueKindPoint: + case types.QValueKindPoint: coord := value.(pgtype.Point).P - return qvalue.QValuePoint{ + return types.QValuePoint{ Val: fmt.Sprintf("POINT(%f %f)", coord.X, coord.Y), }, nil - case qvalue.QValueKindHStore: - return qvalue.QValueHStore{Val: fmt.Sprint(value)}, nil - case qvalue.QValueKindGeography, qvalue.QValueKindGeometry: + case types.QValueKindHStore: + return types.QValueHStore{Val: fmt.Sprint(value)}, nil + case types.QValueKindGeography, types.QValueKindGeometry: wkbString, ok := value.(string) wkt, err := datatypes.GeoValidate(wkbString) if err != nil || !ok { - return qvalue.QValueNull(qvalue.QValueKindGeography), nil - } else if qvalueKind == qvalue.QValueKindGeography { - return qvalue.QValueGeography{Val: wkt}, nil + return types.QValueNull(types.QValueKindGeography), nil + } else if qvalueKind == types.QValueKindGeography { + return types.QValueGeography{Val: wkt}, nil } else { - return qvalue.QValueGeometry{Val: wkt}, nil + return types.QValueGeometry{Val: wkt}, nil } default: if textVal, ok := value.(string); ok { - return qvalue.QValueString{Val: textVal}, nil + return types.QValueString{Val: textVal}, nil } } // parsing into pgtype failed. - return nil, fmt.Errorf("failed to parse value %v into QValueKind %v", value, qvalueKind) -} - -func numericToDecimal(numVal pgtype.Numeric) (qvalue.QValue, error) { - switch { - case !numVal.Valid: - return qvalue.QValueNull(qvalue.QValueKindNumeric), errors.New("invalid numeric") - case numVal.NaN, numVal.InfinityModifier == pgtype.Infinity, - numVal.InfinityModifier == pgtype.NegativeInfinity: - return qvalue.QValueNull(qvalue.QValueKindNumeric), nil - default: - return qvalue.QValueNumeric{Val: decimal.NewFromBigInt(numVal.Int, numVal.Exp)}, nil - } + return nil, fmt.Errorf("failed to parse value %v (%T) into QValueKind %v", value, value, qvalueKind) } -func customTypeToQKind(typeData shared.CustomDataType) qvalue.QValueKind { - if typeData.Type == 'e' { - if typeData.Delim != 0 { - return qvalue.QValueKindArrayEnum - } else { - return qvalue.QValueKindEnum - } - } - - if typeData.Delim != 0 { - return qvalue.QValueKindArrayString - } - - switch typeData.Name { - case "geometry": - return qvalue.QValueKindGeometry - case "geography": - return qvalue.QValueKindGeography - case "hstore": - return qvalue.QValueKindHStore - default: - return qvalue.QValueKindString - } -} - -// Postgres does not like timestamps of the form 2006-01-02 15:04:05 +0000 UTC -// in tstzrange. 
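// Illustrative aside, not part of the diff: the Precision and Scale fields
// attached to QValueNumeric/QValueArrayNumeric above are derived from the
// column's typmod via datatypes.ParseNumericTypmod. For NUMERIC(p,s) Postgres
// encodes typmod = ((p << 16) | s) + 4, with -1 meaning "unconstrained".
// parseNumericTypmod below is a simplified stand-in shown only to make that
// encoding concrete; the real helper may treat -1 and defaults differently.
package main

import "fmt"

func parseNumericTypmod(typmod int32) (precision, scale int16) {
	if typmod == -1 {
		return 0, 0 // unconstrained; callers typically substitute a default
	}
	typmod -= 4 // strip VARHDRSZ
	return int16(typmod >> 16), int16(typmod & 0xFFFF)
}

func main() {
	p, s := parseNumericTypmod(655366) // typmod stored for NUMERIC(10,2)
	fmt.Println(p, s)                  // 10 2
}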
-// convertTimeRangeBound removes the +0000 UTC part -func convertTimeRangeBound(timeBound any) (string, error) { - if timeBound, isInfinite := timeBound.(pgtype.InfinityModifier); isInfinite { - return timeBound.String(), nil - } - - layout := "2006-01-02 15:04:05 -0700 MST" - postgresFormat := "2006-01-02 15:04:05" - if timeBound != nil { - lowerParsed, err := time.Parse(layout, fmt.Sprint(timeBound)) - if err != nil { - return "", fmt.Errorf("unexpected bound value in tstzrange. Error: %v", err) - } - return lowerParsed.Format(postgresFormat), nil +func validNumericToDecimal(numVal pgtype.Numeric) (decimal.Decimal, bool) { + if numVal.NaN || numVal.InfinityModifier == pgtype.Infinity || + numVal.InfinityModifier == pgtype.NegativeInfinity { + return decimal.Decimal{}, false + } else { + return decimal.NewFromBigInt(numVal.Int, numVal.Exp), true } - return "", nil } diff --git a/flow/connectors/postgres/schema.go b/flow/connectors/postgres/schema.go index 07dab7375b..d7be07f38e 100644 --- a/flow/connectors/postgres/schema.go +++ b/flow/connectors/postgres/schema.go @@ -98,7 +98,7 @@ func (c *PostgresConnector) GetTablesInSchema( return &protos.SchemaTablesResponse{Tables: tables}, nil } -func (c *PostgresConnector) GetColumns(ctx context.Context, schema string, table string) (*protos.TableColumnsResponse, error) { +func (c *PostgresConnector) GetColumns(ctx context.Context, version uint32, schema string, table string) (*protos.TableColumnsResponse, error) { rows, err := c.conn.Query(ctx, `SELECT DISTINCT attname AS column_name, atttypid AS oid, @@ -131,7 +131,7 @@ func (c *PostgresConnector) GetColumns(ctx context.Context, schema string, table Name: columnName.String, Type: datatype.String, IsKey: isPkey.Bool, - Qkind: string(c.postgresOIDToQValueKind(oid, c.customTypeMapping)), + Qkind: string(c.postgresOIDToQValueKind(oid, c.customTypeMapping, version)), }, nil }) if err != nil { diff --git a/flow/connectors/postgres/schema_delta_test_constants.go b/flow/connectors/postgres/schema_delta_test_constants.go index 242940e814..2559d19d52 100644 --- a/flow/connectors/postgres/schema_delta_test_constants.go +++ b/flow/connectors/postgres/schema_delta_test_constants.go @@ -2,109 +2,109 @@ package connpostgres import ( "github.com/PeerDB-io/peerdb/flow/generated/protos" - "github.com/PeerDB-io/peerdb/flow/model/qvalue" + "github.com/PeerDB-io/peerdb/flow/shared/types" ) var AddAllColumnTypesFields = []*protos.FieldDescription{ { Name: "id", - Type: string(qvalue.QValueKindInt32), + Type: string(types.QValueKindInt32), TypeModifier: -1, Nullable: false, }, { Name: "c2", - Type: string(qvalue.QValueKindBoolean), + Type: string(types.QValueKindBoolean), TypeModifier: -1, Nullable: true, }, { Name: "c3", - Type: string(qvalue.QValueKindBytes), + Type: string(types.QValueKindBytes), TypeModifier: -1, Nullable: true, }, { Name: "c4", - Type: string(qvalue.QValueKindDate), + Type: string(types.QValueKindDate), TypeModifier: -1, Nullable: true, }, { Name: "c5", - Type: string(qvalue.QValueKindFloat32), + Type: string(types.QValueKindFloat32), TypeModifier: -1, Nullable: true, }, { Name: "c6", - Type: string(qvalue.QValueKindFloat64), + Type: string(types.QValueKindFloat64), TypeModifier: -1, Nullable: true, }, { Name: "c7", - Type: string(qvalue.QValueKindInt16), + Type: string(types.QValueKindInt16), TypeModifier: -1, Nullable: true, }, { Name: "c8", - Type: string(qvalue.QValueKindInt32), + Type: string(types.QValueKindInt32), TypeModifier: -1, Nullable: true, }, { Name: "c9", - Type: 
string(qvalue.QValueKindInt64), + Type: string(types.QValueKindInt64), TypeModifier: -1, Nullable: true, }, { Name: "c10", - Type: string(qvalue.QValueKindJSON), + Type: string(types.QValueKindJSON), TypeModifier: -1, Nullable: true, }, { Name: "c11", - Type: string(qvalue.QValueKindNumeric), + Type: string(types.QValueKindNumeric), TypeModifier: -1, Nullable: true, }, { Name: "c12", - Type: string(qvalue.QValueKindString), + Type: string(types.QValueKindString), TypeModifier: -1, Nullable: true, }, { Name: "c13", - Type: string(qvalue.QValueKindQChar), + Type: string(types.QValueKindQChar), TypeModifier: -1, Nullable: true, }, { Name: "c14", - Type: string(qvalue.QValueKindTime), + Type: string(types.QValueKindTime), TypeModifier: -1, Nullable: true, }, { Name: "c15", - Type: string(qvalue.QValueKindTimestamp), + Type: string(types.QValueKindTimestamp), TypeModifier: -1, Nullable: true, }, { Name: "c16", - Type: string(qvalue.QValueKindTimestampTZ), + Type: string(types.QValueKindTimestampTZ), TypeModifier: -1, Nullable: true, }, { Name: "c17", - Type: string(qvalue.QValueKindUUID), + Type: string(types.QValueKindUUID), TypeModifier: -1, Nullable: true, }, @@ -113,61 +113,61 @@ var AddAllColumnTypesFields = []*protos.FieldDescription{ var TrickyFields = []*protos.FieldDescription{ { Name: "id", - Type: string(qvalue.QValueKindInt32), + Type: string(types.QValueKindInt32), TypeModifier: -1, Nullable: false, }, { Name: "c1", - Type: string(qvalue.QValueKindString), + Type: string(types.QValueKindString), TypeModifier: -1, Nullable: true, }, { Name: "C1", - Type: string(qvalue.QValueKindString), + Type: string(types.QValueKindString), TypeModifier: -1, Nullable: true, }, { Name: "C 1", - Type: string(qvalue.QValueKindString), + Type: string(types.QValueKindString), TypeModifier: -1, Nullable: true, }, { Name: "right", - Type: string(qvalue.QValueKindString), + Type: string(types.QValueKindString), TypeModifier: -1, Nullable: true, }, { Name: "select", - Type: string(qvalue.QValueKindString), + Type: string(types.QValueKindString), TypeModifier: -1, Nullable: true, }, { Name: "XMIN", - Type: string(qvalue.QValueKindString), + Type: string(types.QValueKindString), TypeModifier: -1, Nullable: true, }, { Name: "Cariño", - Type: string(qvalue.QValueKindString), + Type: string(types.QValueKindString), TypeModifier: -1, Nullable: true, }, { Name: "±ªþ³§", - Type: string(qvalue.QValueKindString), + Type: string(types.QValueKindString), TypeModifier: -1, Nullable: true, }, { Name: "カラム", - Type: string(qvalue.QValueKindString), + Type: string(types.QValueKindString), TypeModifier: -1, Nullable: true, }, @@ -176,25 +176,25 @@ var TrickyFields = []*protos.FieldDescription{ var WhitespaceFields = []*protos.FieldDescription{ { Name: " ", - Type: string(qvalue.QValueKindInt32), + Type: string(types.QValueKindInt32), TypeModifier: -1, Nullable: false, }, { Name: " ", - Type: string(qvalue.QValueKindString), + Type: string(types.QValueKindString), TypeModifier: -1, Nullable: true, }, { Name: " ", - Type: string(qvalue.QValueKindInt64), + Type: string(types.QValueKindInt64), TypeModifier: -1, Nullable: true, }, { Name: "\t", - Type: string(qvalue.QValueKindDate), + Type: string(types.QValueKindDate), TypeModifier: -1, Nullable: true, }, diff --git a/flow/connectors/pubsub/qrep.go b/flow/connectors/pubsub/qrep.go index 1f6bea6cd8..e47ececc5d 100644 --- a/flow/connectors/pubsub/qrep.go +++ b/flow/connectors/pubsub/qrep.go @@ -12,6 +12,7 @@ import ( "github.com/PeerDB-io/peerdb/flow/generated/protos" 
"github.com/PeerDB-io/peerdb/flow/model" "github.com/PeerDB-io/peerdb/flow/pua" + "github.com/PeerDB-io/peerdb/flow/shared" ) func (*PubSubConnector) SetupQRepMetadataTables(_ context.Context, _ *protos.QRepConfig) error { @@ -23,11 +24,11 @@ func (c *PubSubConnector) SyncQRepRecords( config *protos.QRepConfig, partition *protos.QRepPartition, stream *model.QRecordStream, -) (int64, error) { +) (int64, shared.QRepWarnings, error) { startTime := time.Now() schema, err := stream.Schema() if err != nil { - return 0, err + return 0, nil, err } topiccache := topicCache{cache: make(map[string]*pubsub.Topic)} publish := make(chan publishResult, 32) @@ -37,7 +38,7 @@ func (c *PubSubConnector) SyncQRepRecords( queueCtx, queueErr := context.WithCancelCause(ctx) pool, err := c.createPool(queueCtx, config.Env, config.Script, config.FlowJobName, &topiccache, publish, queueErr) if err != nil { - return 0, err + return 0, nil, err } defer pool.Close() @@ -116,18 +117,18 @@ Loop: } if err := pool.Wait(queueCtx); err != nil { - return 0, err + return 0, nil, err } close(publish) topiccache.Stop(queueCtx) select { case <-queueCtx.Done(): - return 0, queueCtx.Err() + return 0, nil, queueCtx.Err() case <-waitChan: } if err := c.FinishQRepPartition(ctx, partition, config.FlowJobName, startTime); err != nil { - return 0, err + return 0, nil, err } - return numRecords.Load(), nil + return numRecords.Load(), nil, nil } diff --git a/flow/connectors/s3/qrep.go b/flow/connectors/s3/qrep.go index b7241b1f05..b99f7549b3 100644 --- a/flow/connectors/s3/qrep.go +++ b/flow/connectors/s3/qrep.go @@ -7,10 +7,10 @@ import ( "github.com/hamba/avro/v2/ocf" "github.com/PeerDB-io/peerdb/flow/connectors/utils" - avro "github.com/PeerDB-io/peerdb/flow/connectors/utils/avro" "github.com/PeerDB-io/peerdb/flow/generated/protos" "github.com/PeerDB-io/peerdb/flow/model" - "github.com/PeerDB-io/peerdb/flow/model/qvalue" + "github.com/PeerDB-io/peerdb/flow/shared" + "github.com/PeerDB-io/peerdb/flow/shared/types" ) func (c *S3Connector) SyncQRepRecords( @@ -18,31 +18,31 @@ func (c *S3Connector) SyncQRepRecords( config *protos.QRepConfig, partition *protos.QRepPartition, stream *model.QRecordStream, -) (int64, error) { +) (int64, shared.QRepWarnings, error) { schema, err := stream.Schema() if err != nil { - return 0, err + return 0, nil, err } dstTableName := config.DestinationTableIdentifier avroSchema, err := getAvroSchema(ctx, config.Env, dstTableName, schema) if err != nil { - return 0, err + return 0, nil, err } numRecords, err := c.writeToAvroFile(ctx, config.Env, stream, avroSchema, partition.PartitionId, config.FlowJobName) if err != nil { - return 0, err + return 0, nil, err } - return numRecords, nil + return numRecords, nil, nil } func getAvroSchema( ctx context.Context, env map[string]string, dstTableName string, - schema qvalue.QRecordSchema, + schema types.QRecordSchema, ) (*model.QRecordAvroSchemaDefinition, error) { // TODO: Support avro-incompatible column names avroSchema, err := model.GetAvroSchemaDefinition(ctx, env, dstTableName, schema, protos.DBType_S3, nil) @@ -68,8 +68,22 @@ func (c *S3Connector) writeToAvroFile( s3AvroFileKey := fmt.Sprintf("%s/%s/%s.avro", s3o.Prefix, jobName, partitionID) - writer := avro.NewPeerDBOCFWriter(stream, avroSchema, ocf.Null, protos.DBType_S3) - avroFile, err := writer.WriteRecordsToS3(ctx, env, s3o.Bucket, s3AvroFileKey, c.credentialsProvider, nil, nil) + var codec ocf.CodecName + switch c.codec { + case protos.AvroCodec_Null: + codec = ocf.Null + case protos.AvroCodec_Deflate: + 
codec = ocf.Deflate + case protos.AvroCodec_Snappy: + codec = ocf.Snappy + case protos.AvroCodec_ZStandard: + codec = ocf.ZStandard + default: + return 0, fmt.Errorf("unsupported codec %s", c.codec) + } + + writer := utils.NewPeerDBOCFWriter(stream, avroSchema, codec, protos.DBType_S3) + avroFile, err := writer.WriteRecordsToS3(ctx, env, s3o.Bucket, s3AvroFileKey, c.credentialsProvider, nil, nil, nil) if err != nil { return 0, fmt.Errorf("failed to write records to S3: %w", err) } diff --git a/flow/connectors/s3/s3.go b/flow/connectors/s3/s3.go index 9a3aa9865d..d009b49846 100644 --- a/flow/connectors/s3/s3.go +++ b/flow/connectors/s3/s3.go @@ -5,7 +5,6 @@ import ( "fmt" "strconv" - "github.com/aws/aws-sdk-go-v2/aws" "github.com/aws/aws-sdk-go-v2/service/s3" "go.temporal.io/sdk/log" @@ -22,6 +21,7 @@ type S3Connector struct { credentialsProvider utils.AWSCredentialsProvider client s3.Client url string + codec protos.AvroCodec } func NewS3Connector( @@ -30,15 +30,7 @@ func NewS3Connector( ) (*S3Connector, error) { logger := internal.LoggerFromCtx(ctx) - provider, err := utils.GetAWSCredentialsProvider(ctx, "s3", utils.PeerAWSCredentials{ - Credentials: aws.Credentials{ - AccessKeyID: config.GetAccessKeyId(), - SecretAccessKey: config.GetSecretAccessKey(), - }, - RoleArn: config.RoleArn, - EndpointUrl: config.Endpoint, - Region: config.GetRegion(), - }) + provider, err := utils.GetAWSCredentialsProvider(ctx, "s3", utils.NewPeerAWSCredentials(config)) if err != nil { return nil, err } @@ -53,11 +45,12 @@ func NewS3Connector( return nil, err } return &S3Connector{ - url: config.Url, PostgresMetadata: pgMetadata, client: *s3Client, credentialsProvider: provider, logger: logger, + url: config.Url, + codec: config.Codec, }, nil } @@ -85,8 +78,10 @@ func (c *S3Connector) ConnectionActive(ctx context.Context) error { func (c *S3Connector) SyncRecords(ctx context.Context, req *model.SyncRecordsRequest[model.RecordItems]) (*model.SyncResponse, error) { tableNameRowsMapping := utils.InitialiseTableRowsMap(req.TableMappings) - streamReq := model.NewRecordsToStreamRequest(req.Records.GetRecords(), tableNameRowsMapping, req.SyncBatchID) - recordStream, err := utils.RecordsToRawTableStream(streamReq) + streamReq := model.NewRecordsToStreamRequest( + req.Records.GetRecords(), tableNameRowsMapping, req.SyncBatchID, false, protos.DBType_S3, + ) + recordStream, err := utils.RecordsToRawTableStream(streamReq, nil) if err != nil { return nil, fmt.Errorf("failed to convert records to raw table stream: %w", err) } @@ -94,11 +89,12 @@ func (c *S3Connector) SyncRecords(ctx context.Context, req *model.SyncRecordsReq FlowJobName: req.FlowJobName, DestinationTableIdentifier: "raw_table_" + req.FlowJobName, Env: req.Env, + Version: req.Version, } partition := &protos.QRepPartition{ PartitionId: strconv.FormatInt(req.SyncBatchID, 10), } - numRecords, err := c.SyncQRepRecords(ctx, qrepConfig, partition, recordStream) + numRecords, _, err := c.SyncQRepRecords(ctx, qrepConfig, partition, recordStream) if err != nil { return nil, err } diff --git a/flow/connectors/snowflake/avro_file_writer_test.go b/flow/connectors/snowflake/avro_file_writer_test.go index 4ea2001a34..95adfdf1cc 100644 --- a/flow/connectors/snowflake/avro_file_writer_test.go +++ b/flow/connectors/snowflake/avro_file_writer_test.go @@ -11,56 +11,60 @@ import ( "github.com/shopspring/decimal" "github.com/stretchr/testify/require" - avro "github.com/PeerDB-io/peerdb/flow/connectors/utils/avro" + "github.com/PeerDB-io/peerdb/flow/connectors/utils" 
"github.com/PeerDB-io/peerdb/flow/generated/protos" "github.com/PeerDB-io/peerdb/flow/model" - "github.com/PeerDB-io/peerdb/flow/model/qvalue" + "github.com/PeerDB-io/peerdb/flow/shared/types" ) // createQValue creates a QValue of the appropriate kind for a given placeholder. -func createQValue(t *testing.T, kind qvalue.QValueKind, placeholder int) qvalue.QValue { +func createQValue(t *testing.T, kind types.QValueKind, placeholder int) types.QValue { t.Helper() switch kind { - case qvalue.QValueKindInt16: - return qvalue.QValueInt16{Val: int16(placeholder)} - case qvalue.QValueKindInt32: - return qvalue.QValueInt32{Val: int32(placeholder)} - case qvalue.QValueKindInt64: - return qvalue.QValueInt64{Val: int64(placeholder)} - case qvalue.QValueKindFloat32: - return qvalue.QValueFloat32{Val: float32(placeholder) / 4.0} - case qvalue.QValueKindFloat64: - return qvalue.QValueFloat64{Val: float64(placeholder) / 4.0} - case qvalue.QValueKindBoolean: - return qvalue.QValueBoolean{Val: placeholder%2 == 0} - case qvalue.QValueKindString: - return qvalue.QValueString{Val: fmt.Sprintf("string%d", placeholder)} - case qvalue.QValueKindTimestamp: - return qvalue.QValueTimestamp{Val: time.Now()} - case qvalue.QValueKindTimestampTZ: - return qvalue.QValueTimestampTZ{Val: time.Now()} - case qvalue.QValueKindTime: - return qvalue.QValueTime{Val: time.Now()} - case qvalue.QValueKindTimeTZ: - return qvalue.QValueTimeTZ{Val: time.Now()} - case qvalue.QValueKindDate: - return qvalue.QValueDate{Val: time.Now()} - case qvalue.QValueKindNumeric: - return qvalue.QValueNumeric{Val: decimal.New(int64(placeholder), 1)} - case qvalue.QValueKindUUID: - return qvalue.QValueUUID{Val: uuid.New()} // assuming you have the github.com/google/uuid package - case qvalue.QValueKindQChar: - return qvalue.QValueQChar{Val: uint8(48 + placeholder%10)} - // case qvalue.QValueKindArray: + case types.QValueKindInt16: + return types.QValueInt16{Val: int16(placeholder)} + case types.QValueKindInt32: + return types.QValueInt32{Val: int32(placeholder)} + case types.QValueKindInt64: + return types.QValueInt64{Val: int64(placeholder)} + case types.QValueKindFloat32: + return types.QValueFloat32{Val: float32(placeholder) / 4.0} + case types.QValueKindFloat64: + return types.QValueFloat64{Val: float64(placeholder) / 4.0} + case types.QValueKindBoolean: + return types.QValueBoolean{Val: placeholder%2 == 0} + case types.QValueKindString: + return types.QValueString{Val: fmt.Sprintf("string%d", placeholder)} + case types.QValueKindTimestamp: + return types.QValueTimestamp{Val: time.Now()} + case types.QValueKindTimestampTZ: + return types.QValueTimestampTZ{Val: time.Now()} + case types.QValueKindTime: + return types.QValueTime{Val: 21600000000} + case types.QValueKindTimeTZ: + return types.QValueTimeTZ{Val: 21600000000} + case types.QValueKindDate: + return types.QValueDate{Val: time.Now()} + case types.QValueKindNumeric: + return types.QValueNumeric{Val: decimal.New(int64(placeholder), 1)} + case types.QValueKindUUID: + return types.QValueUUID{Val: uuid.New()} // assuming you have the github.com/google/uuid package + case types.QValueKindQChar: + return types.QValueQChar{Val: uint8(48 + placeholder%10)} + // case types.QValueKindArray: // value = []int{1, 2, 3} // placeholder array, replace with actual logic - // case qvalue.QValueKindJSON: + // case types.QValueKindJSON: // value = `{"key": "value"}` // placeholder JSON, replace with actual logic - case qvalue.QValueKindBytes: - return qvalue.QValueBytes{Val: []byte("sample bytes")} // 
placeholder bytes, replace with actual logic + case types.QValueKindBytes: + return types.QValueBytes{Val: []byte("sample bytes")} // placeholder bytes, replace with actual logic + case types.QValueKindArrayNumeric: + return types.QValueArrayNumeric{Val: []decimal.Decimal{ + decimal.New(int64(placeholder), 1), + }} default: require.Failf(t, "unsupported QValueKind", "unsupported QValueKind: %s", kind) - return qvalue.QValueNull(kind) + return types.QValueNull(kind) } } @@ -70,37 +74,38 @@ func generateRecords( nullable bool, numRows uint32, allnulls bool, -) (*model.QRecordStream, qvalue.QRecordSchema) { +) (*model.QRecordStream, types.QRecordSchema) { t.Helper() - allQValueKinds := []qvalue.QValueKind{ - qvalue.QValueKindFloat32, - qvalue.QValueKindFloat64, - qvalue.QValueKindInt16, - qvalue.QValueKindInt32, - qvalue.QValueKindInt64, - qvalue.QValueKindBoolean, - // qvalue.QValueKindArray, - qvalue.QValueKindString, - qvalue.QValueKindTimestamp, - qvalue.QValueKindTimestampTZ, - qvalue.QValueKindTime, - qvalue.QValueKindTimeTZ, - qvalue.QValueKindDate, - qvalue.QValueKindNumeric, - qvalue.QValueKindBytes, - qvalue.QValueKindUUID, - qvalue.QValueKindQChar, - // qvalue.QValueKindJSON, + allQValueKinds := []types.QValueKind{ + types.QValueKindFloat32, + types.QValueKindFloat64, + types.QValueKindInt16, + types.QValueKindInt32, + types.QValueKindInt64, + types.QValueKindBoolean, + // types.QValueKindArray, + types.QValueKindString, + types.QValueKindTimestamp, + types.QValueKindTimestampTZ, + types.QValueKindTime, + types.QValueKindTimeTZ, + types.QValueKindDate, + types.QValueKindNumeric, + types.QValueKindBytes, + types.QValueKindUUID, + types.QValueKindQChar, + // types.QValueKindJSON, + types.QValueKindArrayNumeric, } numKinds := len(allQValueKinds) - schema := qvalue.QRecordSchema{ - Fields: make([]qvalue.QField, numKinds), + schema := types.QRecordSchema{ + Fields: make([]types.QField, numKinds), } for i, kind := range allQValueKinds { - schema.Fields[i] = qvalue.QField{ + schema.Fields[i] = types.QField{ Name: string(kind), Type: kind, Nullable: nullable, @@ -110,15 +115,15 @@ func generateRecords( // Create sample records records := &model.QRecordBatch{ Schema: schema, - Records: make([][]qvalue.QValue, numRows), + Records: make([][]types.QValue, numRows), } for row := range numRows { - entries := make([]qvalue.QValue, len(allQValueKinds)) + entries := make([]types.QValue, len(allQValueKinds)) for i, kind := range allQValueKinds { if allnulls { - entries[i] = qvalue.QValueNull(kind) + entries[i] = types.QValueNull(kind) } else { entries[i] = createQValue(t, kind, int(row)*i) } @@ -147,7 +152,7 @@ func TestWriteRecordsToAvroFileHappyPath(t *testing.T) { t.Logf("[test] avroSchema: %v", avroSchema) // Call function - writer := avro.NewPeerDBOCFWriter(records, avroSchema, ocf.Null, protos.DBType_SNOWFLAKE) + writer := utils.NewPeerDBOCFWriter(records, avroSchema, ocf.Null, protos.DBType_SNOWFLAKE) _, err = writer.WriteRecordsToAvroFile(t.Context(), nil, tmpfile.Name()) require.NoError(t, err, "expected WriteRecordsToAvroFile to complete without errors") @@ -174,7 +179,7 @@ func TestWriteRecordsToZstdAvroFileHappyPath(t *testing.T) { t.Logf("[test] avroSchema: %v", avroSchema) // Call function - writer := avro.NewPeerDBOCFWriter(records, avroSchema, ocf.ZStandard, protos.DBType_SNOWFLAKE) + writer := utils.NewPeerDBOCFWriter(records, avroSchema, ocf.ZStandard, protos.DBType_SNOWFLAKE) _, err = writer.WriteRecordsToAvroFile(t.Context(), nil, tmpfile.Name()) require.NoError(t, err, "expected 
WriteRecordsToAvroFile to complete without errors") @@ -201,7 +206,7 @@ func TestWriteRecordsToDeflateAvroFileHappyPath(t *testing.T) { t.Logf("[test] avroSchema: %v", avroSchema) // Call function - writer := avro.NewPeerDBOCFWriter(records, avroSchema, ocf.Deflate, protos.DBType_SNOWFLAKE) + writer := utils.NewPeerDBOCFWriter(records, avroSchema, ocf.Deflate, protos.DBType_SNOWFLAKE) _, err = writer.WriteRecordsToAvroFile(t.Context(), nil, tmpfile.Name()) require.NoError(t, err, "expected WriteRecordsToAvroFile to complete without errors") @@ -227,7 +232,7 @@ func TestWriteRecordsToAvroFileNonNull(t *testing.T) { t.Logf("[test] avroSchema: %v", avroSchema) // Call function - writer := avro.NewPeerDBOCFWriter(records, avroSchema, ocf.Null, protos.DBType_SNOWFLAKE) + writer := utils.NewPeerDBOCFWriter(records, avroSchema, ocf.Null, protos.DBType_SNOWFLAKE) _, err = writer.WriteRecordsToAvroFile(t.Context(), nil, tmpfile.Name()) require.NoError(t, err, "expected WriteRecordsToAvroFile to complete without errors") @@ -254,7 +259,7 @@ func TestWriteRecordsToAvroFileAllNulls(t *testing.T) { t.Logf("[test] avroSchema: %v", avroSchema) // Call function - writer := avro.NewPeerDBOCFWriter(records, avroSchema, ocf.Null, protos.DBType_SNOWFLAKE) + writer := utils.NewPeerDBOCFWriter(records, avroSchema, ocf.Null, protos.DBType_SNOWFLAKE) _, err = writer.WriteRecordsToAvroFile(t.Context(), nil, tmpfile.Name()) require.NoError(t, err, "expected WriteRecordsToAvroFile to complete without errors") diff --git a/flow/connectors/snowflake/client.go b/flow/connectors/snowflake/client.go index 159e7f2901..8b92bbe34d 100644 --- a/flow/connectors/snowflake/client.go +++ b/flow/connectors/snowflake/client.go @@ -12,7 +12,7 @@ import ( "github.com/PeerDB-io/peerdb/flow/connectors/utils" "github.com/PeerDB-io/peerdb/flow/model" - "github.com/PeerDB-io/peerdb/flow/model/qvalue" + "github.com/PeerDB-io/peerdb/flow/shared/types" ) func SnowflakeIdentifierNormalize(identifier string) string { @@ -37,15 +37,15 @@ func snowflakeSchemaTableNormalize(schemaTable *utils.SchemaTable) string { SnowflakeIdentifierNormalize(schemaTable.Table)) } -func (c *SnowflakeConnector) columnTypeToQField(ct *sql.ColumnType) (qvalue.QField, error) { +func (c *SnowflakeConnector) columnTypeToQField(ct *sql.ColumnType) (types.QField, error) { qvKind, ok := snowflakeTypeToQValueKindMap[ct.DatabaseTypeName()] if !ok { - return qvalue.QField{}, fmt.Errorf("unsupported database type %s", ct.DatabaseTypeName()) + return types.QField{}, fmt.Errorf("unsupported database type %s", ct.DatabaseTypeName()) } nullable, ok := ct.Nullable() - return qvalue.QField{ + return types.QField{ Name: ct.Name(), Type: qvKind, Nullable: ok && nullable, @@ -59,7 +59,7 @@ func (c *SnowflakeConnector) processRows(rows *sql.Rows) (*model.QRecordBatch, e } // Convert dbColTypes to QFields - qfields := make([]qvalue.QField, len(dbColTypes)) + qfields := make([]types.QField, len(dbColTypes)) for i, ct := range dbColTypes { qfield, err := c.columnTypeToQField(ct) if err != nil { @@ -70,7 +70,7 @@ func (c *SnowflakeConnector) processRows(rows *sql.Rows) (*model.QRecordBatch, e qfields[i] = qfield } - var records [][]qvalue.QValue + var records [][]types.QValue totalRowsProcessed := 0 const logEveryNumRows = 50000 @@ -83,27 +83,27 @@ func (c *SnowflakeConnector) processRows(rows *sql.Rows) (*model.QRecordBatch, e values := make([]any, len(columns)) for i := range values { switch qfields[i].Type { - case qvalue.QValueKindTimestamp, qvalue.QValueKindTimestampTZ, 
qvalue.QValueKindTime, qvalue.QValueKindDate: + case types.QValueKindTimestamp, types.QValueKindTimestampTZ, types.QValueKindTime, types.QValueKindDate: var t sql.NullTime values[i] = &t - case qvalue.QValueKindInt32: + case types.QValueKindInt32: var n sql.NullInt32 values[i] = &n - case qvalue.QValueKindInt64: + case types.QValueKindInt64: var n sql.NullInt64 values[i] = &n - case qvalue.QValueKindFloat64: + case types.QValueKindFloat64: var f sql.NullFloat64 values[i] = &f - case qvalue.QValueKindBoolean: + case types.QValueKindBoolean: var b sql.NullBool values[i] = &b - case qvalue.QValueKindString, qvalue.QValueKindHStore: + case types.QValueKindString, types.QValueKindHStore: var s sql.NullString values[i] = &s - case qvalue.QValueKindBytes: + case types.QValueKindBytes: values[i] = new([]byte) - case qvalue.QValueKindNumeric: + case types.QValueKindNumeric: var s sql.Null[decimal.Decimal] values[i] = &s default: @@ -115,7 +115,7 @@ func (c *SnowflakeConnector) processRows(rows *sql.Rows) (*model.QRecordBatch, e return nil, err } - qValues := make([]qvalue.QValue, len(values)) + qValues := make([]types.QValue, len(values)) for i, val := range values { qv, err := toQValue(qfields[i].Type, val) if err != nil { @@ -139,7 +139,7 @@ func (c *SnowflakeConnector) processRows(rows *sql.Rows) (*model.QRecordBatch, e } return &model.QRecordBatch{ - Schema: qvalue.NewQRecordSchema(qfields), + Schema: types.NewQRecordSchema(qfields), Records: records, }, nil } @@ -158,108 +158,111 @@ func (c *SnowflakeConnector) ExecuteAndProcessQuery( return c.processRows(rows) } -func toQValue(kind qvalue.QValueKind, val any) (qvalue.QValue, error) { +func toQValue(kind types.QValueKind, val any) (types.QValue, error) { if val == nil { - return qvalue.QValueNull(kind), nil + return types.QValueNull(kind), nil } switch kind { - case qvalue.QValueKindInt32: + case types.QValueKindInt32: if v, ok := val.(*sql.NullInt32); ok { if v.Valid { - return qvalue.QValueInt32{Val: v.Int32}, nil + return types.QValueInt32{Val: v.Int32}, nil } else { - return qvalue.QValueNull(qvalue.QValueKindInt32), nil + return types.QValueNull(types.QValueKindInt32), nil } } - case qvalue.QValueKindInt64: + case types.QValueKindInt64: if v, ok := val.(*sql.NullInt64); ok { if v.Valid { - return qvalue.QValueInt64{Val: v.Int64}, nil + return types.QValueInt64{Val: v.Int64}, nil } else { - return qvalue.QValueNull(qvalue.QValueKindInt64), nil + return types.QValueNull(types.QValueKindInt64), nil } } - case qvalue.QValueKindFloat64: + case types.QValueKindFloat64: if v, ok := val.(*sql.NullFloat64); ok { if v.Valid { - return qvalue.QValueFloat64{Val: v.Float64}, nil + return types.QValueFloat64{Val: v.Float64}, nil } else { - return qvalue.QValueNull(qvalue.QValueKindFloat64), nil + return types.QValueNull(types.QValueKindFloat64), nil } } - case qvalue.QValueKindString: + case types.QValueKindString: if v, ok := val.(*sql.NullString); ok { if v.Valid { - return qvalue.QValueString{Val: v.String}, nil + return types.QValueString{Val: v.String}, nil } else { - return qvalue.QValueNull(qvalue.QValueKindString), nil + return types.QValueNull(types.QValueKindString), nil } } - case qvalue.QValueKindBoolean: + case types.QValueKindBoolean: if v, ok := val.(*sql.NullBool); ok { if v.Valid { - return qvalue.QValueBoolean{Val: v.Bool}, nil + return types.QValueBoolean{Val: v.Bool}, nil } else { - return qvalue.QValueNull(qvalue.QValueKindBoolean), nil + return types.QValueNull(types.QValueKindBoolean), nil } } - case qvalue.QValueKindTimestamp: + 
case types.QValueKindTimestamp: if t, ok := val.(*sql.NullTime); ok { if t.Valid { - return qvalue.QValueTimestamp{Val: t.Time}, nil + return types.QValueTimestamp{Val: t.Time}, nil } else { - return qvalue.QValueNull(kind), nil + return types.QValueNull(kind), nil } } - case qvalue.QValueKindTimestampTZ: + case types.QValueKindTimestampTZ: if t, ok := val.(*sql.NullTime); ok { if t.Valid { - return qvalue.QValueTimestampTZ{Val: t.Time}, nil + return types.QValueTimestampTZ{Val: t.Time}, nil } else { - return qvalue.QValueNull(kind), nil + return types.QValueNull(kind), nil } } - case qvalue.QValueKindDate: + case types.QValueKindDate: if t, ok := val.(*sql.NullTime); ok { if t.Valid { - return qvalue.QValueDate{Val: t.Time}, nil + return types.QValueDate{Val: t.Time}, nil } else { - return qvalue.QValueNull(kind), nil + return types.QValueNull(kind), nil } } - case qvalue.QValueKindTime: + case types.QValueKindTime: if t, ok := val.(*sql.NullTime); ok { if t.Valid { tt := t.Time - // anchor on unix epoch, some drivers anchor on 0001-01-01 - return qvalue.QValueTime{ - Val: time.Date(1970, time.January, 1, tt.Hour(), tt.Minute(), tt.Second(), tt.Nanosecond(), time.UTC), + h, m, s := tt.Clock() + return types.QValueTime{ + Val: time.Duration(h)*time.Hour + + time.Duration(m)*time.Minute + + time.Duration(s)*time.Second + + time.Duration(tt.Nanosecond()), }, nil } else { - return qvalue.QValueNull(kind), nil + return types.QValueNull(kind), nil } } - case qvalue.QValueKindNumeric: + case types.QValueKindNumeric: if v, ok := val.(*sql.Null[decimal.Decimal]); ok { if v.Valid { - return qvalue.QValueNumeric{Val: v.V}, nil + return types.QValueNumeric{Val: v.V}, nil } else { - return qvalue.QValueNull(qvalue.QValueKindNumeric), nil + return types.QValueNull(types.QValueKindNumeric), nil } } - case qvalue.QValueKindBytes: + case types.QValueKindBytes: if v, ok := val.(*[]byte); ok && v != nil { - return qvalue.QValueBytes{Val: *v}, nil + return types.QValueBytes{Val: *v}, nil } - case qvalue.QValueKindJSON: + case types.QValueKindJSON: vraw := val.(*any) vstring, ok := (*vraw).(string) if !ok { slog.Warn("A parsed JSON value was not a string. 
Likely a null field value") } - return qvalue.QValueJSON{Val: vstring}, nil + return types.QValueJSON{Val: vstring}, nil } // If type is unsupported or doesn't match the specified kind, return error diff --git a/flow/connectors/snowflake/get_schema_for_tests.go b/flow/connectors/snowflake/get_schema_for_tests.go index 4f504382ee..54265246e7 100644 --- a/flow/connectors/snowflake/get_schema_for_tests.go +++ b/flow/connectors/snowflake/get_schema_for_tests.go @@ -4,9 +4,9 @@ import ( "context" "slices" - "github.com/PeerDB-io/peerdb/flow/datatypes" "github.com/PeerDB-io/peerdb/flow/generated/protos" - "github.com/PeerDB-io/peerdb/flow/model/qvalue" + "github.com/PeerDB-io/peerdb/flow/shared/datatypes" + "github.com/PeerDB-io/peerdb/flow/shared/types" ) func (c *SnowflakeConnector) getTableSchemaForTable(ctx context.Context, tm *protos.TableMapping) (*protos.TableSchema, error) { @@ -24,7 +24,7 @@ func (c *SnowflakeConnector) getTableSchemaForTable(ctx context.Context, tm *pro genericColType, err := snowflakeTypeToQValueKind(sfColumn.ColumnType) if err != nil { // we use string for invalid types - genericColType = qvalue.QValueKindString + genericColType = types.QValueKindString } colFields = append(colFields, &protos.FieldDescription{ @@ -45,6 +45,7 @@ func (c *SnowflakeConnector) getTableSchemaForTable(ctx context.Context, tm *pro func (c *SnowflakeConnector) GetTableSchema( ctx context.Context, _env map[string]string, + _version uint32, _system protos.TypeSystem, tableMappings []*protos.TableMapping, ) (map[string]*protos.TableSchema, error) { diff --git a/flow/connectors/snowflake/merge_stmt_generator.go b/flow/connectors/snowflake/merge_stmt_generator.go index 1450117bf0..a2cabe4d1c 100644 --- a/flow/connectors/snowflake/merge_stmt_generator.go +++ b/flow/connectors/snowflake/merge_stmt_generator.go @@ -6,10 +6,11 @@ import ( "strings" "github.com/PeerDB-io/peerdb/flow/connectors/utils" - numeric "github.com/PeerDB-io/peerdb/flow/datatypes" "github.com/PeerDB-io/peerdb/flow/generated/protos" "github.com/PeerDB-io/peerdb/flow/model/qvalue" "github.com/PeerDB-io/peerdb/flow/shared" + numeric "github.com/PeerDB-io/peerdb/flow/shared/datatypes" + "github.com/PeerDB-io/peerdb/flow/shared/types" ) type mergeStmtGenerator struct { @@ -34,35 +35,30 @@ func (m *mergeStmtGenerator) generateMergeStmt(ctx context.Context, env map[stri flattenedCastsSQLArray := make([]string, 0, len(columns)) for _, column := range columns { genericColumnType := column.Type - qvKind := qvalue.QValueKind(genericColumnType) - sfType, err := qvKind.ToDWHColumnType(ctx, env, protos.DBType_SNOWFLAKE, column, normalizedTableSchema.NullableEnabled) + qvKind := types.QValueKind(genericColumnType) + sfType, err := qvalue.ToDWHColumnType(ctx, qvKind, env, protos.DBType_SNOWFLAKE, column, normalizedTableSchema.NullableEnabled) if err != nil { return "", fmt.Errorf("failed to convert column type %s to snowflake type: %w", genericColumnType, err) } targetColumnName := SnowflakeIdentifierNormalize(column.Name) switch qvKind { - case qvalue.QValueKindBytes: + case types.QValueKindBytes: flattenedCastsSQLArray = append(flattenedCastsSQLArray, fmt.Sprintf("BASE64_DECODE_BINARY(%s:\"%s\") "+ "AS %s", toVariantColumnName, column.Name, targetColumnName)) - case qvalue.QValueKindGeography: + case types.QValueKindGeography: flattenedCastsSQLArray = append(flattenedCastsSQLArray, fmt.Sprintf("TO_GEOGRAPHY(CAST(%s:\"%s\" AS STRING),true) AS %s", toVariantColumnName, column.Name, targetColumnName)) - case qvalue.QValueKindGeometry: + case 
types.QValueKindGeometry: flattenedCastsSQLArray = append(flattenedCastsSQLArray, fmt.Sprintf("TO_GEOMETRY(CAST(%s:\"%s\" AS STRING),true) AS %s", toVariantColumnName, column.Name, targetColumnName)) - case qvalue.QValueKindJSON, qvalue.QValueKindJSONB, qvalue.QValueKindHStore, qvalue.QValueKindInterval: + case types.QValueKindJSON, types.QValueKindJSONB, types.QValueKindHStore, types.QValueKindInterval: flattenedCastsSQLArray = append(flattenedCastsSQLArray, fmt.Sprintf("PARSE_JSON(CAST(%s:\"%s\" AS STRING)) AS %s", toVariantColumnName, column.Name, targetColumnName)) - // TODO: https://github.com/PeerDB-io/peerdb/issues/189 - handle time types and interval types - // case model.ColumnTypeTime: - // flattenedCastsSQLArray = append(flattenedCastsSQLArray, fmt.Sprintf("TIME_FROM_PARTS(0,0,0,%s:%s:"+ - // "Microseconds*1000) "+ - // "AS %s", toVariantColumnName, columnName, columnName)) - case qvalue.QValueKindNumeric: + case types.QValueKindNumeric: precision, scale := numeric.GetNumericTypeForWarehouse(column.TypeModifier, numeric.SnowflakeNumericCompatibility{}) numericType := fmt.Sprintf("NUMERIC(%d,%d)", precision, scale) flattenedCastsSQLArray = append(flattenedCastsSQLArray, diff --git a/flow/connectors/snowflake/qrep.go b/flow/connectors/snowflake/qrep.go index 84f4027d01..6402795616 100644 --- a/flow/connectors/snowflake/qrep.go +++ b/flow/connectors/snowflake/qrep.go @@ -29,7 +29,7 @@ func (c *SnowflakeConnector) SyncQRepRecords( config *protos.QRepConfig, partition *protos.QRepPartition, stream *model.QRecordStream, -) (int64, error) { +) (int64, shared.QRepWarnings, error) { ctx = c.withMirrorNameQueryTag(ctx, config.FlowJobName) // Ensure the destination table is available. @@ -40,7 +40,7 @@ func (c *SnowflakeConnector) SyncQRepRecords( ) tblSchema, err := c.getTableSchema(ctx, destTable) if err != nil { - return 0, fmt.Errorf("failed to get schema of table %s: %w", destTable, err) + return 0, nil, fmt.Errorf("failed to get schema of table %s: %w", destTable, err) } c.logger.Info("Called QRep sync function and obtained table schema", flowLog) @@ -104,21 +104,16 @@ func (c *SnowflakeConnector) createStage(ctx context.Context, stageName string, } createStageStmt = stmt } else { - stageStatement := ` - CREATE OR REPLACE STAGE %s - FILE_FORMAT = (TYPE = AVRO); - ` - createStageStmt = fmt.Sprintf(stageStatement, stageName) + createStageStmt = fmt.Sprintf(`CREATE OR REPLACE STAGE %s FILE_FORMAT = (TYPE = AVRO)`, stageName) } // Execute the query - _, err := c.execWithLogging(ctx, createStageStmt) - if err != nil { - c.logger.Error("failed to create stage "+stageName, slog.Any("error", err)) + if _, err := c.execWithLogging(ctx, createStageStmt); err != nil { + c.logger.Error("failed to create stage", slog.String("stage", stageName), slog.Any("error", err)) return fmt.Errorf("failed to create stage %s: %w", stageName, err) } - c.logger.Info("Created stage " + stageName) + c.logger.Info("Created stage", slog.String("stage", stageName)) return nil } @@ -131,7 +126,6 @@ func (c *SnowflakeConnector) createExternalStage(ctx context.Context, stageName cleanURL := fmt.Sprintf("s3://%s/%s/%s", s3o.Bucket, s3o.Prefix, config.FlowJobName) - s3Int := c.config.S3Integration provider, err := utils.GetAWSCredentialsProvider(ctx, "snowflake", utils.PeerAWSCredentials{}) if err != nil { return "", err @@ -141,7 +135,7 @@ func (c *SnowflakeConnector) createExternalStage(ctx context.Context, stageName if err != nil { return "", err } - if s3Int == "" { + if c.config.S3Integration == "" { credsStr := 
fmt.Sprintf("CREDENTIALS=(AWS_KEY_ID='%s' AWS_SECRET_KEY='%s' AWS_TOKEN='%s')", creds.AWS.AccessKeyID, creds.AWS.SecretAccessKey, creds.AWS.SessionToken) stageStatement := ` @@ -156,7 +150,7 @@ func (c *SnowflakeConnector) createExternalStage(ctx context.Context, stageName URL = '%s' STORAGE_INTEGRATION = %s FILE_FORMAT = (TYPE = AVRO);` - return fmt.Sprintf(stageStatement, stageName, cleanURL, s3Int), nil + return fmt.Sprintf(stageStatement, stageName, cleanURL, c.config.S3Integration), nil } } diff --git a/flow/connectors/snowflake/qrep_avro_sync.go b/flow/connectors/snowflake/qrep_avro_sync.go index 0380690e3b..b325ca4980 100644 --- a/flow/connectors/snowflake/qrep_avro_sync.go +++ b/flow/connectors/snowflake/qrep_avro_sync.go @@ -13,11 +13,10 @@ import ( _ "github.com/snowflakedb/gosnowflake" "github.com/PeerDB-io/peerdb/flow/connectors/utils" - avro "github.com/PeerDB-io/peerdb/flow/connectors/utils/avro" "github.com/PeerDB-io/peerdb/flow/generated/protos" "github.com/PeerDB-io/peerdb/flow/model" - "github.com/PeerDB-io/peerdb/flow/model/qvalue" "github.com/PeerDB-io/peerdb/flow/shared" + "github.com/PeerDB-io/peerdb/flow/shared/types" ) type SnowflakeAvroSyncHandler struct { @@ -92,47 +91,47 @@ func (s *SnowflakeAvroSyncHandler) SyncQRepRecords( partition *protos.QRepPartition, dstTableSchema []*sql.ColumnType, stream *model.QRecordStream, -) (int64, error) { +) (int64, shared.QRepWarnings, error) { partitionLog := slog.String(string(shared.PartitionIDKey), partition.PartitionId) startTime := time.Now() dstTableName := config.DestinationTableIdentifier schema, err := stream.Schema() if err != nil { - return 0, err + return 0, nil, err } s.logger.Info("sync function called and schema acquired", partitionLog) avroSchema, err := s.getAvroSchema(ctx, config.Env, dstTableName, schema) if err != nil { - return 0, err + return 0, nil, err } avroFile, err := s.writeToAvroFile(ctx, config.Env, stream, avroSchema, partition.PartitionId, config.FlowJobName) if err != nil { - return 0, err + return 0, nil, err } defer avroFile.Cleanup() stage := s.getStageNameForJob(config.FlowJobName) if err := s.putFileToStage(ctx, avroFile, stage); err != nil { - return 0, err + return 0, nil, err } s.logger.Info("Put file to stage in Avro sync for snowflake", partitionLog) if err := s.FinishQRepPartition(ctx, partition, config.FlowJobName, startTime); err != nil { - return 0, err + return 0, nil, err } - return avroFile.NumRecords, nil + return avroFile.NumRecords, nil, nil } func (s *SnowflakeAvroSyncHandler) getAvroSchema( ctx context.Context, env map[string]string, dstTableName string, - schema qvalue.QRecordSchema, + schema types.QRecordSchema, ) (*model.QRecordAvroSchemaDefinition, error) { // TODO: Support avroNameMap for avro-incompatible column name support avroSchema, err := model.GetAvroSchemaDefinition(ctx, env, dstTableName, schema, protos.DBType_SNOWFLAKE, nil) @@ -151,28 +150,28 @@ func (s *SnowflakeAvroSyncHandler) writeToAvroFile( avroSchema *model.QRecordAvroSchemaDefinition, partitionID string, flowJobName string, -) (*avro.AvroFile, error) { +) (utils.AvroFile, error) { if s.config.StagingPath == "" { - ocfWriter := avro.NewPeerDBOCFWriter(stream, avroSchema, ocf.ZStandard, protos.DBType_SNOWFLAKE) + ocfWriter := utils.NewPeerDBOCFWriter(stream, avroSchema, ocf.ZStandard, protos.DBType_SNOWFLAKE) tmpDir := fmt.Sprintf("%s/peerdb-avro-%s", os.TempDir(), flowJobName) err := os.MkdirAll(tmpDir, os.ModePerm) if err != nil { - return nil, fmt.Errorf("failed to create temp dir: %w", err) + 
return utils.AvroFile{}, fmt.Errorf("failed to create temp dir: %w", err) } localFilePath := fmt.Sprintf("%s/%s.avro", tmpDir, partitionID) s.logger.Info("writing records to local file " + localFilePath) avroFile, err := ocfWriter.WriteRecordsToAvroFile(ctx, env, localFilePath) if err != nil { - return nil, fmt.Errorf("failed to write records to Avro file: %w", err) + return utils.AvroFile{}, fmt.Errorf("failed to write records to Avro file: %w", err) } return avroFile, nil } else if strings.HasPrefix(s.config.StagingPath, "s3://") { - ocfWriter := avro.NewPeerDBOCFWriter(stream, avroSchema, ocf.ZStandard, protos.DBType_SNOWFLAKE) + ocfWriter := utils.NewPeerDBOCFWriter(stream, avroSchema, ocf.ZStandard, protos.DBType_SNOWFLAKE) s3o, err := utils.NewS3BucketAndPrefix(s.config.StagingPath) if err != nil { - return nil, fmt.Errorf("failed to parse staging path: %w", err) + return utils.AvroFile{}, fmt.Errorf("failed to parse staging path: %w", err) } s3AvroFileKey := fmt.Sprintf("%s/%s/%s.avro", s3o.Prefix, s.config.FlowJobName, partitionID) @@ -181,21 +180,21 @@ func (s *SnowflakeAvroSyncHandler) writeToAvroFile( provider, err := utils.GetAWSCredentialsProvider(ctx, "snowflake", utils.PeerAWSCredentials{}) if err != nil { - return nil, err + return utils.AvroFile{}, err } - avroFile, err := ocfWriter.WriteRecordsToS3(ctx, env, s3o.Bucket, s3AvroFileKey, provider, nil, nil) + avroFile, err := ocfWriter.WriteRecordsToS3(ctx, env, s3o.Bucket, s3AvroFileKey, provider, nil, nil, nil) if err != nil { - return nil, fmt.Errorf("failed to write records to S3: %w", err) + return utils.AvroFile{}, fmt.Errorf("failed to write records to S3: %w", err) } return avroFile, nil } - return nil, fmt.Errorf("unsupported staging path: %s", s.config.StagingPath) + return utils.AvroFile{}, fmt.Errorf("unsupported staging path: %s", s.config.StagingPath) } -func (s *SnowflakeAvroSyncHandler) putFileToStage(ctx context.Context, avroFile *avro.AvroFile, stage string) error { - if avroFile.StorageLocation != avro.AvroLocalStorage { +func (s *SnowflakeAvroSyncHandler) putFileToStage(ctx context.Context, avroFile utils.AvroFile, stage string) error { + if avroFile.StorageLocation != utils.AvroLocalStorage { s.logger.Info("no file to put to stage") return nil } diff --git a/flow/connectors/snowflake/qvalue_convert.go b/flow/connectors/snowflake/qvalue_convert.go index df86b32b15..b0816e11b0 100644 --- a/flow/connectors/snowflake/qvalue_convert.go +++ b/flow/connectors/snowflake/qvalue_convert.go @@ -3,38 +3,38 @@ package connsnowflake import ( "fmt" - "github.com/PeerDB-io/peerdb/flow/model/qvalue" + "github.com/PeerDB-io/peerdb/flow/shared/types" ) -var snowflakeTypeToQValueKindMap = map[string]qvalue.QValueKind{ - "INT": qvalue.QValueKindInt32, - "BIGINT": qvalue.QValueKindInt64, - "FLOAT": qvalue.QValueKindFloat64, - "DOUBLE": qvalue.QValueKindFloat64, - "REAL": qvalue.QValueKindFloat64, - "VARCHAR": qvalue.QValueKindString, - "CHAR": qvalue.QValueKindString, - "TEXT": qvalue.QValueKindString, - "BOOLEAN": qvalue.QValueKindBoolean, - "DATETIME": qvalue.QValueKindTimestamp, - "TIMESTAMP": qvalue.QValueKindTimestamp, - "TIMESTAMP_NTZ": qvalue.QValueKindTimestamp, - "TIMESTAMP_TZ": qvalue.QValueKindTimestampTZ, - "TIME": qvalue.QValueKindTime, - "DATE": qvalue.QValueKindDate, - "BLOB": qvalue.QValueKindBytes, - "BYTEA": qvalue.QValueKindBytes, - "BINARY": qvalue.QValueKindBytes, - "FIXED": qvalue.QValueKindNumeric, - "NUMBER": qvalue.QValueKindNumeric, - "DECIMAL": qvalue.QValueKindNumeric, - "NUMERIC": 
qvalue.QValueKindNumeric, - "VARIANT": qvalue.QValueKindJSON, - "GEOMETRY": qvalue.QValueKindGeometry, - "GEOGRAPHY": qvalue.QValueKindGeography, +var snowflakeTypeToQValueKindMap = map[string]types.QValueKind{ + "INT": types.QValueKindInt32, + "BIGINT": types.QValueKindInt64, + "FLOAT": types.QValueKindFloat64, + "DOUBLE": types.QValueKindFloat64, + "REAL": types.QValueKindFloat64, + "VARCHAR": types.QValueKindString, + "CHAR": types.QValueKindString, + "TEXT": types.QValueKindString, + "BOOLEAN": types.QValueKindBoolean, + "DATETIME": types.QValueKindTimestamp, + "TIMESTAMP": types.QValueKindTimestamp, + "TIMESTAMP_NTZ": types.QValueKindTimestamp, + "TIMESTAMP_TZ": types.QValueKindTimestampTZ, + "TIME": types.QValueKindTime, + "DATE": types.QValueKindDate, + "BLOB": types.QValueKindBytes, + "BYTEA": types.QValueKindBytes, + "BINARY": types.QValueKindBytes, + "FIXED": types.QValueKindNumeric, + "NUMBER": types.QValueKindNumeric, + "DECIMAL": types.QValueKindNumeric, + "NUMERIC": types.QValueKindNumeric, + "VARIANT": types.QValueKindJSON, + "GEOMETRY": types.QValueKindGeometry, + "GEOGRAPHY": types.QValueKindGeography, } -func snowflakeTypeToQValueKind(name string) (qvalue.QValueKind, error) { +func snowflakeTypeToQValueKind(name string) (types.QValueKind, error) { if val, ok := snowflakeTypeToQValueKindMap[name]; ok { return val, nil } diff --git a/flow/connectors/snowflake/snowflake.go b/flow/connectors/snowflake/snowflake.go index 0c50f0fbc7..5ee562f04e 100644 --- a/flow/connectors/snowflake/snowflake.go +++ b/flow/connectors/snowflake/snowflake.go @@ -24,6 +24,7 @@ import ( "github.com/PeerDB-io/peerdb/flow/model" "github.com/PeerDB-io/peerdb/flow/model/qvalue" "github.com/PeerDB-io/peerdb/flow/shared" + "github.com/PeerDB-io/peerdb/flow/shared/types" ) const ( @@ -359,8 +360,9 @@ func (c *SnowflakeConnector) ReplayTableSchemaDeltas( } for _, addedColumn := range schemaDelta.AddedColumns { - sfColtype, err := qvalue.QValueKind(addedColumn.Type).ToDWHColumnType( - ctx, env, protos.DBType_SNOWFLAKE, addedColumn, schemaDelta.NullableEnabled, + qvKind := types.QValueKind(addedColumn.Type) + sfColtype, err := qvalue.ToDWHColumnType( + ctx, qvKind, env, protos.DBType_SNOWFLAKE, addedColumn, schemaDelta.NullableEnabled, ) if err != nil { return fmt.Errorf("failed to convert column type %s to snowflake type: %w", @@ -418,8 +420,10 @@ func (c *SnowflakeConnector) syncRecordsViaAvro( syncBatchID int64, ) (*model.SyncResponse, error) { tableNameRowsMapping := utils.InitialiseTableRowsMap(req.TableMappings) - streamReq := model.NewRecordsToStreamRequest(req.Records.GetRecords(), tableNameRowsMapping, syncBatchID) - stream, err := utils.RecordsToRawTableStream(streamReq) + streamReq := model.NewRecordsToStreamRequest( + req.Records.GetRecords(), tableNameRowsMapping, syncBatchID, false, protos.DBType_SNOWFLAKE, + ) + stream, err := utils.RecordsToRawTableStream(streamReq, nil) if err != nil { return nil, fmt.Errorf("failed to convert records to raw table stream: %w", err) } @@ -429,7 +433,8 @@ func (c *SnowflakeConnector) syncRecordsViaAvro( FlowJobName: req.FlowJobName, DestinationTableIdentifier: strings.ToLower(fmt.Sprintf("%s.%s", c.rawSchema, rawTableIdentifier)), - Env: req.Env, + Env: req.Env, + Version: req.Version, } avroSyncer := NewSnowflakeAvroSyncHandler(qrepConfig, c) destinationTableSchema, err := c.getTableSchema(ctx, qrepConfig.DestinationTableIdentifier) @@ -654,8 +659,9 @@ func generateCreateTableSQLForNormalizedTable( for _, column := range tableSchema.Columns { 
genericColumnType := column.Type normalizedColName := SnowflakeIdentifierNormalize(column.Name) - sfColType, err := qvalue.QValueKind(genericColumnType).ToDWHColumnType( - ctx, config.Env, protos.DBType_SNOWFLAKE, column, tableSchema.NullableEnabled, + qvKind := types.QValueKind(genericColumnType) + sfColType, err := qvalue.ToDWHColumnType( + ctx, qvKind, config.Env, protos.DBType_SNOWFLAKE, column, tableSchema.NullableEnabled, ) if err != nil { slog.Warn(fmt.Sprintf("failed to convert column type %s to snowflake type", genericColumnType), diff --git a/flow/connectors/utils/avro/avro_writer.go b/flow/connectors/utils/avro_writer.go similarity index 83% rename from flow/connectors/utils/avro/avro_writer.go rename to flow/connectors/utils/avro_writer.go index 40730cc480..c39226be6d 100644 --- a/flow/connectors/utils/avro/avro_writer.go +++ b/flow/connectors/utils/avro_writer.go @@ -14,16 +14,13 @@ import ( "github.com/aws/aws-sdk-go-v2/aws" "github.com/aws/aws-sdk-go-v2/feature/s3/manager" "github.com/aws/aws-sdk-go-v2/service/s3" - "github.com/djherbis/buffer" - "github.com/djherbis/nio/v3" "github.com/hamba/avro/v2/ocf" - "github.com/PeerDB-io/peerdb/flow/connectors/utils" "github.com/PeerDB-io/peerdb/flow/generated/protos" "github.com/PeerDB-io/peerdb/flow/internal" "github.com/PeerDB-io/peerdb/flow/model" - "github.com/PeerDB-io/peerdb/flow/model/qvalue" "github.com/PeerDB-io/peerdb/flow/shared" + "github.com/PeerDB-io/peerdb/flow/shared/types" ) type ( @@ -51,8 +48,7 @@ type AvroFile struct { func (l *AvroFile) Cleanup() { if l.StorageLocation == AvroLocalStorage { - err := os.Remove(l.FilePath) - if err != nil && !os.IsNotExist(err) { + if err := os.Remove(l.FilePath); err != nil && !os.IsNotExist(err) { slog.Warn("unable to delete temporary Avro file", slog.Any("error", err)) } } @@ -76,7 +72,8 @@ func (p *peerDBOCFWriter) WriteOCF( ctx context.Context, env map[string]string, w io.Writer, - typeConversions map[string]qvalue.TypeConversion, + typeConversions map[string]types.TypeConversion, + numericTruncator *model.SnapshotTableNumericTruncator, ) (int64, error) { ocfWriter, err := p.createOCFWriter(w) if err != nil { @@ -84,7 +81,7 @@ func (p *peerDBOCFWriter) WriteOCF( } defer ocfWriter.Close() - numRows, err := p.writeRecordsToOCFWriter(ctx, env, ocfWriter, typeConversions) + numRows, err := p.writeRecordsToOCFWriter(ctx, env, ocfWriter, typeConversions, numericTruncator) if err != nil { return 0, fmt.Errorf("failed to write records to OCF writer: %w", err) } @@ -96,21 +93,21 @@ func (p *peerDBOCFWriter) WriteRecordsToS3( env map[string]string, bucketName string, key string, - s3Creds utils.AWSCredentialsProvider, + s3Creds AWSCredentialsProvider, avroSize *atomic.Int64, - typeConversions map[string]qvalue.TypeConversion, -) (*AvroFile, error) { + typeConversions map[string]types.TypeConversion, + numericTruncator *model.SnapshotTableNumericTruncator, +) (AvroFile, error) { logger := internal.LoggerFromCtx(ctx) - s3svc, err := utils.CreateS3Client(ctx, s3Creds) + s3svc, err := CreateS3Client(ctx, s3Creds) if err != nil { logger.Error("failed to create S3 client", slog.Any("error", err)) - return nil, fmt.Errorf("failed to create S3 client: %w", err) + return AvroFile{}, fmt.Errorf("failed to create S3 client: %w", err) } - buf := buffer.New(32 * 1024 * 1024) // 32MB in memory Buffer - r, w := nio.Pipe(buf) - + r, w := io.Pipe() defer r.Close() + var writeOcfError error var numRows int64 @@ -129,12 +126,12 @@ func (p *peerDBOCFWriter) WriteRecordsToS3( } else { writer = 
shared.NewWatchWriter(w, avroSize) } - numRows, writeOcfError = p.WriteOCF(ctx, env, writer, typeConversions) + numRows, writeOcfError = p.WriteOCF(ctx, env, writer, typeConversions, numericTruncator) }() partSize, err := internal.PeerDBS3PartSize(ctx, env) if err != nil { - return nil, fmt.Errorf("could not get s3 part size config: %w", err) + return AvroFile{}, fmt.Errorf("could not get s3 part size config: %w", err) } // Create the uploader using the AWS SDK v2 manager @@ -151,25 +148,25 @@ func (p *peerDBOCFWriter) WriteRecordsToS3( }); err != nil { s3Path := "s3://" + bucketName + "/" + key logger.Error("failed to upload file", slog.Any("error", err), slog.String("s3_path", s3Path)) - return nil, fmt.Errorf("failed to upload file: %w", err) + return AvroFile{}, fmt.Errorf("failed to upload file: %w", err) } if writeOcfError != nil { logger.Error("failed to write records to OCF", slog.Any("error", writeOcfError)) - return nil, writeOcfError + return AvroFile{}, writeOcfError } - return &AvroFile{ + return AvroFile{ StorageLocation: AvroS3Storage, FilePath: key, NumRecords: numRows, }, nil } -func (p *peerDBOCFWriter) WriteRecordsToAvroFile(ctx context.Context, env map[string]string, filePath string) (*AvroFile, error) { +func (p *peerDBOCFWriter) WriteRecordsToAvroFile(ctx context.Context, env map[string]string, filePath string) (AvroFile, error) { file, err := os.Create(filePath) if err != nil { - return nil, fmt.Errorf("failed to create temporary Avro file: %w", err) + return AvroFile{}, fmt.Errorf("failed to create temporary Avro file: %w", err) } defer file.Close() printFileStats := func(message string) { @@ -187,13 +184,13 @@ func (p *peerDBOCFWriter) WriteRecordsToAvroFile(ctx context.Context, env map[st bufferedWriter := bufio.NewWriterSize(file, buffSizeBytes) defer bufferedWriter.Flush() - numRecords, err := p.WriteOCF(ctx, env, bufferedWriter, nil) + numRecords, err := p.WriteOCF(ctx, env, bufferedWriter, nil, nil) if err != nil { - return nil, fmt.Errorf("failed to write records to temporary Avro file: %w", err) + return AvroFile{}, fmt.Errorf("failed to write records to temporary Avro file: %w", err) } printFileStats("finished writing to temporary Avro file") - return &AvroFile{ + return AvroFile{ NumRecords: numRecords, StorageLocation: AvroLocalStorage, FilePath: filePath, @@ -224,7 +221,8 @@ func (p *peerDBOCFWriter) writeRecordsToOCFWriter( ctx context.Context, env map[string]string, ocfWriter *ocf.Encoder, - typeConversions map[string]qvalue.TypeConversion, + typeConversions map[string]types.TypeConversion, + numericTruncator *model.SnapshotTableNumericTruncator, ) (int64, error) { logger := internal.LoggerFromCtx(ctx) @@ -250,7 +248,7 @@ func (p *peerDBOCFWriter) writeRecordsToOCFWriter( if err := ctx.Err(); err != nil { return numRows.Load(), err } else { - avroMap, err := avroConverter.Convert(ctx, env, qrecord, typeConversions) + avroMap, err := avroConverter.Convert(ctx, env, qrecord, typeConversions, numericTruncator) if err != nil { logger.Error("Failed to convert QRecord to Avro compatible map", slog.Any("error", err)) return numRows.Load(), fmt.Errorf("failed to convert QRecord to Avro compatible map: %w", err) diff --git a/flow/connectors/utils/aws.go b/flow/connectors/utils/aws.go index f98697115b..55c161779e 100644 --- a/flow/connectors/utils/aws.go +++ b/flow/connectors/utils/aws.go @@ -2,6 +2,7 @@ package utils import ( "context" + "crypto/tls" "fmt" "net/http" "net/url" @@ -21,7 +22,9 @@ import ( "github.com/aws/smithy-go/ptr" "github.com/google/uuid" + 
"github.com/PeerDB-io/peerdb/flow/generated/protos" "github.com/PeerDB-io/peerdb/flow/internal" + "github.com/PeerDB-io/peerdb/flow/shared" ) const ( @@ -45,15 +48,26 @@ type PeerAWSCredentials struct { ChainedRoleArn *string EndpointUrl *string Region string + RootCAs *string + TlsHost string } -type S3PeerCredentials struct { - AccessKeyID string `json:"accessKeyId"` - SecretAccessKey string `json:"secretAccessKey"` - AwsRoleArn string `json:"awsRoleArn"` - SessionToken string `json:"sessionToken"` - Region string `json:"region"` - Endpoint string `json:"endpoint"` +func NewPeerAWSCredentials(s3 *protos.S3Config) PeerAWSCredentials { + if s3 == nil { + return PeerAWSCredentials{} + } + return PeerAWSCredentials{ + Credentials: aws.Credentials{ + AccessKeyID: s3.GetAccessKeyId(), + SecretAccessKey: s3.GetSecretAccessKey(), + }, + RoleArn: s3.RoleArn, + ChainedRoleArn: nil, + EndpointUrl: s3.Endpoint, + Region: s3.GetRegion(), + RootCAs: s3.RootCa, + TlsHost: s3.TlsHost, + } } type ClickHouseS3Credentials struct { @@ -71,6 +85,7 @@ type AWSCredentialsProvider interface { GetUnderlyingProvider() aws.CredentialsProvider GetRegion() string GetEndpointURL() string + GetTlsConfig() (*string, string) } type ConfigBasedAWSCredentialsProvider struct { @@ -98,6 +113,10 @@ func (r *ConfigBasedAWSCredentialsProvider) GetEndpointURL() string { return endpoint } +func (r *ConfigBasedAWSCredentialsProvider) GetTlsConfig() (*string, string) { + return nil, "" +} + // Retrieve should be called as late as possible in order to have credentials with latest expiry func (r *ConfigBasedAWSCredentialsProvider) Retrieve(ctx context.Context) (AWSCredentials, error) { retrieved, err := r.config.Credentials.Retrieve(ctx) @@ -113,12 +132,16 @@ func (r *ConfigBasedAWSCredentialsProvider) Retrieve(ctx context.Context) (AWSCr type StaticAWSCredentialsProvider struct { credentials AWSCredentials region string + rootCAs *string + tlsHost string } -func NewStaticAWSCredentialsProvider(credentials AWSCredentials, region string) *StaticAWSCredentialsProvider { +func NewStaticAWSCredentialsProvider(credentials AWSCredentials, region string, rootCAs *string, tlsHost string) *StaticAWSCredentialsProvider { return &StaticAWSCredentialsProvider{ credentials: credentials, region: region, + rootCAs: rootCAs, + tlsHost: tlsHost, } } @@ -142,6 +165,10 @@ func (s *StaticAWSCredentialsProvider) GetEndpointURL() string { return "" } +func (s *StaticAWSCredentialsProvider) GetTlsConfig() (*string, string) { + return s.rootCAs, s.tlsHost +} + type AssumeRoleBasedAWSCredentialsProvider struct { Provider aws.CredentialsProvider // New Credentials config aws.Config // Initial Config @@ -194,6 +221,10 @@ func (a *AssumeRoleBasedAWSCredentialsProvider) GetEndpointURL() string { return endpoint } +func (a *AssumeRoleBasedAWSCredentialsProvider) GetTlsConfig() (*string, string) { + return nil, "" +} + func getPeerDBAWSEnv(connectorName string, awsKey string) string { return os.Getenv(fmt.Sprintf("PEERDB_%s_AWS_CREDENTIALS_%s", strings.ToUpper(connectorName), awsKey)) } @@ -203,6 +234,8 @@ func LoadPeerDBAWSEnvConfigProvider(connectorName string) *StaticAWSCredentialsP secretAccessKey := getPeerDBAWSEnv(connectorName, "AWS_SECRET_ACCESS_KEY") region := getPeerDBAWSEnv(connectorName, "AWS_REGION") endpointUrl := getPeerDBAWSEnv(connectorName, "AWS_ENDPOINT_URL_S3") + rootCa := getPeerDBAWSEnv(connectorName, "ROOT_CA") + tlsHost := getPeerDBAWSEnv(connectorName, "TLS_HOST") var endpointUrlPtr *string if endpointUrl != "" { endpointUrlPtr = 
&endpointUrl @@ -212,13 +245,18 @@ func LoadPeerDBAWSEnvConfigProvider(connectorName string) *StaticAWSCredentialsP return nil } + var rootCAs *string + if rootCa != "" { + rootCAs = &rootCa + } + return NewStaticAWSCredentialsProvider(AWSCredentials{ AWS: aws.Credentials{ AccessKeyID: accessKeyId, SecretAccessKey: secretAccessKey, }, EndpointUrl: endpointUrlPtr, - }, region) + }, region, rootCAs, tlsHost) } func GetAWSCredentialsProvider(ctx context.Context, connectorName string, peerCredentials PeerAWSCredentials) (AWSCredentialsProvider, error) { @@ -230,7 +268,7 @@ func GetAWSCredentialsProvider(ctx context.Context, connectorName string, peerCr staticProvider := NewStaticAWSCredentialsProvider(AWSCredentials{ AWS: peerCredentials.Credentials, EndpointUrl: peerCredentials.EndpointUrl, - }, peerCredentials.Region) + }, peerCredentials.Region, peerCredentials.RootCAs, peerCredentials.TlsHost) if peerCredentials.RoleArn == nil || *peerCredentials.RoleArn == "" { logger.Info("Received AWS credentials from peer for connector: " + connectorName) return staticProvider, nil @@ -364,6 +402,19 @@ func CreateS3Client(ctx context.Context, credsProvider AWSCredentialsProvider) ( region: options.Region, }, } + } else { + rootCAs, tlsHost := credsProvider.GetTlsConfig() + if rootCAs != nil || tlsHost != "" { + // start with a clone of DefaultTransport so we keep http2, idle-conns, etc. + tlsConfig, err := shared.CreateTlsConfig(tls.VersionTLS13, rootCAs, tlsHost, tlsHost, tlsHost == "") + if err != nil { + return nil, err + } + + tr := http.DefaultTransport.(*http.Transport).Clone() + tr.TLSClientConfig = tlsConfig + options.HTTPClient = &http.Client{Transport: tr} + } } } diff --git a/flow/connectors/utils/cdc_store.go b/flow/connectors/utils/cdc_store.go index a134c04b90..24f3fdc441 100644 --- a/flow/connectors/utils/cdc_store.go +++ b/flow/connectors/utils/cdc_store.go @@ -18,8 +18,8 @@ import ( "github.com/PeerDB-io/peerdb/flow/internal" "github.com/PeerDB-io/peerdb/flow/model" - "github.com/PeerDB-io/peerdb/flow/model/qvalue" "github.com/PeerDB-io/peerdb/flow/shared" + "github.com/PeerDB-io/peerdb/flow/shared/types" ) func encVal(val any) ([]byte, error) { @@ -77,52 +77,53 @@ func init() { // register future record classes here as well, if they are passed/stored as interfaces gob.Register(time.Time{}) gob.Register(decimal.Decimal{}) - gob.Register(qvalue.QValueNull("")) - gob.Register(qvalue.QValueInvalid{}) - gob.Register(qvalue.QValueFloat32{}) - gob.Register(qvalue.QValueFloat64{}) - gob.Register(qvalue.QValueInt8{}) - gob.Register(qvalue.QValueInt16{}) - gob.Register(qvalue.QValueInt32{}) - gob.Register(qvalue.QValueInt64{}) - gob.Register(qvalue.QValueUInt8{}) - gob.Register(qvalue.QValueUInt16{}) - gob.Register(qvalue.QValueUInt32{}) - gob.Register(qvalue.QValueUInt64{}) - gob.Register(qvalue.QValueBoolean{}) - gob.Register(qvalue.QValueQChar{}) - gob.Register(qvalue.QValueString{}) - gob.Register(qvalue.QValueEnum{}) - gob.Register(qvalue.QValueTimestamp{}) - gob.Register(qvalue.QValueTimestampTZ{}) - gob.Register(qvalue.QValueDate{}) - gob.Register(qvalue.QValueTime{}) - gob.Register(qvalue.QValueTimeTZ{}) - gob.Register(qvalue.QValueInterval{}) - gob.Register(qvalue.QValueNumeric{}) - gob.Register(qvalue.QValueBytes{}) - gob.Register(qvalue.QValueUUID{}) - gob.Register(qvalue.QValueJSON{}) - gob.Register(qvalue.QValueHStore{}) - gob.Register(qvalue.QValueGeography{}) - gob.Register(qvalue.QValueGeometry{}) - gob.Register(qvalue.QValuePoint{}) - gob.Register(qvalue.QValueCIDR{}) - 
gob.Register(qvalue.QValueINET{}) - gob.Register(qvalue.QValueMacaddr{}) - gob.Register(qvalue.QValueArrayFloat32{}) - gob.Register(qvalue.QValueArrayFloat64{}) - gob.Register(qvalue.QValueArrayInt16{}) - gob.Register(qvalue.QValueArrayInt32{}) - gob.Register(qvalue.QValueArrayInt64{}) - gob.Register(qvalue.QValueArrayString{}) - gob.Register(qvalue.QValueArrayEnum{}) - gob.Register(qvalue.QValueArrayDate{}) - gob.Register(qvalue.QValueArrayTimestamp{}) - gob.Register(qvalue.QValueArrayTimestampTZ{}) - gob.Register(qvalue.QValueArrayBoolean{}) - gob.Register(qvalue.QValueTSTZRange{}) - gob.Register(qvalue.QValueArrayUUID{}) + gob.Register(types.QValueNull("")) + gob.Register(types.QValueInvalid{}) + gob.Register(types.QValueFloat32{}) + gob.Register(types.QValueFloat64{}) + gob.Register(types.QValueInt8{}) + gob.Register(types.QValueInt16{}) + gob.Register(types.QValueInt32{}) + gob.Register(types.QValueInt64{}) + gob.Register(types.QValueUInt8{}) + gob.Register(types.QValueUInt16{}) + gob.Register(types.QValueUInt32{}) + gob.Register(types.QValueUInt64{}) + gob.Register(types.QValueBoolean{}) + gob.Register(types.QValueQChar{}) + gob.Register(types.QValueString{}) + gob.Register(types.QValueEnum{}) + gob.Register(types.QValueTimestamp{}) + gob.Register(types.QValueTimestampTZ{}) + gob.Register(types.QValueDate{}) + gob.Register(types.QValueTime{}) + gob.Register(types.QValueTimeTZ{}) + gob.Register(types.QValueInterval{}) + gob.Register(types.QValueNumeric{}) + gob.Register(types.QValueBytes{}) + gob.Register(types.QValueUUID{}) + gob.Register(types.QValueJSON{}) + gob.Register(types.QValueHStore{}) + gob.Register(types.QValueGeography{}) + gob.Register(types.QValueGeometry{}) + gob.Register(types.QValuePoint{}) + gob.Register(types.QValueCIDR{}) + gob.Register(types.QValueINET{}) + gob.Register(types.QValueMacaddr{}) + gob.Register(types.QValueArrayFloat32{}) + gob.Register(types.QValueArrayFloat64{}) + gob.Register(types.QValueArrayInt16{}) + gob.Register(types.QValueArrayInt32{}) + gob.Register(types.QValueArrayInt64{}) + gob.Register(types.QValueArrayString{}) + gob.Register(types.QValueArrayEnum{}) + gob.Register(types.QValueArrayDate{}) + gob.Register(types.QValueArrayInterval{}) + gob.Register(types.QValueArrayTimestamp{}) + gob.Register(types.QValueArrayTimestampTZ{}) + gob.Register(types.QValueArrayBoolean{}) + gob.Register(types.QValueArrayUUID{}) + gob.Register(types.QValueArrayNumeric{}) } func (c *cdcStore[T]) initPebbleDB() error { diff --git a/flow/connectors/utils/cdc_store_test.go b/flow/connectors/utils/cdc_store_test.go index 2ede891cc1..f12ca9c2b7 100644 --- a/flow/connectors/utils/cdc_store_test.go +++ b/flow/connectors/utils/cdc_store_test.go @@ -10,27 +10,13 @@ import ( "github.com/stretchr/testify/require" "github.com/PeerDB-io/peerdb/flow/model" - "github.com/PeerDB-io/peerdb/flow/model/qvalue" + "github.com/PeerDB-io/peerdb/flow/shared/types" ) -func getTimeForTesting(t *testing.T) time.Time { - t.Helper() - tv, err := time.Parse(time.RFC3339, "2021-08-01T08:02:00Z") - require.NoError(t, err) - - millisToAdd := 716 - tv = tv.Add(time.Millisecond * time.Duration(millisToAdd)) - - microSecondsToAdd := 506 - tv = tv.Add(time.Microsecond * time.Duration(microSecondsToAdd)) - - return tv -} - -func getDecimalForTesting(t *testing.T) decimal.Decimal { - t.Helper() - return decimal.New(9876543210, 123) -} +var ( + timeForTesting = time.Duration(18342121716506000) + decimalForTesting = decimal.New(9876543210, 123) +) func genKeyAndRec(t *testing.T) (model.TableWithPkey, 
model.Record[model.RecordItems]) { t.Helper() @@ -39,8 +25,8 @@ func genKeyAndRec(t *testing.T) (model.TableWithPkey, model.Record[model.RecordI _, err := rand.Read(pkeyColVal) require.NoError(t, err) - tv := getTimeForTesting(t) - rv := getDecimalForTesting(t) + tv := timeForTesting + rv := decimalForTesting key := model.TableWithPkey{ TableName: "test_src_tbl", @@ -55,10 +41,10 @@ func genKeyAndRec(t *testing.T) (model.TableWithPkey, model.Record[model.RecordI DestinationTableName: "test_dst_tbl", CommitID: 2, Items: model.RecordItems{ - ColToVal: map[string]qvalue.QValue{ - "id": qvalue.QValueInt64{Val: 1}, - "ts": qvalue.QValueTime{Val: tv}, - "rv": qvalue.QValueNumeric{Val: rv}, + ColToVal: map[string]types.QValue{ + "id": types.QValueInt64{Val: 1}, + "ts": types.QValueTime{Val: tv}, + "rv": types.QValueNumeric{Val: rv}, }, }, } diff --git a/flow/connectors/utils/monitoring/monitoring.go b/flow/connectors/utils/monitoring/monitoring.go index acc301ea1a..b0a7a4f91f 100644 --- a/flow/connectors/utils/monitoring/monitoring.go +++ b/flow/connectors/utils/monitoring/monitoring.go @@ -321,6 +321,9 @@ func addPartitionToQRepRun(ctx context.Context, tx pgx.Tx, flowJobName string, return fmt.Errorf("unable to encode TID as string: %w", err) } rangeEnd = rangeEndValue.(string) + case *protos.PartitionRange_ObjectIdRange: + rangeStart = x.ObjectIdRange.Start + rangeEnd = x.ObjectIdRange.End default: return fmt.Errorf("unknown range type: %v", x) } diff --git a/flow/connectors/utils/partition.go b/flow/connectors/utils/partition.go index f60394603a..235ac1b2ce 100644 --- a/flow/connectors/utils/partition.go +++ b/flow/connectors/utils/partition.go @@ -2,63 +2,149 @@ package utils import ( "cmp" + "errors" "fmt" "log/slog" "time" "github.com/google/uuid" "github.com/jackc/pgx/v5/pgtype" + "go.mongodb.org/mongo-driver/v2/bson" "go.temporal.io/sdk/log" "google.golang.org/protobuf/types/known/timestamppb" "github.com/PeerDB-io/peerdb/flow/generated/protos" ) -// Function to compare two values -func compareValues(prevEnd any, start any) int { - switch v := start.(type) { - case int64: - return cmp.Compare(prevEnd.(int64), v) - case uint64: - return cmp.Compare(prevEnd.(uint64), v) - case time.Time: - return prevEnd.(time.Time).Compare(v) - case pgtype.TID: - pe := prevEnd.(pgtype.TID) - if c := cmp.Compare(pe.BlockNumber, v.BlockNumber); c != 0 { +type PartitionRangeType string + +const ( + PartitionEndRangeType PartitionRangeType = "end" + PartitionStartRangeType PartitionRangeType = "start" +) + +type PartitionRangeForComparison struct { + partitionRange *protos.PartitionRange + rangeTypeToCompare PartitionRangeType +} + +// Function to compare the end of a partition with the start of another +func comparePartitionRanges( + previousPartition PartitionRangeForComparison, + currentPartition PartitionRangeForComparison, +) int { + if previousPartition.partitionRange == nil || currentPartition.partitionRange == nil { + slog.Warn("one of the partition ranges is nil, cannot compare") + return 0 + } + switch pr := previousPartition.partitionRange.Range.(type) { + case *protos.PartitionRange_IntRange: + cr, ok := currentPartition.partitionRange.Range.(*protos.PartitionRange_IntRange) + if !ok { + return 0 + } + getVal := func(r *protos.IntPartitionRange, t PartitionRangeType) int64 { + if t == PartitionEndRangeType { + return r.End + } + return r.Start + } + prevVal := getVal(pr.IntRange, previousPartition.rangeTypeToCompare) + currVal := getVal(cr.IntRange, currentPartition.rangeTypeToCompare) + return 
cmp.Compare(prevVal, currVal) + case *protos.PartitionRange_UintRange: + cr, ok := currentPartition.partitionRange.Range.(*protos.PartitionRange_UintRange) + if !ok { + return 0 + } + getVal := func(r *protos.UIntPartitionRange, t PartitionRangeType) uint64 { + if t == PartitionEndRangeType { + return r.End + } + return r.Start + } + prevVal := getVal(pr.UintRange, previousPartition.rangeTypeToCompare) + currVal := getVal(cr.UintRange, currentPartition.rangeTypeToCompare) + return cmp.Compare(prevVal, currVal) + case *protos.PartitionRange_TimestampRange: + cr, ok := currentPartition.partitionRange.Range.(*protos.PartitionRange_TimestampRange) + if !ok { + return 0 + } + getTime := func(r *protos.TimestampPartitionRange, t PartitionRangeType) time.Time { + if t == PartitionEndRangeType { + return r.End.AsTime() + } + return r.Start.AsTime() + } + prevVal := getTime(pr.TimestampRange, previousPartition.rangeTypeToCompare) + currVal := getTime(cr.TimestampRange, currentPartition.rangeTypeToCompare) + return prevVal.Compare(currVal) + case *protos.PartitionRange_TidRange: + cr, ok := currentPartition.partitionRange.Range.(*protos.PartitionRange_TidRange) + if !ok { + return 0 + } + getTuple := func(r *protos.TIDPartitionRange, t PartitionRangeType) *protos.TID { + if t == PartitionEndRangeType { + return r.End + } + return r.Start + } + prevTuple := getTuple(pr.TidRange, previousPartition.rangeTypeToCompare) + currTuple := getTuple(cr.TidRange, currentPartition.rangeTypeToCompare) + if c := cmp.Compare(prevTuple.BlockNumber, currTuple.BlockNumber); c != 0 { return c } - return cmp.Compare(pe.OffsetNumber, v.OffsetNumber) - case uint32: // xmin - return cmp.Compare(prevEnd.(uint32), v) + return cmp.Compare(prevTuple.OffsetNumber, currTuple.OffsetNumber) default: return 0 } } // Function to adjust start value -func adjustStartValue(prevEnd any, start any) any { - switch start.(type) { - case int64: - return prevEnd.(int64) + 1 - case int32: - return int32(prevEnd.(int64) + 1) - case time.Time: - // postgres & mysql timestamps have microsecond precision - return prevEnd.(time.Time).Add(1 * time.Microsecond) - case pgtype.TID: - pe := prevEnd.(pgtype.TID) - if pe.OffsetNumber < 0xFFFF { - pe.OffsetNumber++ - } else { - pe.BlockNumber++ - pe.OffsetNumber = 0 - } - return pe - case uint32: - return prevEnd.(uint32) + 1 +func adjustStartValueOfPartition(prevRange *protos.PartitionRange, currentRange *protos.PartitionRange) { + if prevRange == nil || currentRange == nil { + return + } + + switch cr := currentRange.Range.(type) { + case *protos.PartitionRange_IntRange: + if pr, ok := prevRange.Range.(*protos.PartitionRange_IntRange); ok { + cr.IntRange.Start = pr.IntRange.End + 1 + } + return + + case *protos.PartitionRange_UintRange: + if pr, ok := prevRange.Range.(*protos.PartitionRange_UintRange); ok { + cr.UintRange.Start = pr.UintRange.End + 1 + } + return + + case *protos.PartitionRange_TimestampRange: + if pr, ok := prevRange.Range.(*protos.PartitionRange_TimestampRange); ok { + cr.TimestampRange.Start = timestamppb.New(pr.TimestampRange.End.AsTime().Add(1 * time.Microsecond)) + } + return + + case *protos.PartitionRange_TidRange: + if pr, ok := prevRange.Range.(*protos.PartitionRange_TidRange); ok { + start := &protos.TID{ + BlockNumber: pr.TidRange.End.BlockNumber, + OffsetNumber: pr.TidRange.End.OffsetNumber, + } + if start.OffsetNumber < 0xFFFF { + start.OffsetNumber++ + } else { + start.BlockNumber++ + start.OffsetNumber = 0 + } + cr.TidRange.Start = start + } + return + default: - return 
start + return } } @@ -128,6 +214,20 @@ func createUIntPartition(start uint64, end uint64) *protos.QRepPartition { } } +func createObjectIdPartition(start bson.ObjectID, end bson.ObjectID) *protos.QRepPartition { + return &protos.QRepPartition{ + PartitionId: uuid.New().String(), + Range: &protos.PartitionRange{ + Range: &protos.PartitionRange_ObjectIdRange{ + ObjectIdRange: &protos.ObjectIdPartitionRange{ + Start: start.Hex(), + End: end.Hex(), + }, + }, + }, + } +} + type PartitionHelper struct { logger log.Logger prevStart any @@ -144,66 +244,120 @@ func NewPartitionHelper(logger log.Logger) *PartitionHelper { func (p *PartitionHelper) AddPartition(start any, end any) error { p.logger.Info("adding partition", slog.Any("start", start), slog.Any("end", end)) + currentPartition, err := p.getPartitionForStartAndEnd(start, end) + if err != nil { + return fmt.Errorf("error getting current partition from start and end: %w", err) + } + + prevPartition, err := p.getPartitionForStartAndEnd(p.prevStart, p.prevEnd) + if err != nil { + return fmt.Errorf("error getting previous partition from prevStart and prevEnd: %w", err) + } // Skip partition if it's fully contained within the previous one // If it's not fully contained but overlaps, adjust the start - if p.prevEnd != nil { - if compareValues(p.prevEnd, start) >= 0 { + if prevPartition != nil { + prevEndCompareStart := comparePartitionRanges( + PartitionRangeForComparison{ + partitionRange: prevPartition.Range, + rangeTypeToCompare: PartitionEndRangeType, + }, + PartitionRangeForComparison{ + partitionRange: currentPartition.Range, + rangeTypeToCompare: PartitionStartRangeType, + }) + if prevEndCompareStart >= 0 { + prevEndCompareEnd := comparePartitionRanges( + PartitionRangeForComparison{ + partitionRange: prevPartition.Range, + rangeTypeToCompare: PartitionEndRangeType, + }, + PartitionRangeForComparison{ + partitionRange: currentPartition.Range, + rangeTypeToCompare: PartitionEndRangeType, + }, + ) // If end is also less than or equal to prevEnd, skip this partition - if compareValues(p.prevEnd, end) >= 0 { + if prevEndCompareEnd >= 0 { // log the skipped partition p.logger.Info("skipping partition, fully contained within previous partition", slog.Any("start", start), slog.Any("end", end), slog.Any("prevStart", p.prevStart), slog.Any("prevEnd", p.prevEnd)) return nil } // If end is greater than prevEnd, adjust the start - start = adjustStartValue(p.prevEnd, start) + adjustStartValueOfPartition(prevPartition.Range, currentPartition.Range) } } + if err := p.updatePartitionHelper(currentPartition); err != nil { + return fmt.Errorf("error adjusting start value: %w", err) + } + + return nil +} + +func (p *PartitionHelper) getPartitionForStartAndEnd(start any, end any) (*protos.QRepPartition, error) { + if start == nil || end == nil { + return nil, nil + } switch v := start.(type) { case int64: - p.partitions = append(p.partitions, createIntPartition(v, end.(int64))) - p.prevStart = v - p.prevEnd = end + return createIntPartition(v, end.(int64)), nil case uint64: - p.partitions = append(p.partitions, createUIntPartition(v, end.(uint64))) - p.prevStart = v - p.prevEnd = end.(uint64) + return createUIntPartition(v, end.(uint64)), nil case int32: - p.partitions = append(p.partitions, createIntPartition(int64(v), int64(end.(int32)))) - p.prevStart = int64(v) - p.prevEnd = int64(end.(int32)) + return createIntPartition(int64(v), int64(end.(int32))), nil case uint32: - p.partitions = append(p.partitions, createUIntPartition(uint64(v), 
uint64(end.(uint32)))) - p.prevStart = uint64(v) - p.prevEnd = uint64(end.(uint32)) + return createUIntPartition(uint64(v), uint64(end.(uint32))), nil case int16: - p.partitions = append(p.partitions, createIntPartition(int64(v), int64(end.(int16)))) - p.prevStart = int64(v) - p.prevEnd = int64(end.(int16)) + return createIntPartition(int64(v), int64(end.(int16))), nil case uint16: - p.partitions = append(p.partitions, createUIntPartition(uint64(v), uint64(end.(uint16)))) - p.prevStart = uint64(v) - p.prevEnd = uint64(end.(uint16)) + return createUIntPartition(uint64(v), uint64(end.(uint16))), nil case int8: - p.partitions = append(p.partitions, createIntPartition(int64(v), int64(end.(int8)))) - p.prevStart = int64(v) - p.prevEnd = int64(end.(int8)) + return createIntPartition(int64(v), int64(end.(int8))), nil case uint8: - p.partitions = append(p.partitions, createUIntPartition(uint64(v), uint64(end.(uint8)))) - p.prevStart = uint64(v) - p.prevEnd = uint64(end.(uint8)) + return createUIntPartition(uint64(v), uint64(end.(uint8))), nil case time.Time: - p.partitions = append(p.partitions, createTimePartition(v, end.(time.Time))) - p.prevStart = v - p.prevEnd = end + return createTimePartition(v, end.(time.Time)), nil case pgtype.TID: - p.partitions = append(p.partitions, createTIDPartition(v, end.(pgtype.TID))) + return createTIDPartition(v, end.(pgtype.TID)), nil + case bson.ObjectID: + p.partitions = append(p.partitions, createObjectIdPartition(v, end.(bson.ObjectID))) p.prevStart = v p.prevEnd = end default: - return fmt.Errorf("unsupported type: %T", v) + return nil, fmt.Errorf("unsupported type: %T", v) + } + return nil, nil +} + +func (p *PartitionHelper) updatePartitionHelper(partition *protos.QRepPartition) error { + if partition == nil { + return errors.New("partition is nil") + } + p.partitions = append(p.partitions, partition) + + switch r := partition.Range.Range.(type) { + case *protos.PartitionRange_IntRange: + p.prevStart = r.IntRange.Start + p.prevEnd = r.IntRange.End + case *protos.PartitionRange_UintRange: + p.prevStart = r.UintRange.Start + p.prevEnd = r.UintRange.End + case *protos.PartitionRange_TimestampRange: + p.prevStart = r.TimestampRange.Start.AsTime() + p.prevEnd = r.TimestampRange.End.AsTime() + case *protos.PartitionRange_TidRange: + p.prevStart = pgtype.TID{ + BlockNumber: r.TidRange.Start.BlockNumber, + OffsetNumber: uint16(r.TidRange.Start.OffsetNumber), + } + p.prevEnd = pgtype.TID{ + BlockNumber: r.TidRange.End.BlockNumber, + OffsetNumber: uint16(r.TidRange.End.OffsetNumber), + } + default: + return fmt.Errorf("unsupported partition range type: %T", r) } return nil diff --git a/flow/connectors/utils/peers.go b/flow/connectors/utils/peers.go index c48e936e2b..c4527bdf84 100644 --- a/flow/connectors/utils/peers.go +++ b/flow/connectors/utils/peers.go @@ -87,6 +87,12 @@ func CreatePeerNoValidate( return wrongConfigResponse, nil } innerConfig = esConfigObject.ElasticsearchConfig + case protos.DBType_MONGO: + mongoConfigObject, ok := config.(*protos.Peer_MongoConfig) + if !ok { + return wrongConfigResponse, nil + } + innerConfig = mongoConfigObject.MongoConfig default: return wrongConfigResponse, nil } diff --git a/flow/connectors/utils/rds.go b/flow/connectors/utils/rds.go index 8a857ecfee..77cfea0add 100644 --- a/flow/connectors/utils/rds.go +++ b/flow/connectors/utils/rds.go @@ -14,6 +14,7 @@ import ( "github.com/PeerDB-io/peerdb/flow/generated/protos" "github.com/PeerDB-io/peerdb/flow/internal" + "github.com/PeerDB-io/peerdb/flow/shared" 
"github.com/PeerDB-io/peerdb/flow/shared/exceptions" ) @@ -114,15 +115,11 @@ func buildRdsToken( if err != nil { return "", fmt.Errorf("failed to get AWS credentials provider: %w", err) } - endpoint := fmt.Sprintf("%s:%d", connConfig.Host, connConfig.Port) matches := regionRegex.FindStringSubmatch(connConfig.Host) if len(matches) < 2 { - return "", fmt.Errorf("failed to extract region from endpoint %s", connConfig.Host) + return "", fmt.Errorf("failed to extract region from host %s", connConfig.Host) } region := matches[1] - token, err := auth.BuildAuthToken(ctx, endpoint, region, connConfig.User, awsCredentialsProvider.GetUnderlyingProvider()) - if err != nil { - return "", err - } - return token, nil + endpoint := shared.JoinHostPort(connConfig.Host, connConfig.Port) + return auth.BuildAuthToken(ctx, endpoint, region, connConfig.User, awsCredentialsProvider.GetUnderlyingProvider()) } diff --git a/flow/connectors/utils/ssh.go b/flow/connectors/utils/ssh.go index 08cd9c1325..833e36f156 100644 --- a/flow/connectors/utils/ssh.go +++ b/flow/connectors/utils/ssh.go @@ -11,6 +11,7 @@ import ( "github.com/PeerDB-io/peerdb/flow/generated/protos" "github.com/PeerDB-io/peerdb/flow/internal" + "github.com/PeerDB-io/peerdb/flow/shared" "github.com/PeerDB-io/peerdb/flow/shared/exceptions" ) @@ -71,7 +72,7 @@ func NewSSHTunnel( ) (SSHTunnel, error) { if sshConfig != nil { logger := internal.LoggerFromCtx(ctx) - sshServer := fmt.Sprintf("%s:%d", sshConfig.Host, sshConfig.Port) + sshServer := shared.JoinHostPort(sshConfig.Host, sshConfig.Port) clientConfig, err := GetSSHClientConfig(sshConfig) if err != nil { logger.Error("Failed to get SSH client config", "error", err) diff --git a/flow/connectors/utils/stream.go b/flow/connectors/utils/stream.go index 02f8abdc59..d8808dda54 100644 --- a/flow/connectors/utils/stream.go +++ b/flow/connectors/utils/stream.go @@ -5,54 +5,58 @@ import ( "time" "github.com/google/uuid" + "github.com/shopspring/decimal" "github.com/PeerDB-io/peerdb/flow/generated/protos" "github.com/PeerDB-io/peerdb/flow/model" "github.com/PeerDB-io/peerdb/flow/model/qvalue" + "github.com/PeerDB-io/peerdb/flow/shared/types" ) -func RecordsToRawTableStream[Items model.Items](req *model.RecordsToStreamRequest[Items]) (*model.QRecordStream, error) { +func RecordsToRawTableStream[Items model.Items]( + req *model.RecordsToStreamRequest[Items], numericTruncator model.StreamNumericTruncator, +) (*model.QRecordStream, error) { recordStream := model.NewQRecordStream(1 << 17) - recordStream.SetSchema(qvalue.QRecordSchema{ - Fields: []qvalue.QField{ + recordStream.SetSchema(types.QRecordSchema{ + Fields: []types.QField{ { Name: "_peerdb_uid", - Type: qvalue.QValueKindString, + Type: types.QValueKindString, Nullable: false, }, { Name: "_peerdb_timestamp", - Type: qvalue.QValueKindInt64, + Type: types.QValueKindInt64, Nullable: false, }, { Name: "_peerdb_destination_table_name", - Type: qvalue.QValueKindString, + Type: types.QValueKindString, Nullable: false, }, { Name: "_peerdb_data", - Type: qvalue.QValueKindString, + Type: types.QValueKindString, Nullable: false, }, { Name: "_peerdb_record_type", - Type: qvalue.QValueKindInt64, + Type: types.QValueKindInt64, Nullable: true, }, { Name: "_peerdb_match_data", - Type: qvalue.QValueKindString, + Type: types.QValueKindString, Nullable: true, }, { Name: "_peerdb_batch_id", - Type: qvalue.QValueKindInt64, + Type: types.QValueKindInt64, Nullable: true, }, { Name: "_peerdb_unchanged_toast_columns", - Type: qvalue.QValueKindString, + Type: types.QValueKindString, 
Nullable: true, }, }, @@ -61,7 +65,9 @@ func RecordsToRawTableStream[Items model.Items](req *model.RecordsToStreamReques go func() { for record := range req.GetRecords() { record.PopulateCountMap(req.TableMapping) - qRecord, err := recordToQRecordOrError(req.BatchID, record) + qRecord, err := recordToQRecordOrError( + req.BatchID, record, req.TargetDWH, req.UnboundedNumericAsString, numericTruncator, + ) if err != nil { recordStream.Close(err) return @@ -75,21 +81,32 @@ func RecordsToRawTableStream[Items model.Items](req *model.RecordsToStreamReques return recordStream, nil } -func recordToQRecordOrError[Items model.Items](batchID int64, record model.Record[Items]) ([]qvalue.QValue, error) { - var entries [8]qvalue.QValue +func recordToQRecordOrError[Items model.Items]( + batchID int64, record model.Record[Items], targetDWH protos.DBType, unboundedNumericAsString bool, + numericTruncator model.StreamNumericTruncator, +) ([]types.QValue, error) { + var entries [8]types.QValue switch typedRecord := record.(type) { case *model.InsertRecord[Items]: - itemsJSON, err := model.ItemsToJSON(typedRecord.Items) + tableNumericTruncator := numericTruncator.Get(typedRecord.DestinationTableName) + preprocessedItems := truncateNumerics( + typedRecord.Items, targetDWH, unboundedNumericAsString, tableNumericTruncator, + ) + itemsJSON, err := model.ItemsToJSON(preprocessedItems) if err != nil { return nil, fmt.Errorf("failed to serialize insert record items to JSON: %w", err) } - entries[3] = qvalue.QValueString{Val: itemsJSON} - entries[4] = qvalue.QValueInt64{Val: 0} - entries[5] = qvalue.QValueString{Val: ""} - entries[7] = qvalue.QValueString{Val: ""} + entries[3] = types.QValueString{Val: itemsJSON} + entries[4] = types.QValueInt64{Val: 0} + entries[5] = types.QValueString{Val: ""} + entries[7] = types.QValueString{Val: ""} case *model.UpdateRecord[Items]: - newItemsJSON, err := model.ItemsToJSON(typedRecord.NewItems) + tableNumericTruncator := numericTruncator.Get(typedRecord.DestinationTableName) + preprocessedItems := truncateNumerics( + typedRecord.NewItems, targetDWH, unboundedNumericAsString, tableNumericTruncator, + ) + newItemsJSON, err := model.ItemsToJSON(preprocessedItems) if err != nil { return nil, fmt.Errorf("failed to serialize update record new items to JSON: %w", err) } @@ -98,10 +115,10 @@ func recordToQRecordOrError[Items model.Items](batchID int64, record model.Recor return nil, fmt.Errorf("failed to serialize update record old items to JSON: %w", err) } - entries[3] = qvalue.QValueString{Val: newItemsJSON} - entries[4] = qvalue.QValueInt64{Val: 1} - entries[5] = qvalue.QValueString{Val: oldItemsJSON} - entries[7] = qvalue.QValueString{Val: KeysToString(typedRecord.UnchangedToastColumns)} + entries[3] = types.QValueString{Val: newItemsJSON} + entries[4] = types.QValueInt64{Val: 1} + entries[5] = types.QValueString{Val: oldItemsJSON} + entries[7] = types.QValueString{Val: KeysToString(typedRecord.UnchangedToastColumns)} case *model.DeleteRecord[Items]: itemsJSON, err := model.ItemsToJSON(typedRecord.Items) @@ -109,10 +126,10 @@ func recordToQRecordOrError[Items model.Items](batchID int64, record model.Recor return nil, fmt.Errorf("failed to serialize delete record items to JSON: %w", err) } - entries[3] = qvalue.QValueString{Val: itemsJSON} - entries[4] = qvalue.QValueInt64{Val: 2} - entries[5] = qvalue.QValueString{Val: itemsJSON} - entries[7] = qvalue.QValueString{Val: KeysToString(typedRecord.UnchangedToastColumns)} + entries[3] = types.QValueString{Val: itemsJSON} + entries[4] = 
types.QValueInt64{Val: 2} + entries[5] = types.QValueString{Val: itemsJSON} + entries[7] = types.QValueString{Val: KeysToString(typedRecord.UnchangedToastColumns)} case *model.MessageRecord[Items]: return nil, nil @@ -121,10 +138,10 @@ func recordToQRecordOrError[Items model.Items](batchID int64, record model.Recor return nil, fmt.Errorf("unknown record type: %T", typedRecord) } - entries[0] = qvalue.QValueUUID{Val: uuid.New()} - entries[1] = qvalue.QValueInt64{Val: time.Now().UnixNano()} - entries[2] = qvalue.QValueString{Val: record.GetDestinationTableName()} - entries[6] = qvalue.QValueInt64{Val: batchID} + entries[0] = types.QValueUUID{Val: uuid.New()} + entries[1] = types.QValueInt64{Val: time.Now().UnixNano()} + entries[2] = types.QValueString{Val: record.GetDestinationTableName()} + entries[6] = types.QValueInt64{Val: batchID} return entries[:], nil } @@ -137,3 +154,80 @@ func InitialiseTableRowsMap(tableMaps []*protos.TableMapping) map[string]*model. return tableNameRowsMapping } + +func truncateNumerics( + items model.Items, targetDWH protos.DBType, unboundedNumericAsString bool, + numericTruncator *model.CdcTableNumericTruncator, +) model.Items { + recordItems, ok := items.(model.RecordItems) + if !ok { + return items + } + hasNumerics := false + for col, val := range recordItems.ColToVal { + if !numericTruncator.Get(col).Skip { + if val.Kind() == types.QValueKindNumeric || val.Kind() == types.QValueKindArrayNumeric { + hasNumerics = true + break + } + } + } + if !hasNumerics { + return items + } + + newItems := model.NewRecordItems(recordItems.Len()) + for col, val := range recordItems.ColToVal { + newVal := val + + columnTruncator := numericTruncator.Get(col) + if !columnTruncator.Skip { + switch numeric := val.(type) { + case types.QValueNumeric: + destType := qvalue.GetNumericDestinationType( + numeric.Precision, numeric.Scale, targetDWH, unboundedNumericAsString, + ) + if destType.IsString { + newVal = val + } else { + truncated, ok := qvalue.TruncateNumeric( + numeric.Val, destType.Precision, destType.Scale, targetDWH, columnTruncator.Stat, + ) + if !ok { + truncated = decimal.Zero + } + newVal = types.QValueNumeric{ + Val: truncated, + Precision: destType.Precision, + Scale: destType.Scale, + } + } + case types.QValueArrayNumeric: + destType := qvalue.GetNumericDestinationType( + numeric.Precision, numeric.Scale, targetDWH, unboundedNumericAsString, + ) + if destType.IsString { + newVal = val + } else { + truncatedArr := make([]decimal.Decimal, 0, len(numeric.Val)) + for _, num := range numeric.Val { + truncated, ok := qvalue.TruncateNumeric( + num, destType.Precision, destType.Scale, targetDWH, columnTruncator.Stat, + ) + if !ok { + truncated = decimal.Zero + } + truncatedArr = append(truncatedArr, truncated) + } + newVal = types.QValueArrayNumeric{ + Val: truncatedArr, + Precision: destType.Precision, + Scale: destType.Scale, + } + } + } + } + newItems.ColToVal[col] = newVal + } + return newItems +} diff --git a/flow/e2e/api/api_test.go b/flow/e2e/api/api_test.go index 0b19572eac..13abff5c65 100644 --- a/flow/e2e/api/api_test.go +++ b/flow/e2e/api/api_test.go @@ -782,13 +782,14 @@ func (s Suite) TestCustomSync() { require.ErrorContains(s.t, err, "mirror unknown-flow does not exist") tblName := "apitable" + srcTableName := e2e.AttachSchema(s, tblName) require.NoError(s.t, s.source.Exec(s.t.Context(), - fmt.Sprintf("CREATE TABLE %s(id int primary key, val text)", e2e.AttachSchema(s, tblName)))) + fmt.Sprintf("CREATE TABLE %s(id int primary key, val text)", srcTableName))) 
require.NoError(s.t, s.source.Exec(s.t.Context(), - fmt.Sprintf("INSERT INTO %s(id, val) values (1,'first')", e2e.AttachSchema(s, tblName)))) + fmt.Sprintf("INSERT INTO %s(id, val) values (1,'first')", srcTableName))) connectionGen := e2e.FlowConnectionGenerationConfig{ FlowJobName: "mirrorapi" + s.suffix, - TableNameMapping: map[string]string{e2e.AttachSchema(s, tblName): tblName}, + TableNameMapping: map[string]string{srcTableName: tblName}, Destination: s.ch.Peer().Name, } flowConnConfig := connectionGen.GenerateFlowConnectionConfigs(s) @@ -841,14 +842,17 @@ func (s Suite) TestCustomSync() { return env.GetFlowStatus(s.t) == protos.FlowStatus_STATUS_PAUSED }) + // TODO fix race, signals can be dropped if received with unfortunate timing + time.Sleep(time.Second) + customResponse, err := s.CustomSyncFlow(s.t.Context(), &protos.CreateCustomSyncRequest{FlowJobName: flowConnConfig.FlowJobName, NumberOfSyncs: 1}) require.NoError(s.t, err) require.Equal(s.t, flowConnConfig.FlowJobName, customResponse.FlowJobName) require.Equal(s.t, int32(1), customResponse.NumberOfSyncs) require.NoError(s.t, s.source.Exec(s.t.Context(), - fmt.Sprintf("INSERT INTO %s(id, val) values (2,'pause')", e2e.AttachSchema(s, tblName)))) - e2e.EnvWaitFor(s.t, env, 3*time.Minute, "pausing for add table", func() bool { + fmt.Sprintf("INSERT INTO %s(id, val) values (2,'pause')", srcTableName))) + e2e.EnvWaitFor(s.t, env, 3*time.Minute, "pausing for custom sync", func() bool { return env.GetFlowStatus(s.t) == protos.FlowStatus_STATUS_PAUSED }) e2e.RequireEqualTables(s.ch, tblName, "id,val") @@ -882,10 +886,10 @@ func (s Suite) TestCustomSync() { } func (s Suite) TestQRep() { - if _, ok := s.source.(*e2e.PostgresSource); !ok { - s.t.Skip("only run with pg as mysql qrep isn't really supported") - } - + peerType, err := s.GetPeerType(s.t.Context(), &protos.PeerInfoRequest{ + PeerName: s.source.GeneratePeer(s.t).Name, + }) + require.NoError(s.t, err) tblName := "qrepapi" schemaQualified := e2e.AttachSchema(s, tblName) require.NoError(s.t, s.source.Exec(s.t.Context(), @@ -893,22 +897,24 @@ func (s Suite) TestQRep() { require.NoError(s.t, s.source.Exec(s.t.Context(), fmt.Sprintf("INSERT INTO %s(id, val) values (1,'first')", schemaQualified))) - sourcePeer := s.Source().GeneratePeer(s.t) qrepConfig := e2e.CreateQRepWorkflowConfig( s.t, - "qrepapiflow", + "qrepapiflow"+"_"+peerType.PeerType, schemaQualified, - schemaQualified+"dst", + tblName, fmt.Sprintf("SELECT * FROM %s WHERE id BETWEEN {{.start}} AND {{.end}}", schemaQualified), - sourcePeer.Name, + s.ch.Peer().Name, "", true, "", "", ) + qrepConfig.SourceName = s.source.GeneratePeer(s.t).Name qrepConfig.WatermarkColumn = "id" - - _, err := s.CreateQRepFlow(s.t.Context(), &protos.CreateQRepFlowRequest{ + qrepConfig.InitialCopyOnly = false + qrepConfig.WaitBetweenBatchesSeconds = 5 + qrepConfig.NumRowsPerPartition = 1 + _, err = s.CreateQRepFlow(s.t.Context(), &protos.CreateQRepFlowRequest{ QrepConfig: qrepConfig, CreateCatalogEntry: true, }) @@ -917,19 +923,23 @@ func (s Suite) TestQRep() { tc := e2e.NewTemporalClient(s.t) env, err := e2e.GetPeerflow(s.t.Context(), s.pg.PostgresConnector.Conn(), tc, qrepConfig.FlowJobName) require.NoError(s.t, err) - e2e.EnvWaitForFinished(s.t, env, 3*time.Minute) - require.NoError(s.t, env.Error(s.t.Context())) + e2e.EnvWaitForEqualTables(env, s.ch, "qrep initial load", tblName, "id,val") + + require.NoError(s.t, s.source.Exec(s.t.Context(), + fmt.Sprintf("INSERT INTO %s(id, val) values (2,'second')", schemaQualified))) + + 
e2e.EnvWaitForEqualTables(env, s.ch, "insert post qrep initial load", tblName, "id,val") statusResponse, err := s.MirrorStatus(s.t.Context(), &protos.MirrorStatusRequest{ FlowJobName: qrepConfig.FlowJobName, IncludeFlowInfo: true, ExcludeBatches: false, }) require.NoError(s.t, err) - require.Equal(s.t, protos.FlowStatus_STATUS_COMPLETED, statusResponse.CurrentFlowState) qStatus := statusResponse.GetQrepStatus() require.NotNil(s.t, qStatus) - require.Len(s.t, qStatus.Partitions, 1) - require.Equal(s.t, int64(1), qStatus.Partitions[0].RowsInPartition) - require.Equal(s.t, int64(1), qStatus.Partitions[0].RowsSynced) + require.Len(s.t, qStatus.Partitions, 2) + + env.Cancel(s.t.Context()) + e2e.RequireEnvCanceled(s.t, env) } diff --git a/flow/e2e/bigquery/bigquery_helper.go b/flow/e2e/bigquery/bigquery_helper.go index 04350d4e93..ef06bf5801 100644 --- a/flow/e2e/bigquery/bigquery_helper.go +++ b/flow/e2e/bigquery/bigquery_helper.go @@ -21,7 +21,7 @@ import ( "github.com/PeerDB-io/peerdb/flow/e2eshared" "github.com/PeerDB-io/peerdb/flow/generated/protos" "github.com/PeerDB-io/peerdb/flow/model" - "github.com/PeerDB-io/peerdb/flow/model/qvalue" + "github.com/PeerDB-io/peerdb/flow/shared/types" ) type BigQueryTestHelper struct { @@ -173,33 +173,35 @@ func (b *BigQueryTestHelper) countRowsWithDataset(ctx context.Context, dataset, return int(cntI64), nil } -func toQValue(bqValue bigquery.Value) (qvalue.QValue, error) { - // Based on the real type of the bigquery.Value, we create a qvalue.QValue +func toQValue(bqValue bigquery.Value) (types.QValue, error) { + // Based on the real type of the bigquery.Value, we create a types.QValue switch v := bqValue.(type) { case int64: - return qvalue.QValueInt64{Val: v}, nil + return types.QValueInt64{Val: v}, nil case float64: - return qvalue.QValueFloat64{Val: v}, nil + return types.QValueFloat64{Val: v}, nil case string: - return qvalue.QValueString{Val: v}, nil + return types.QValueString{Val: v}, nil case bool: - return qvalue.QValueBoolean{Val: v}, nil + return types.QValueBoolean{Val: v}, nil case civil.Date: - return qvalue.QValueDate{Val: v.In(time.UTC)}, nil + return types.QValueDate{Val: v.In(time.UTC)}, nil case civil.Time: - tm := time.Unix(int64(v.Hour)*3600+int64(v.Minute)*60+int64(v.Second), int64(v.Nanosecond)) - return qvalue.QValueTime{Val: tm}, nil + return types.QValueTime{Val: time.Duration(v.Hour)*time.Hour + + time.Duration(v.Minute)*time.Minute + + time.Duration(v.Second)*time.Second + + time.Duration(v.Nanosecond)*time.Nanosecond}, nil case time.Time: - return qvalue.QValueTimestamp{Val: v}, nil + return types.QValueTimestamp{Val: v}, nil case *big.Rat: - return qvalue.QValueNumeric{Val: decimal.NewFromBigRat(v, 32)}, nil + return types.QValueNumeric{Val: decimal.NewFromBigRat(v, 32)}, nil case []uint8: - return qvalue.QValueBytes{Val: v}, nil + return types.QValueBytes{Val: v}, nil case []bigquery.Value: // If the type is an array, we need to convert each element // we can assume all elements are of the same type, let us use first element if len(v) == 0 { - return qvalue.QValueNull(qvalue.QValueKindInvalid), nil + return types.QValueNull(types.QValueKindInvalid), nil } firstElement := v[0] @@ -209,44 +211,50 @@ func toQValue(bqValue bigquery.Value) (qvalue.QValue, error) { for _, val := range v { arr = append(arr, val.(int64)) } - return qvalue.QValueArrayInt64{Val: arr}, nil + return types.QValueArrayInt64{Val: arr}, nil case float64: var arr []float64 for _, val := range v { arr = append(arr, val.(float64)) } - return 
qvalue.QValueArrayFloat64{Val: arr}, nil + return types.QValueArrayFloat64{Val: arr}, nil case string: var arr []string for _, val := range v { arr = append(arr, val.(string)) } - return qvalue.QValueArrayString{Val: arr}, nil + return types.QValueArrayString{Val: arr}, nil case time.Time: var arr []time.Time for _, val := range v { arr = append(arr, val.(time.Time)) } - return qvalue.QValueArrayTimestamp{Val: arr}, nil + return types.QValueArrayTimestamp{Val: arr}, nil case civil.Date: var arr []time.Time for _, val := range v { arr = append(arr, val.(civil.Date).In(time.UTC)) } - return qvalue.QValueArrayDate{Val: arr}, nil + return types.QValueArrayDate{Val: arr}, nil case bool: var arr []bool for _, val := range v { arr = append(arr, val.(bool)) } - return qvalue.QValueArrayBoolean{Val: arr}, nil + return types.QValueArrayBoolean{Val: arr}, nil + case *big.Rat: + var arr []decimal.Decimal + for _, val := range v { + arr = append(arr, decimal.NewFromBigRat(val.(*big.Rat), 32)) + } + return types.QValueArrayNumeric{Val: arr}, nil default: // If type is unsupported, return error return nil, fmt.Errorf("bqHelper unsupported type %T", et) } case nil: - return qvalue.QValueNull(qvalue.QValueKindInvalid), nil + return types.QValueNull(types.QValueKindInvalid), nil default: // If type is unsupported, return error return nil, fmt.Errorf("bqHelper unsupported type %T", v) @@ -254,12 +262,12 @@ func toQValue(bqValue bigquery.Value) (qvalue.QValue, error) { } // bqSchemaToQRecordSchema converts a bigquery schema to a QRecordSchema. -func bqSchemaToQRecordSchema(schema bigquery.Schema) qvalue.QRecordSchema { - fields := make([]qvalue.QField, 0, len(schema)) +func bqSchemaToQRecordSchema(schema bigquery.Schema) types.QRecordSchema { + fields := make([]types.QField, 0, len(schema)) for _, fieldSchema := range schema { fields = append(fields, peer_bq.BigQueryFieldToQField(fieldSchema)) } - return qvalue.QRecordSchema{Fields: fields} + return types.QRecordSchema{Fields: fields} } func (b *BigQueryTestHelper) ExecuteAndProcessQuery(ctx context.Context, query string) (*model.QRecordBatch, error) { @@ -270,7 +278,7 @@ func (b *BigQueryTestHelper) ExecuteAndProcessQuery(ctx context.Context, query s return nil, fmt.Errorf("failed to run command: %w", err) } - var records [][]qvalue.QValue + var records [][]types.QValue for { var row []bigquery.Value if err := it.Next(&row); err != nil { @@ -280,8 +288,8 @@ func (b *BigQueryTestHelper) ExecuteAndProcessQuery(ctx context.Context, query s return nil, fmt.Errorf("failed to iterate over query results: %w", err) } - // Convert []bigquery.Value to []qvalue.QValue - qValues := make([]qvalue.QValue, len(row)) + // Convert []bigquery.Value to []types.QValue + qValues := make([]types.QValue, len(row)) for i, val := range row { qv, err := toQValue(val) if err != nil { @@ -371,7 +379,7 @@ func (b *BigQueryTestHelper) RunInt64Query(ctx context.Context, query string) (i return 0, fmt.Errorf("expected only 1 record, got %d", len(recordBatch.Records)) } - if v, ok := recordBatch.Records[0][0].(qvalue.QValueInt64); ok { + if v, ok := recordBatch.Records[0][0].(types.QValueInt64); ok { return v.Val, nil } return 0, fmt.Errorf("non-integer result: %T", recordBatch.Records[0][0]) diff --git a/flow/e2e/bigquery/peer_flow_bq_test.go b/flow/e2e/bigquery/peer_flow_bq_test.go index 9a2c4d0e10..ad9ad2e207 100644 --- a/flow/e2e/bigquery/peer_flow_bq_test.go +++ b/flow/e2e/bigquery/peer_flow_bq_test.go @@ -14,8 +14,8 @@ import ( "github.com/PeerDB-io/peerdb/flow/e2e" 
"github.com/PeerDB-io/peerdb/flow/e2eshared" "github.com/PeerDB-io/peerdb/flow/generated/protos" - "github.com/PeerDB-io/peerdb/flow/model/qvalue" "github.com/PeerDB-io/peerdb/flow/shared" + "github.com/PeerDB-io/peerdb/flow/shared/types" peerflow "github.com/PeerDB-io/peerdb/flow/workflows" ) @@ -73,7 +73,7 @@ func (s PeerFlowE2ETestSuiteBQ) checkPeerdbColumns(dstQualified string, softDele for _, record := range recordBatch.Records { for _, entry := range record { switch entry.(type) { - case qvalue.QValueBoolean, qvalue.QValueTimestamp: + case types.QValueBoolean, types.QValueTimestamp: recordCount += 1 } } @@ -384,7 +384,8 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Types_BQ() { c23 NUMERIC,c24 OID,c28 REAL,c29 SMALLINT,c30 SMALLSERIAL,c31 SERIAL,c32 TEXT, c33 TIMESTAMP,c34 TIMESTAMPTZ,c35 TIME, c36 TIMETZ,c37 TSQUERY,c38 TSVECTOR, c39 TXID_SNAPSHOT,c40 UUID,c41 XML, c42 INT[], c43 FLOAT[], c44 TEXT[], c45 mood, c46 HSTORE, - c47 DATE[], c48 TIMESTAMPTZ[], c49 TIMESTAMP[], c50 BOOLEAN[], c51 SMALLINT[], c52 NUMERIC); + c47 DATE[], c48 TIMESTAMPTZ[], c49 TIMESTAMP[], c50 BOOLEAN[], c51 SMALLINT[], c52 NUMERIC, + c53 NUMERIC(16,2)[], c54 NUMERIC[]); `, srcTableName)) require.NoError(s.t, err) @@ -420,7 +421,8 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Types_BQ() { '{"2020-01-01 01:01:01+00", "2020-01-02 01:01:01+00"}'::timestamptz[], '{"2020-01-01 01:01:01", "2020-01-02 01:01:01"}'::timestamp[], '{true, false}'::boolean[], - '{1, 2}'::smallint[]; + '{1, 2}'::smallint[], null, + '{1.2, 1.23, null}'::numeric(16,2)[], '{1.2, 1.23, null}'::numeric[]; `, srcTableName)) e2e.EnvNoError(s.t, env, err) @@ -430,7 +432,7 @@ func (s PeerFlowE2ETestSuiteBQ) Test_Types_BQ() { "c6", "c39", "c40", "id", "c9", "c11", "c12", "c13", "c14", "c15", "c16", "c17", "c18", "c21", "c22", "c23", "c24", "c28", "c29", "c30", "c31", "c33", "c34", "c35", "c36", "c37", "c38", "c7", "c8", "c32", "c42", "c43", "c44", "c45", "c46", "c47", "c48", - "c49", "c50", "c51", + "c49", "c50", "c51", "c53", "c54", }) if err != nil { s.t.Log(err) diff --git a/flow/e2e/clickhouse/clickhouse.go b/flow/e2e/clickhouse/clickhouse.go index 0cdabfa4fd..81d915769a 100644 --- a/flow/e2e/clickhouse/clickhouse.go +++ b/flow/e2e/clickhouse/clickhouse.go @@ -8,6 +8,8 @@ import ( "testing" "time" + "github.com/ClickHouse/clickhouse-go/v2" + "github.com/ClickHouse/clickhouse-go/v2/lib/driver" "github.com/google/uuid" "github.com/jackc/pgx/v5" "github.com/shopspring/decimal" @@ -20,7 +22,7 @@ import ( e2e_s3 "github.com/PeerDB-io/peerdb/flow/e2e/s3" "github.com/PeerDB-io/peerdb/flow/generated/protos" "github.com/PeerDB-io/peerdb/flow/model" - "github.com/PeerDB-io/peerdb/flow/model/qvalue" + "github.com/PeerDB-io/peerdb/flow/shared/types" ) type ClickHouseSuite struct { @@ -64,25 +66,16 @@ func (s ClickHouseSuite) Peer() *protos.Peer { } func (s ClickHouseSuite) PeerForDatabase(dbname string) *protos.Peer { - region := "" - if s.s3Helper.S3Config.Region != nil { - region = *s.s3Helper.S3Config.Region - } - ret := &protos.Peer{ Name: e2e.AddSuffix(s, dbname), Type: protos.DBType_CLICKHOUSE, Config: &protos.Peer_ClickhouseConfig{ ClickhouseConfig: &protos.ClickhouseConfig{ - Host: "localhost", - Port: 9000, - Database: dbname, - S3Path: s.s3Helper.BucketName, - AccessKeyId: *s.s3Helper.S3Config.AccessKeyId, - SecretAccessKey: *s.s3Helper.S3Config.SecretAccessKey, - Region: region, - DisableTls: true, - Endpoint: s.s3Helper.S3Config.Endpoint, + Host: "localhost", + Port: 9000, + Database: dbname, + DisableTls: true, + S3: s.s3Helper.S3Config, }, }, } @@ 
-133,29 +126,35 @@ func (s ClickHouseSuite) GetRows(table string, cols string) (*model.QRecordBatch } defer ch.Close() - rows, err := ch.Query( - s.t.Context(), - fmt.Sprintf(`SELECT %s FROM %s FINAL WHERE _peerdb_is_deleted = 0 ORDER BY 1 SETTINGS use_query_cache = false`, cols, table), - ) - if err != nil { - return nil, err + var rows driver.Rows + var rowsErr error + if strings.HasPrefix(table, "_peerdb_raw") { + rows, rowsErr = s.queryRawTable(ch, table, cols) + } else { + rows, rowsErr = ch.Query( + s.t.Context(), + fmt.Sprintf(`SELECT %s FROM %s FINAL WHERE _peerdb_is_deleted = 0 ORDER BY 1 SETTINGS use_query_cache = false`, cols, table), + ) + } + if rowsErr != nil { + return nil, rowsErr } defer rows.Close() batch := &model.QRecordBatch{} - types := rows.ColumnTypes() - row := make([]any, 0, len(types)) - tableSchema, err := connclickhouse.GetTableSchemaForTable(&protos.TableMapping{SourceTableIdentifier: table}, types) + colTypes := rows.ColumnTypes() + row := make([]any, 0, len(colTypes)) + tableSchema, err := connclickhouse.GetTableSchemaForTable(&protos.TableMapping{SourceTableIdentifier: table}, colTypes) if err != nil { return nil, err } - for idx, ty := range types { + for idx, ty := range colTypes { fieldDesc := tableSchema.Columns[idx] row = append(row, reflect.New(ty.ScanType()).Interface()) - batch.Schema.Fields = append(batch.Schema.Fields, qvalue.QField{ + batch.Schema.Fields = append(batch.Schema.Fields, types.QField{ Name: ty.Name(), - Type: qvalue.QValueKind(fieldDesc.Type), + Type: types.QValueKind(fieldDesc.Type), Precision: 0, Scale: 0, Nullable: fieldDesc.Nullable, @@ -166,135 +165,139 @@ func (s ClickHouseSuite) GetRows(table string, cols string) (*model.QRecordBatch if err := rows.Scan(row...); err != nil { return nil, err } - qrow := make([]qvalue.QValue, 0, len(row)) + qrow := make([]types.QValue, 0, len(row)) for _, val := range row { switch v := val.(type) { case **string: if *v == nil { - qrow = append(qrow, qvalue.QValueNull(qvalue.QValueKindString)) + qrow = append(qrow, types.QValueNull(types.QValueKindString)) } else { - qrow = append(qrow, qvalue.QValueString{Val: **v}) + qrow = append(qrow, types.QValueString{Val: **v}) } case *string: - qrow = append(qrow, qvalue.QValueString{Val: *v}) + qrow = append(qrow, types.QValueString{Val: *v}) case *[]string: - qrow = append(qrow, qvalue.QValueArrayString{Val: *v}) + qrow = append(qrow, types.QValueArrayString{Val: *v}) case **int8: if *v == nil { - qrow = append(qrow, qvalue.QValueNull(qvalue.QValueKindInt8)) + qrow = append(qrow, types.QValueNull(types.QValueKindInt8)) } else { - qrow = append(qrow, qvalue.QValueInt8{Val: **v}) + qrow = append(qrow, types.QValueInt8{Val: **v}) } case *int8: - qrow = append(qrow, qvalue.QValueInt8{Val: *v}) + qrow = append(qrow, types.QValueInt8{Val: *v}) case **int16: if *v == nil { - qrow = append(qrow, qvalue.QValueNull(qvalue.QValueKindInt16)) + qrow = append(qrow, types.QValueNull(types.QValueKindInt16)) } else { - qrow = append(qrow, qvalue.QValueInt16{Val: **v}) + qrow = append(qrow, types.QValueInt16{Val: **v}) } case *int16: - qrow = append(qrow, qvalue.QValueInt16{Val: *v}) + qrow = append(qrow, types.QValueInt16{Val: *v}) case **int32: if *v == nil { - qrow = append(qrow, qvalue.QValueNull(qvalue.QValueKindInt32)) + qrow = append(qrow, types.QValueNull(types.QValueKindInt32)) } else { - qrow = append(qrow, qvalue.QValueInt32{Val: **v}) + qrow = append(qrow, types.QValueInt32{Val: **v}) } case *int32: - qrow = append(qrow, qvalue.QValueInt32{Val: *v}) + qrow = 
append(qrow, types.QValueInt32{Val: *v}) case *[]int32: - qrow = append(qrow, qvalue.QValueArrayInt32{Val: *v}) + qrow = append(qrow, types.QValueArrayInt32{Val: *v}) case **int64: if *v == nil { - qrow = append(qrow, qvalue.QValueNull(qvalue.QValueKindInt64)) + qrow = append(qrow, types.QValueNull(types.QValueKindInt64)) } else { - qrow = append(qrow, qvalue.QValueInt64{Val: **v}) + qrow = append(qrow, types.QValueInt64{Val: **v}) } case *int64: - qrow = append(qrow, qvalue.QValueInt64{Val: *v}) + qrow = append(qrow, types.QValueInt64{Val: *v}) case **time.Time: if *v == nil { - qrow = append(qrow, qvalue.QValueNull(qvalue.QValueKindTimestamp)) + qrow = append(qrow, types.QValueNull(types.QValueKindTimestamp)) } else { - qrow = append(qrow, qvalue.QValueTimestamp{Val: **v}) + qrow = append(qrow, types.QValueTimestamp{Val: **v}) } case **uint8: if *v == nil { - qrow = append(qrow, qvalue.QValueNull(qvalue.QValueKindUInt8)) + qrow = append(qrow, types.QValueNull(types.QValueKindUInt8)) } else { - qrow = append(qrow, qvalue.QValueUInt8{Val: **v}) + qrow = append(qrow, types.QValueUInt8{Val: **v}) } case *uint8: - qrow = append(qrow, qvalue.QValueUInt8{Val: *v}) + qrow = append(qrow, types.QValueUInt8{Val: *v}) case **uint16: if *v == nil { - qrow = append(qrow, qvalue.QValueNull(qvalue.QValueKindUInt16)) + qrow = append(qrow, types.QValueNull(types.QValueKindUInt16)) } else { - qrow = append(qrow, qvalue.QValueUInt16{Val: **v}) + qrow = append(qrow, types.QValueUInt16{Val: **v}) } case *uint16: - qrow = append(qrow, qvalue.QValueUInt16{Val: *v}) + qrow = append(qrow, types.QValueUInt16{Val: *v}) case **uint32: if *v == nil { - qrow = append(qrow, qvalue.QValueNull(qvalue.QValueKindUInt32)) + qrow = append(qrow, types.QValueNull(types.QValueKindUInt32)) } else { - qrow = append(qrow, qvalue.QValueUInt32{Val: **v}) + qrow = append(qrow, types.QValueUInt32{Val: **v}) } case *uint32: - qrow = append(qrow, qvalue.QValueUInt32{Val: *v}) + qrow = append(qrow, types.QValueUInt32{Val: *v}) case **uint64: if *v == nil { - qrow = append(qrow, qvalue.QValueNull(qvalue.QValueKindUInt64)) + qrow = append(qrow, types.QValueNull(types.QValueKindUInt64)) } else { - qrow = append(qrow, qvalue.QValueUInt64{Val: **v}) + qrow = append(qrow, types.QValueUInt64{Val: **v}) } case *uint64: - qrow = append(qrow, qvalue.QValueUInt64{Val: *v}) + qrow = append(qrow, types.QValueUInt64{Val: *v}) case *time.Time: - qrow = append(qrow, qvalue.QValueTimestamp{Val: *v}) + qrow = append(qrow, types.QValueTimestamp{Val: *v}) + case *[]time.Time: + qrow = append(qrow, types.QValueArrayTimestamp{Val: *v}) case **decimal.Decimal: if *v == nil { - qrow = append(qrow, qvalue.QValueNull(qvalue.QValueKindNumeric)) + qrow = append(qrow, types.QValueNull(types.QValueKindNumeric)) } else { - qrow = append(qrow, qvalue.QValueNumeric{Val: **v}) + qrow = append(qrow, types.QValueNumeric{Val: **v}) } case *decimal.Decimal: - qrow = append(qrow, qvalue.QValueNumeric{Val: *v}) + qrow = append(qrow, types.QValueNumeric{Val: *v}) + case *[]decimal.Decimal: + qrow = append(qrow, types.QValueArrayNumeric{Val: *v}) case **bool: if *v == nil { - qrow = append(qrow, qvalue.QValueNull(qvalue.QValueKindBoolean)) + qrow = append(qrow, types.QValueNull(types.QValueKindBoolean)) } else { - qrow = append(qrow, qvalue.QValueBoolean{Val: **v}) + qrow = append(qrow, types.QValueBoolean{Val: **v}) } case *bool: - qrow = append(qrow, qvalue.QValueBoolean{Val: *v}) + qrow = append(qrow, types.QValueBoolean{Val: *v}) case **float32: if *v == nil { - qrow = 
append(qrow, qvalue.QValueNull(qvalue.QValueKindFloat32)) + qrow = append(qrow, types.QValueNull(types.QValueKindFloat32)) } else { - qrow = append(qrow, qvalue.QValueFloat32{Val: **v}) + qrow = append(qrow, types.QValueFloat32{Val: **v}) } case *float32: - qrow = append(qrow, qvalue.QValueFloat32{Val: *v}) + qrow = append(qrow, types.QValueFloat32{Val: *v}) case *[]float32: - qrow = append(qrow, qvalue.QValueArrayFloat32{Val: *v}) + qrow = append(qrow, types.QValueArrayFloat32{Val: *v}) case **float64: if *v == nil { - qrow = append(qrow, qvalue.QValueNull(qvalue.QValueKindFloat64)) + qrow = append(qrow, types.QValueNull(types.QValueKindFloat64)) } else { - qrow = append(qrow, qvalue.QValueFloat64{Val: **v}) + qrow = append(qrow, types.QValueFloat64{Val: **v}) } case *float64: - qrow = append(qrow, qvalue.QValueFloat64{Val: *v}) + qrow = append(qrow, types.QValueFloat64{Val: *v}) case *[]float64: - qrow = append(qrow, qvalue.QValueArrayFloat64{Val: *v}) + qrow = append(qrow, types.QValueArrayFloat64{Val: *v}) case *uuid.UUID: - qrow = append(qrow, qvalue.QValueUUID{Val: *v}) + qrow = append(qrow, types.QValueUUID{Val: *v}) case *[]uuid.UUID: - qrow = append(qrow, qvalue.QValueArrayUUID{Val: *v}) + qrow = append(qrow, types.QValueArrayUUID{Val: *v}) default: - return nil, fmt.Errorf("cannot convert %T to qvalue", v) + return nil, fmt.Errorf("cannot convert %T to types", v) } } batch.Records = append(batch.Records, qrow) @@ -303,6 +306,13 @@ func (s ClickHouseSuite) GetRows(table string, cols string) (*model.QRecordBatch return batch, rows.Err() } +func (s ClickHouseSuite) queryRawTable(conn clickhouse.Conn, table string, cols string) (driver.Rows, error) { + return conn.Query( + s.t.Context(), + fmt.Sprintf(`SELECT %s FROM %s ORDER BY 1 SETTINGS use_query_cache = false`, cols, table), + ) +} + func SetupSuite[TSource e2e.SuiteSource]( t *testing.T, setupSource func(*testing.T) (TSource, string, error), diff --git a/flow/e2e/clickhouse/peer_flow_ch_test.go b/flow/e2e/clickhouse/peer_flow_ch_test.go index 2fe385f419..fbd11e30bf 100644 --- a/flow/e2e/clickhouse/peer_flow_ch_test.go +++ b/flow/e2e/clickhouse/peer_flow_ch_test.go @@ -3,6 +3,8 @@ package e2e_clickhouse import ( "embed" "fmt" + "reflect" + "regexp" "strconv" "strings" "testing" @@ -10,6 +12,7 @@ import ( "github.com/jackc/pgerrcode" "github.com/shopspring/decimal" + "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" connclickhouse "github.com/PeerDB-io/peerdb/flow/connectors/clickhouse" @@ -18,8 +21,9 @@ import ( "github.com/PeerDB-io/peerdb/flow/e2eshared" "github.com/PeerDB-io/peerdb/flow/generated/protos" "github.com/PeerDB-io/peerdb/flow/model" - "github.com/PeerDB-io/peerdb/flow/model/qvalue" "github.com/PeerDB-io/peerdb/flow/shared" + "github.com/PeerDB-io/peerdb/flow/shared/clickhouse" + "github.com/PeerDB-io/peerdb/flow/shared/types" peerflow "github.com/PeerDB-io/peerdb/flow/workflows" ) @@ -437,12 +441,14 @@ func (s ClickHouseSuite) WeirdTable(tableName string) { CREATE TABLE IF NOT EXISTS %s ( id SERIAL PRIMARY KEY, key TEXT NOT NULL, - "excludedColumn" TEXT + "includedColumn?" TEXT, + "excludedColumn?" 
TEXT ); `, srcFullName)) require.NoError(s.t, err) - _, err = s.Conn().Exec(s.t.Context(), fmt.Sprintf("INSERT INTO %s (key, \"excludedColumn\") VALUES ('init','excluded')", srcFullName)) + _, err = s.Conn().Exec(s.t.Context(), + fmt.Sprintf("INSERT INTO %s (key, \"includedColumn?\", \"excludedColumn?\") VALUES ('init','include','exclude')", srcFullName)) require.NoError(s.t, err) connectionGen := e2e.FlowConnectionGenerationConfig{ @@ -465,24 +471,25 @@ func (s ClickHouseSuite) WeirdTable(tableName string) { e2e.EnvWaitForEqualTablesWithNames(env, s, "waiting on initial", srcTableName, dstTableName, "id,\"key\"") - _, err = s.Conn().Exec(s.t.Context(), fmt.Sprintf("INSERT INTO %s (key, \"excludedColumn\") VALUES ('cdc','excluded')", srcFullName)) + _, err = s.Conn().Exec(s.t.Context(), + fmt.Sprintf("INSERT INTO %s (key, \"includedColumn?\", \"excludedColumn?\") VALUES ('cdc','still','ex')", srcFullName)) require.NoError(s.t, err) e2e.EnvWaitForEqualTablesWithNames(env, s, "waiting on cdc", srcTableName, dstTableName, "id,\"key\"") env.Cancel(s.t.Context()) e2e.RequireEnvCanceled(s.t, env) - env = e2e.ExecuteWorkflow(s.t.Context(), tc, shared.PeerFlowTaskQueue, peerflow.DropFlowWorkflow, &protos.DropFlowInput{ FlowJobName: flowConnConfig.FlowJobName, DropFlowStats: false, FlowConnectionConfigs: flowConnConfig, }) e2e.EnvWaitForFinished(s.t, env, 3*time.Minute) + // now test weird names with rename based resync ch, err := connclickhouse.Connect(s.t.Context(), nil, s.Peer().GetClickhouseConfig()) require.NoError(s.t, err) - require.NoError(s.t, ch.Exec(s.t.Context(), fmt.Sprintf("DROP TABLE `%s`", dstTableName))) + require.NoError(s.t, ch.Exec(s.t.Context(), "DROP TABLE "+clickhouse.QuoteIdentifier(dstTableName))) require.NoError(s.t, ch.Close()) flowConnConfig.Resync = true env = e2e.ExecutePeerflow(s.t.Context(), tc, peerflow.CDCFlowWorkflow, flowConnConfig, nil) @@ -500,7 +507,7 @@ func (s ClickHouseSuite) WeirdTable(tableName string) { // now test weird names with exchange based resync ch, err = connclickhouse.Connect(s.t.Context(), nil, s.Peer().GetClickhouseConfig()) require.NoError(s.t, err) - require.NoError(s.t, ch.Exec(s.t.Context(), fmt.Sprintf("TRUNCATE TABLE `%s`", dstTableName))) + require.NoError(s.t, ch.Exec(s.t.Context(), "TRUNCATE TABLE "+clickhouse.QuoteIdentifier(dstTableName))) require.NoError(s.t, ch.Close()) env = e2e.ExecutePeerflow(s.t.Context(), tc, peerflow.CDCFlowWorkflow, flowConnConfig, nil) e2e.SetupCDCFlowStatusQuery(s.t, env, flowConnConfig) @@ -517,6 +524,11 @@ func (s ClickHouseSuite) Test_WeirdTable_MixedCase() { s.WeirdTable("myMixedCaseTable") } +func (s ClickHouseSuite) Test_WeirdTable_Question() { + s.t.SkipNow() // TODO fix avro errors by sanitizing names + s.WeirdTable("whatIsTable?") +} + func (s ClickHouseSuite) Test_WeirdTable_Dash() { s.t.SkipNow() // TODO fix avro errors by sanitizing names s.WeirdTable("table-group") @@ -531,13 +543,15 @@ func (s ClickHouseSuite) Test_Large_Numeric() { CREATE TABLE IF NOT EXISTS %s( id INT PRIMARY KEY GENERATED BY DEFAULT AS IDENTITY, c1 NUMERIC(76,0), - c2 NUMERIC(78,0) + c2 NUMERIC(78,0), + c3 NUMERIC(76,0)[], + c4 NUMERIC(78,0)[] ); `, srcFullName)) require.NoError(s.t, err) - _, err = s.Conn().Exec(s.t.Context(), fmt.Sprintf("INSERT INTO %s(c1,c2) VALUES($1,$2)", srcFullName), - strings.Repeat("7", 76), strings.Repeat("9", 78)) + _, err = s.Conn().Exec(s.t.Context(), fmt.Sprintf("INSERT INTO %s(c1,c2,c3,c4) VALUES($1,$2,$3,$4)", srcFullName), + strings.Repeat("7", 76), strings.Repeat("9", 78), 
"{"+strings.Repeat("6", 76)+"}", "{"+strings.Repeat("8", 78)+"}") require.NoError(s.t, err) connectionGen := e2e.FlowConnectionGenerationConfig{ @@ -552,27 +566,35 @@ func (s ClickHouseSuite) Test_Large_Numeric() { env := e2e.ExecutePeerflow(s.t.Context(), tc, peerflow.CDCFlowWorkflow, flowConnConfig, nil) e2e.SetupCDCFlowStatusQuery(s.t, env, flowConnConfig) - e2e.EnvWaitForCount(env, s, "waiting for CDC count", dstTableName, "id,c1,c2", 1) + e2e.EnvWaitForCount(env, s, "waiting for CDC count", dstTableName, "id,c1,c2,c3,c4", 1) - _, err = s.Conn().Exec(s.t.Context(), fmt.Sprintf("INSERT INTO %s(c1,c2) VALUES($1,$2)", srcFullName), - strings.Repeat("7", 76), strings.Repeat("9", 78)) + _, err = s.Conn().Exec(s.t.Context(), fmt.Sprintf("INSERT INTO %s(c1,c2,c3,c4) VALUES($1,$2,$3,$4)", srcFullName), + strings.Repeat("7", 76), strings.Repeat("9", 78), "{"+strings.Repeat("6", 76)+"}", "{"+strings.Repeat("8", 78)+"}") require.NoError(s.t, err) - e2e.EnvWaitForCount(env, s, "waiting for CDC count", dstTableName, "id,c1,c2", 2) + e2e.EnvWaitForCount(env, s, "waiting for CDC count", dstTableName, "id,c1,c2,c3,c4", 2) - rows, err := s.GetRows(dstTableName, "c1,c2") + rows, err := s.GetRows(dstTableName, "c1,c2,c3,c4") require.NoError(s.t, err) require.Len(s.t, rows.Records, 2, "expected 2 rows") for _, row := range rows.Records { - require.Len(s.t, row, 2, "expected 2 columns") - require.Equal(s.t, qvalue.QValueKindNumeric, row[0].Kind(), "expected NUMERIC(76,0) to be Decimal") - require.Equal(s.t, qvalue.QValueKindString, row[1].Kind(), "expected NUMERIC(78,0) to be String") + require.Len(s.t, row, 4, "expected 4 columns") + require.Equal(s.t, types.QValueKindNumeric, row[0].Kind(), "expected NUMERIC(76,0) to be Decimal") + require.Equal(s.t, types.QValueKindString, row[1].Kind(), "expected NUMERIC(78,0) to be String") + require.Equal(s.t, types.QValueKindArrayNumeric, row[2].Kind(), "expected NUMERIC(76,0)[] to be Decimal[]") + require.Equal(s.t, types.QValueKindArrayString, row[3].Kind(), "expected NUMERIC(78,0)[] to be String[]") c1, ok := row[0].Value().(decimal.Decimal) require.True(s.t, ok, "expected NUMERIC(76,0) to be Decimal") require.Equal(s.t, strings.Repeat("7", 76), c1.String(), "expected NUMERIC(76,0) to be 7s") c2, ok := row[1].Value().(string) require.True(s.t, ok, "expected NUMERIC(78,0) to be String") require.Equal(s.t, strings.Repeat("9", 78), c2, "expected NUMERIC(78,0) to be 9s") + c3, ok := row[2].Value().([]decimal.Decimal) + require.True(s.t, ok, "expected NUMERIC(76,0)[] to be Decimal") + require.Equal(s.t, strings.Repeat("6", 76), c3[0].String(), "expected NUMERIC(76,0)[] to be 6s") + c4, ok := row[3].Value().([]string) + require.True(s.t, ok, "expected NUMERIC(78,0)[] to be String[]") + require.Equal(s.t, strings.Repeat("8", 78), c4[0], "expected NUMERIC(78,0)[] to be 8s") } env.Cancel(s.t.Context()) @@ -580,10 +602,6 @@ func (s ClickHouseSuite) Test_Large_Numeric() { } func (s ClickHouseSuite) Test_Destination_Type_Conversion() { - if _, ok := s.source.(*e2e.PostgresSource); !ok { - s.t.Skip("only applies to postgres") - } - srcTableName := "test_destination_type_conversion" srcFullName := s.attachSchemaSuffix(srcTableName) dstTableName := "test_destination_type_conversion" @@ -603,7 +621,7 @@ func (s ClickHouseSuite) Test_Destination_Type_Conversion() { require.NoError(s.t, err) connectionGen := e2e.FlowConnectionGenerationConfig{ - FlowJobName: s.attachSuffix("clickhouse_test_destination_type_conversion"), + FlowJobName: 
s.attachSuffix("clickhouse_test_dest_type_conv"), TableNameMapping: map[string]string{srcFullName: dstTableName}, Destination: s.Peer().Name, } @@ -639,8 +657,8 @@ func (s ClickHouseSuite) Test_Destination_Type_Conversion() { require.Len(s.t, rows.Records, 4, "expected 4 rows") for i, row := range rows.Records { require.Len(s.t, row, 2, "expected 2 columns") - require.Equal(s.t, qvalue.QValueKindString, row[0].Kind(), "c1 type mismatch") - require.Equal(s.t, qvalue.QValueKindString, row[1].Kind(), "c2 type mismatch") + require.Equal(s.t, types.QValueKindString, row[0].Kind(), "c1 type mismatch") + require.Equal(s.t, types.QValueKindString, row[1].Kind(), "c2 type mismatch") require.Equal(s.t, strings.Repeat("9", 77), row[0].Value(), "c1 value mismatch") if i%2 == 0 { require.Equal(s.t, strings.Repeat("9", 78), row[1].Value(), "c2 value mismatch") @@ -716,6 +734,229 @@ func (s ClickHouseSuite) Test_Unbounded_Numeric_Without_FF() { s.testNumericFF(false) } +func (s ClickHouseSuite) testNumericTruncation(unbNumAsStringFf bool) { + var pgSource *e2e.PostgresSource + var ok bool + if pgSource, ok = s.source.(*e2e.PostgresSource); !ok { + s.t.Skip("only applies to postgres") + } + + nines := func(integer, fraction int) string { + integerStr := strings.Repeat("9", integer) + if integer == 0 { + integerStr = "0" + } + if fraction > 0 { + return integerStr + "." + strings.Repeat("9", fraction) + } + return integerStr + } + //nolint:govet // it's a test, no need for fieldalignment + tests := []struct { + SrcType string + SrcValue string + Expected string + ExpectedCleared int + ExpectedTrunated int + ExpectedWithFF string // if empty, same as above + }{ + {SrcType: "numeric", SrcValue: nines(38, 38), Expected: nines(38, 38)}, + {SrcType: "numeric", SrcValue: nines(39, 0), Expected: "0", ExpectedCleared: 1, ExpectedWithFF: nines(39, 0)}, + {SrcType: "numeric", SrcValue: nines(0, 39), Expected: nines(0, 38), ExpectedTrunated: 1, ExpectedWithFF: nines(0, 39)}, + {SrcType: "numeric(96, 48)", SrcValue: nines(48, 48), Expected: nines(48, 48)}, + {SrcType: "numeric(76, 38)", SrcValue: nines(38, 38), Expected: nines(38, 38)}, + {SrcType: "numeric(76, 0)", SrcValue: nines(76, 0), Expected: nines(76, 0)}, + {SrcType: "numeric(76, 76)", SrcValue: nines(0, 76), Expected: nines(0, 76)}, + } + + totalCleared := 0 + totalTruncated := 0 + if !unbNumAsStringFf { + for _, tc := range tests { + totalCleared += tc.ExpectedCleared + totalTruncated += tc.ExpectedTrunated + } + } + + dstTableName := fmt.Sprintf("numeric_truncation_unas_ff_%v", unbNumAsStringFf) + srcFullName := s.attachSchemaSuffix(dstTableName) + + var sb strings.Builder + fmt.Fprintf(&sb, "CREATE TABLE IF NOT EXISTS %s(\n", srcFullName) + sb.WriteString("id INT PRIMARY KEY GENERATED BY DEFAULT AS IDENTITY") + for i, tc := range tests { + fmt.Fprintf(&sb, ",\ncol%d %s", i, tc.SrcType) + fmt.Fprintf(&sb, ",\ncol%d_neg %s", i, tc.SrcType) + fmt.Fprintf(&sb, ",\ncol%d_arr %s[]", i, tc.SrcType) + } + sb.WriteString(")") + + createQuery := sb.String() + _, err := s.Conn().Exec(s.t.Context(), createQuery) + require.NoError(s.t, err) + + sb.Reset() + fmt.Fprintf(&sb, "INSERT INTO %s(", srcFullName) + for i := range tests { + if i > 0 { + fmt.Fprint(&sb, ", ") + } + fmt.Fprintf(&sb, "col%d, col%d_neg, col%d_arr", i, i, i) + } + fmt.Fprint(&sb, ") VALUES(") + for i, tc := range tests { + if i > 0 { + fmt.Fprint(&sb, ", ") + } + fmt.Fprintf(&sb, "%s::numeric", tc.SrcValue) + fmt.Fprint(&sb, ", ") + fmt.Fprintf(&sb, "-%s::numeric", tc.SrcValue) + fmt.Fprint(&sb, ", 
") + fmt.Fprintf(&sb, "array[%s, -%s]::numeric[]", tc.SrcValue, tc.SrcValue) + } + fmt.Fprint(&sb, ")") + insertQuery := sb.String() + + _, err = s.Conn().Exec(s.t.Context(), insertQuery) + require.NoError(s.t, err) + + flowJobName := s.attachSuffix(fmt.Sprintf("clickhouse_test_num_trunc_ff_%v", unbNumAsStringFf)) + connectionGen := e2e.FlowConnectionGenerationConfig{ + FlowJobName: flowJobName, + TableNameMapping: map[string]string{srcFullName: dstTableName}, + Destination: s.Peer().Name, + } + flowConnConfig := connectionGen.GenerateFlowConnectionConfigs(s) + flowConnConfig.DoInitialSnapshot = true + flowConnConfig.Env = map[string]string{"PEERDB_CLICKHOUSE_UNBOUNDED_NUMERIC_AS_STRING": strconv.FormatBool(unbNumAsStringFf)} + tc := e2e.NewTemporalClient(s.t) + env := e2e.ExecutePeerflow(s.t.Context(), tc, peerflow.CDCFlowWorkflow, flowConnConfig, nil) + e2e.SetupCDCFlowStatusQuery(s.t, env, flowConnConfig) + + e2e.EnvWaitForCount(env, s, "waiting for CDC count", dstTableName, "id", 1) + if totalCleared > 0 { + e2e.EnvWaitFor(s.t, env, 5*time.Minute, "waiting for cleared messages", func() bool { + count, err := pgSource.GetLogCount( + s.t.Context(), flowJobName, "warn", "cleared 1 NUMERIC value too big to fit into the destination column", + ) + return err == nil && count == totalCleared*2 // positive and negative + }) + e2e.EnvWaitFor(s.t, env, 5*time.Minute, "waiting for cleared array messages", func() bool { + count, err := pgSource.GetLogCount( + s.t.Context(), flowJobName, "warn", "cleared 2 NUMERIC values too big to fit into the destination column", + ) + return err == nil && count == totalCleared + }) + } + if totalTruncated > 0 { + e2e.EnvWaitFor(s.t, env, 5*time.Minute, "waiting for truncated messages", func() bool { + count, err := pgSource.GetLogCount( + s.t.Context(), flowJobName, "warn", "truncated 1 NUMERIC value too precise to fit into the destination column", + ) + return err == nil && count == totalTruncated*2 // positive and negative + }) + e2e.EnvWaitFor(s.t, env, 5*time.Minute, "waiting for truncated array messages", func() bool { + count, err := pgSource.GetLogCount( + s.t.Context(), flowJobName, "warn", "truncated 2 NUMERIC values too precise to fit into the destination column", + ) + return err == nil && count == totalTruncated + }) + } + + _, err = s.Conn().Exec(s.t.Context(), insertQuery) + require.NoError(s.t, err) + e2e.EnvWaitForCount(env, s, "waiting for CDC count", dstTableName, "id", 2) + + if totalCleared > 0 { + e2e.EnvWaitFor(s.t, env, 5*time.Minute, "waiting for cleared messages", func() bool { + count, err := pgSource.GetLogCount( + s.t.Context(), flowJobName, "warn", "cleared 1 NUMERIC value too big to fit into the destination column", + ) + return err == nil && count == totalCleared*2*2 // positive and negative, snapshot and cdc + }) + e2e.EnvWaitFor(s.t, env, 5*time.Minute, "waiting for cleared array messages", func() bool { + count, err := pgSource.GetLogCount( + s.t.Context(), flowJobName, "warn", "cleared 2 NUMERIC values too big to fit into the destination column", + ) + return err == nil && count == totalCleared*2 // snapshot and cdc + }) + } + if totalTruncated > 0 { + e2e.EnvWaitFor(s.t, env, 5*time.Minute, "waiting for truncated messages", func() bool { + count, err := pgSource.GetLogCount( + s.t.Context(), flowJobName, "warn", "truncated 1 NUMERIC value too precise to fit into the destination column", + ) + return err == nil && count == totalTruncated*2*2 // positive and negative, snapshot and cdc + }) + e2e.EnvWaitFor(s.t, env, 
5*time.Minute, "waiting for truncated array messages", func() bool { + count, err := pgSource.GetLogCount( + s.t.Context(), flowJobName, "warn", "truncated 2 NUMERIC values too precise to fit into the destination column", + ) + return err == nil && count == totalTruncated*2 // snapshot and cdc + }) + } + + sb.Reset() + for i := range tests { + if i > 0 { + fmt.Fprint(&sb, ", ") + } + fmt.Fprintf(&sb, "col%d, col%d_neg, col%d_arr", i, i, i) + } + selectCols := sb.String() + + ninesRegex := regexp.MustCompile(`^(0?)(9*)\.?(9*)`) + countNines := func(value string) string { + if len(value) < 10 { + return value + } + submatches := ninesRegex.FindStringSubmatch(value) + if submatches[1] == "0" { + return fmt.Sprintf("nines(0, %d)", len(submatches[3])) + } + return fmt.Sprintf("nines(%d, %d)", len(submatches[2]), len(submatches[3])) + } + rows, err := s.GetRows(dstTableName, selectCols) + require.NoError(s.t, err) + require.Len(s.t, rows.Records, 2) + for _, row := range rows.Records { + require.Len(s.t, row, 3*len(tests)) + for i, tc := range tests { + testName := fmt.Sprintf("col%d: type=%s value=%s ff=%t", i, tc.SrcType, countNines(tc.SrcValue), unbNumAsStringFf) + + expected := tc.Expected + if unbNumAsStringFf && tc.ExpectedWithFF != "" { + expected = tc.ExpectedWithFF + } + assert.Equal(s.t, expected, fmt.Sprint(row[3*i].Value()), testName) + + negExpected := "-" + expected + if negExpected == "-0" { + negExpected = "0" + } + assert.Equal(s.t, negExpected, fmt.Sprint(row[3*i+1].Value()), testName+" negative") + + arr := row[3*i+2].Value() + rArr := reflect.ValueOf(arr) + if assert.Equal(s.t, 2, rArr.Len(), testName+" array length") { + assert.Equal(s.t, expected, fmt.Sprint(rArr.Index(0).Interface()), testName+" in array") + assert.Equal(s.t, negExpected, fmt.Sprint(rArr.Index(1).Interface()), testName+" negative in array") + } + } + } + + env.Cancel(s.t.Context()) + e2e.RequireEnvCanceled(s.t, env) +} + +func (s ClickHouseSuite) Test_Numeric_Truncation_With_UnbNumAsString_FF() { + s.testNumericTruncation(true) +} + +func (s ClickHouseSuite) Test_Numeric_Truncation_Without_UnbNumAsString_FF() { + s.testNumericTruncation(false) +} + const binaryFormatTestcase = "\x00\x010123\x7f\xff" // PEERDB_CLICKHOUSE_BINARY_FORMAT @@ -797,7 +1038,8 @@ func (s ClickHouseSuite) Test_Types_CH() { c23 NUMERIC,c24 OID,c28 REAL,c29 SMALLINT,c30 SMALLSERIAL,c31 SERIAL,c32 TEXT, c33 TIMESTAMP,c34 TIMESTAMPTZ,c35 TIME,c36 TIMETZ,c37 TSQUERY,c38 TSVECTOR, c39 TXID_SNAPSHOT,c40 UUID, c41 mood[], c42 INT[], c43 FLOAT[], c44 TEXT[], c45 mood, c46 HSTORE, - c47 DATE[], c48 TIMESTAMPTZ[], c49 TIMESTAMP[], c50 BOOLEAN[], c51 SMALLINT[], c52 UUID[]); + c47 DATE[], c48 TIMESTAMPTZ[], c49 TIMESTAMP[], c50 BOOLEAN[], c51 SMALLINT[], c52 UUID[], + c53 NUMERIC(16,2)[], c54 NUMERIC[], c55 NUMERIC(16,2)[], c56 NUMERIC[], c57 INTERVAL[]); INSERT INTO %[1]s SELECT 2,2,b'1',b'101', true,random_bytes(32),'s','test','1.1.10.2'::cidr, CURRENT_DATE,1.23,1.234,'10.0.0.0/32'::inet,1, @@ -818,7 +1060,9 @@ func (s ClickHouseSuite) Test_Types_CH() { '{"2020-01-01 01:01:01", "2020-01-02 01:01:01"}'::timestamp[], '{true, false}'::boolean[], '{1, 2}'::smallint[], - '{"66073c38-b8df-4bdb-bbca-1c97596b8940","66073c38-b8df-4bdb-bbca-1c97596b8940"}'::uuid[];`, + '{"66073c38-b8df-4bdb-bbca-1c97596b8940","66073c38-b8df-4bdb-bbca-1c97596b8940"}'::uuid[], + '{1.2, 1.23, null}'::numeric(16,2)[], '{1.2, 1.23, null}'::numeric[], null::numeric(16,2)[], null::numeric[], + '{1 second, 5 years 2 months 29 days 1 minute 2 seconds 200 milliseconds 20000 
microseconds}'::interval[];`, srcFullName)) require.NoError(s.t, err) @@ -835,7 +1079,7 @@ func (s ClickHouseSuite) Test_Types_CH() { e2e.SetupCDCFlowStatusQuery(s.t, env, flowConnConfig) e2e.EnvWaitForCount(env, s, "waiting for initial snapshot count", dstTableName, "id", 1) e2e.EnvWaitForEqualTablesWithNames(env, s, "check comparable types 1", srcTableName, dstTableName, - "id,c1,c4,c7,c8,c11,c12,c13,c15,c23,c28,c29,c30,c31,c32,c33,c34,c35,c36,c40,c41,c42,c43,c44,c45,c52") + "id,c1,c4,c7,c8,c11,c12,c13,c15,c23,c28,c29,c30,c31,c32,c33,c34,c35,c36,c40,c41,c42,c43,c44,c45,c48,c49,c52,c53,c54,c55,c56,c57") _, err = s.Conn().Exec(s.t.Context(), fmt.Sprintf(` INSERT INTO %s SELECT 3,2,b'1',b'101', @@ -858,11 +1102,13 @@ func (s ClickHouseSuite) Test_Types_CH() { '{"2020-01-01 01:01:01", "2020-01-02 01:01:01"}'::timestamp[], '{true, false}'::boolean[], '{1, 2}'::smallint[], - '{"86073c38-b8df-4bdb-bbca-1c97596b8940","66073c38-b8df-4bdb-bbca-1c97596b8940"}'::uuid[];`, srcFullName)) + '{"86073c38-b8df-4bdb-bbca-1c97596b8940","66073c38-b8df-4bdb-bbca-1c97596b8940"}'::uuid[], + '{2.2, 2.23, null}'::numeric(16,2)[], '{2.2, 2.23, null}'::numeric[], null::numeric(16,2)[], null::numeric[], + '{1 second, 5 years 2 months 29 days 1 minute 2 seconds 200 milliseconds 20000 microseconds}'::interval[];`, srcFullName)) require.NoError(s.t, err) e2e.EnvWaitForCount(env, s, "waiting for CDC count", dstTableName, "id", 2) e2e.EnvWaitForEqualTablesWithNames(env, s, "check comparable types 2", srcTableName, dstTableName, - "id,c1,c4,c7,c8,c11,c12,c13,c15,c23,c28,c29,c30,c31,c32,c33,c34,c35,c36,c40,c41,c42,c43,c44,c45,c52") + "id,c1,c4,c7,c8,c11,c12,c13,c15,c23,c28,c29,c30,c31,c32,c33,c34,c35,c36,c40,c41,c42,c43,c44,c45,c48,c49,c52,c53,c54,c55,c56,c57") _, err = s.Conn().Exec(s.t.Context(), fmt.Sprintf(` UPDATE %[1]s SET c1=3,c32='testery' WHERE id=2; @@ -887,12 +1133,93 @@ func (s ClickHouseSuite) Test_Types_CH() { '{"2020-01-01 01:01:01", "2020-01-02 01:01:01"}'::timestamp[], '{true, false}'::boolean[], '{1, 2}'::smallint[], - '{"66073c38-b8df-4bdb-bbca-1c97596b8940","66073c38-b8df-4bdb-bbca-1c97596b8940"}'::uuid[];`, srcFullName)) + '{"66073c38-b8df-4bdb-bbca-1c97596b8940","66073c38-b8df-4bdb-bbca-1c97596b8940"}'::uuid[], + '{1.2, 1.23, null}'::numeric(16,2)[], '{1.2, 1.23, null}'::numeric[], null::numeric(16,2)[], null::numeric[], + '{1 second, 5 years 2 months 29 days 1 minute 2 seconds 200 milliseconds 20000 microseconds}'::interval[];`, srcFullName)) require.NoError(s.t, err) e2e.EnvWaitForCount(env, s, "waiting for CDC count again", dstTableName, "id", 3) e2e.EnvWaitForEqualTablesWithNames(env, s, "check comparable types 3", srcTableName, dstTableName, - "id,c1,c4,c7,c8,c11,c12,c13,c15,c23,c28,c29,c30,c31,c32,c33,c34,c35,c36,c40,c41,c42,c43,c44,c45,c52") + "id,c1,c4,c7,c8,c11,c12,c13,c15,c23,c28,c29,c30,c31,c32,c33,c34,c35,c36,c40,c41,c42,c43,c44,c45,c48,c49,c52,c53,c54,c55,c56,c57") + + env.Cancel(s.t.Context()) + e2e.RequireEnvCanceled(s.t, env) +} + +func (s ClickHouseSuite) Test_PgVector() { + if _, ok := s.source.(*e2e.PostgresSource); !ok { + s.t.Skip("only applies to postgres") + } + + srcTableName := "test_pgvector" + srcFullName := s.attachSchemaSuffix(srcTableName) + dstTableName := "test_pgvector" + + require.NoError(s.t, s.source.Exec(s.t.Context(), + fmt.Sprintf(`CREATE TABLE IF NOT EXISTS %s (id SERIAL PRIMARY KEY, v1 vector, hv halfvec, sv sparsevec)`, srcFullName))) + require.NoError(s.t, s.source.Exec(s.t.Context(), + fmt.Sprintf(`insert into %s (v1,hv,sv) values 
('[1.5,2,3]','[1,2.5,3]','{1:1.5,3:3.5}/5')`, srcFullName))) + + connectionGen := e2e.FlowConnectionGenerationConfig{ + FlowJobName: e2e.AddSuffix(s, srcTableName), + TableMappings: e2e.TableMappings(s, srcTableName, dstTableName), + Destination: s.Peer().Name, + } + flowConnConfig := connectionGen.GenerateFlowConnectionConfigs(s) + flowConnConfig.DoInitialSnapshot = true + + tc := e2e.NewTemporalClient(s.t) + env := e2e.ExecutePeerflow(s.t.Context(), tc, peerflow.CDCFlowWorkflow, flowConnConfig, nil) + e2e.SetupCDCFlowStatusQuery(s.t, env, flowConnConfig) + e2e.EnvWaitForEqualTablesWithNames(env, s, "check comparable types 1", srcTableName, dstTableName, "id,v1,hv,sv") + + require.NoError(s.t, s.source.Exec(s.t.Context(), + fmt.Sprintf(`insert into %s (v1,hv,sv) values ('[1.5,2,3.5]','[1,2,3.5]','{2:2.5,3:3.5}/5')`, srcFullName))) + e2e.EnvWaitForEqualTablesWithNames(env, s, "check comparable types 2", srcTableName, dstTableName, "id,v1,hv,sv") + + env.Cancel(s.t.Context()) + e2e.RequireEnvCanceled(s.t, env) +} + +func (s ClickHouseSuite) Test_PgVector_Version0() { + if _, ok := s.source.(*e2e.PostgresSource); !ok { + s.t.Skip("only applies to postgres") + } + + srcTableName := "test_pgvector" + srcTextTableName := "test_pgvector_text" + srcFullName := s.attachSchemaSuffix(srcTableName) + srcTextFullName := s.attachSchemaSuffix(srcTextTableName) + dstTableName := "test_pgvector" + + require.NoError(s.t, s.source.Exec(s.t.Context(), + fmt.Sprintf(`CREATE TABLE IF NOT EXISTS %s (id SERIAL PRIMARY KEY, v1 vector, hv halfvec, sv sparsevec)`, srcFullName))) + require.NoError(s.t, s.source.Exec(s.t.Context(), + fmt.Sprintf(`CREATE TABLE IF NOT EXISTS %s (id SERIAL PRIMARY KEY, v1 text, hv text, sv text)`, srcTextFullName))) + require.NoError(s.t, s.source.Exec(s.t.Context(), + fmt.Sprintf(`insert into %s (v1,hv,sv) values ('[1.5,2,3]','[1,2.5,3]','{1:1.5,3:3.5}/5')`, srcFullName))) + require.NoError(s.t, s.source.Exec(s.t.Context(), + fmt.Sprintf(`insert into %s (v1,hv,sv) values ('[1.5,2,3]','[1,2.5,3]','{1:1.5,3:3.5}/5')`, srcTextFullName))) + + connectionGen := e2e.FlowConnectionGenerationConfig{ + FlowJobName: e2e.AddSuffix(s, srcTableName), + TableMappings: e2e.TableMappings(s, srcTableName, dstTableName), + Destination: s.Peer().Name, + } + flowConnConfig := connectionGen.GenerateFlowConnectionConfigs(s) + flowConnConfig.DoInitialSnapshot = true + flowConnConfig.Version = shared.InternalVersion_First + + tc := e2e.NewTemporalClient(s.t) + env := e2e.ExecutePeerflow(s.t.Context(), tc, peerflow.CDCFlowWorkflow, flowConnConfig, nil) + e2e.SetupCDCFlowStatusQuery(s.t, env, flowConnConfig) + e2e.EnvWaitForEqualTablesWithNames(env, s, "check comparable types 1", srcTextTableName, dstTableName, "id,v1,hv,sv") + + require.NoError(s.t, s.source.Exec(s.t.Context(), + fmt.Sprintf(`insert into %s (v1,hv,sv) values ('[1.5,2,3.5]','[1,2,3.5]','{2:2.5,3:3.5}/5')`, srcFullName))) + require.NoError(s.t, s.source.Exec(s.t.Context(), + fmt.Sprintf(`insert into %s (v1,hv,sv) values ('[1.5,2,3.5]','[1,2,3.5]','{2:2.5,3:3.5}/5')`, srcTextFullName))) + e2e.EnvWaitForEqualTablesWithNames(env, s, "check comparable types 2", srcTextTableName, dstTableName, "id,v1,hv,sv") env.Cancel(s.t.Context()) e2e.RequireEnvCanceled(s.t, env) @@ -940,6 +1267,7 @@ func (s ClickHouseSuite) Test_Column_Exclusion() { SyncedAtColName: "_PEERDB_SYNCED_AT", MaxBatchSize: 100, DoInitialSnapshot: true, + Version: shared.InternalVersion_Latest, } // wait for PeerFlowStatusQuery to finish setup @@ -1121,18 +1449,21 @@ func (s 
ClickHouseSuite) Test_Unprivileged_Postgres_Columns() { "#sync_me!" BOOLEAN, "2birds1stone" INT, "quo'te" TEXT, + "Анна Каренина" DOUBLE PRECISION, + "人間失格" CHAR(10), PRIMARY KEY (id, "id number") ); `, srcFullName)) require.NoError(s.t, err) _, err = s.Conn().Exec(s.t.Context(), fmt.Sprintf(` - INSERT INTO %s (key, "se'cret", "spacey column", "#sync_me!", "2birds1stone", "quo'te") - VALUES ('init_initial_load', 'secret', 'neptune', 'true', 509, 'abcd')`, srcFullName)) + INSERT INTO %s (key, "se'cret", "spacey column", "#sync_me!", "2birds1stone", "quo'te", "Анна Каренина", "人間失格") + VALUES ('init_initial_load', 'secret', 'neptune', 'true', 509, 'abcd', 3.14, '人間失格'); + `, srcFullName)) require.NoError(s.t, err) err = e2e.RevokePermissionForTableColumns(s.t.Context(), s.Conn(), srcFullName, - []string{"id", "id number", "key", "spacey column", "#sync_me!", "2birds1stone", "quo'te"}) + []string{"id", "id number", "key", "spacey column", "#sync_me!", "2birds1stone", "quo'te", "Анна Каренина", "人間失格"}) require.NoError(s.t, err) connectionGen := e2e.FlowConnectionGenerationConfig{ @@ -1152,14 +1483,15 @@ func (s ClickHouseSuite) Test_Unprivileged_Postgres_Columns() { e2e.SetupCDCFlowStatusQuery(s.t, env, flowConnConfig) e2e.EnvWaitForEqualTablesWithNames(env, s, "waiting on initial", srcTableName, dstTableName, - "id,\"id number\",key,\"spacey column\",\"#sync_me!\",\"2birds1stone\",\"quo'te\"") + "id,\"id number\",key,\"spacey column\",\"#sync_me!\",\"2birds1stone\",\"quo'te\",\"Анна Каренина\",\"人間失格\"") _, err = s.Conn().Exec(s.t.Context(), fmt.Sprintf(` - INSERT INTO %s (key, "se'cret", "spacey column", "#sync_me!", "2birds1stone","quo'te") - VALUES ('cdc1', 'secret', 'pluto', 'false', 123324, 'lwkfj')`, srcFullName)) + INSERT INTO %s (key, "se'cret", "spacey column", "#sync_me!", "2birds1stone","quo'te", "Анна Каренина", "人間失格") + VALUES ('cdc1', 'secret', 'pluto', 'false', 123324, 'lwkfj', 2.718, '人間失格'); + `, srcFullName)) require.NoError(s.t, err) e2e.EnvWaitForEqualTablesWithNames(env, s, "waiting on cdc", srcTableName, dstTableName, - "id,\"id number\",key,\"spacey column\",\"#sync_me!\",\"2birds1stone\",\"quo'te\"") + "id,\"id number\",key,\"spacey column\",\"#sync_me!\",\"2birds1stone\",\"quo'te\",\"Анна Каренина\",\"人間失格\"") env.Cancel(s.t.Context()) e2e.RequireEnvCanceled(s.t, env) } @@ -1191,6 +1523,157 @@ func (s ClickHouseSuite) Test_InitialLoadOnly_No_Primary_Key() { e2e.EnvWaitForFinished(s.t, env, time.Minute) } +// Test_Normalize_Metadata_With_Retry tests the chunking normalization +// with a push to ClickHouse thrown in via renaming a target table. 
+func (s ClickHouseSuite) Test_Normalize_Metadata_With_Retry() { + var pgSource *e2e.PostgresSource + var ok bool + if pgSource, ok = s.source.(*e2e.PostgresSource); !ok { + s.t.Skip("todo: only applies to postgres for now") + } + + srcTableName1 := "test_normalize_metadata_with_retry_1" + srcFullName1 := s.attachSchemaSuffix(srcTableName1) + dstTableName1 := "test_normalize_metadata_with_retry_dst_1" + srcTableName2 := "test_normalize_metadata_with_retry_2" + srcFullName2 := s.attachSchemaSuffix(srcTableName2) + dstTableName2 := "test_normalize_metadata_with_retry_dst_2" + + require.NoError(s.t, s.source.Exec(s.t.Context(), fmt.Sprintf(` + CREATE TABLE IF NOT EXISTS %s ( + id INT PRIMARY KEY, + "key" TEXT NOT NULL + ); + `, srcFullName1))) + + require.NoError(s.t, s.source.Exec(s.t.Context(), fmt.Sprintf(` + CREATE TABLE IF NOT EXISTS %s ( + id INT PRIMARY KEY, + "key" TEXT NOT NULL + ); + `, srcFullName2))) + + require.NoError(s.t, s.source.Exec(s.t.Context(), + fmt.Sprintf(`INSERT INTO %s (id,"key") VALUES (1,'init'),(2,'two'),(3,'tri'),(4,'cry')`, srcFullName1))) + + require.NoError(s.t, s.source.Exec(s.t.Context(), + fmt.Sprintf(`INSERT INTO %s (id,"key") VALUES (1,'init'),(2,'two'),(3,'tri'),(4,'cry')`, srcFullName2))) + + connectionGen := e2e.FlowConnectionGenerationConfig{ + FlowJobName: s.attachSuffix("test_normalize_metadata_with_retry"), + TableNameMapping: map[string]string{srcFullName1: dstTableName1, srcFullName2: dstTableName2}, + Destination: s.Peer().Name, + } + flowConnConfig := connectionGen.GenerateFlowConnectionConfigs(s) + flowConnConfig.DoInitialSnapshot = true + + tc := e2e.NewTemporalClient(s.t) + env := e2e.ExecutePeerflow(s.t.Context(), tc, peerflow.CDCFlowWorkflow, flowConnConfig, nil) + e2e.SetupCDCFlowStatusQuery(s.t, env, flowConnConfig) + + e2e.EnvWaitForEqualTablesWithNames(env, s, "waiting on initial", srcTableName1, dstTableName1, "id,\"key\"") + + // Rename the table to simulate a push failure to ClickHouse + ch, err := connclickhouse.Connect(s.t.Context(), nil, s.Peer().GetClickhouseConfig()) + require.NoError(s.t, err) + fakeDestination2 := "test_normalize_metadata_with_retry_dst_2_fake" + renameErr := ch.Exec(s.t.Context(), fmt.Sprintf(`RENAME TABLE %s TO %s`, dstTableName2, fakeDestination2)) + require.NoError(s.t, renameErr) + require.NoError(s.t, s.source.Exec(s.t.Context(), fmt.Sprintf(`UPDATE %s SET "key"='update1'`, srcFullName2))) + + e2e.EnvWaitFor(s.t, env, 5*time.Minute, "waiting for first sync to complete", func() bool { + rows, err := pgSource.Query(s.t.Context(), + fmt.Sprintf("SELECT sync_batch_id FROM metadata_last_sync_state WHERE job_name='%s'", + flowConnConfig.FlowJobName)) + if err != nil { + return false + } + + if len(rows.Records) == 0 { + return false + } + + return rows.Records[0][0].Value().(int64) == 1 + }) + + e2e.EnvWaitFor(s.t, env, 5*time.Minute, "waiting for raw table push to complete", func() bool { + rows, err := s.GetRows(s.connector.GetRawTableName(connectionGen.FlowJobName), "_peerdb_batch_id") + if err != nil { + return false + } + + if len(rows.Records) != 4 { + return false + } + + return rows.Records[0][0].Value().(int64) == 1 + }) + + e2e.EnvWaitFor(s.t, env, 5*time.Minute, "waiting for normalize error", func() bool { + errorCount, err := pgSource.GetLogCount( + s.t.Context(), flowConnConfig.FlowJobName, "error", "error while inserting into target clickhouse table", + ) + return err == nil && errorCount > 0 + }) + + // Rename the table back to simulate a successful push to ClickHouse + renameErr = 
ch.Exec(s.t.Context(), fmt.Sprintf(`RENAME TABLE %s TO %s`, fakeDestination2, dstTableName2)) + require.NoError(s.t, renameErr) + require.NoError(s.t, s.source.Exec(s.t.Context(), fmt.Sprintf(`UPDATE %s SET "key"='update2'`, srcFullName2))) + require.NoError(s.t, s.source.Exec(s.t.Context(), fmt.Sprintf(`UPDATE %s SET "key"='update2'`, srcFullName1))) + + e2e.EnvWaitFor(s.t, env, 5*time.Minute, "waiting for second sync to complete", func() bool { + rows, err := pgSource.Query(s.t.Context(), + fmt.Sprintf("SELECT sync_batch_id FROM metadata_last_sync_state WHERE job_name='%s'", + flowConnConfig.FlowJobName)) + if err != nil { + return false + } + + if len(rows.Records) == 0 { + return false + } + + return rows.Records[0][0].Value().(int64) == 2 + }) + + e2e.EnvWaitFor(s.t, env, 5*time.Minute, "waiting for second raw table push to complete", func() bool { + rows, err := s.GetRows(s.connector.GetRawTableName(connectionGen.FlowJobName), "_peerdb_batch_id") + if err != nil { + return false + } + + if len(rows.Records) != 12 { + return false + } + return true + }) + + e2e.EnvWaitFor(s.t, env, 5*time.Minute, "check normalize table metadata after normalize", func() bool { + rows, err := pgSource.Query(s.t.Context(), fmt.Sprintf(` + SELECT (table_batch_id_data->>'%s')::bigint, (table_batch_id_data->>'%s')::bigint + FROM metadata_last_sync_state WHERE job_name='%s'`, + dstTableName1, dstTableName2, flowConnConfig.FlowJobName)) + if err != nil { + s.t.Log("error querying metadata_last_sync_state:", err) + return false + } + + if len(rows.Records) == 0 { + s.t.Log("no records found in metadata_last_sync_state") + return false + } + s.t.Log("metadata_last_sync_state:", rows.Records[0][0].Value(), rows.Records[0][1].Value()) + return rows.Records[0][0].Value().(int64) == 2 && rows.Records[0][1].Value().(int64) == 2 + }) + + e2e.EnvWaitForEqualTablesWithNames(env, s, "after 2 batches of cdc for table 1", srcTableName1, dstTableName1, "id,\"key\"") + e2e.EnvWaitForEqualTablesWithNames(env, s, "after 2 batches of cdc for table 2", srcTableName2, dstTableName2, "id,\"key\"") + + env.Cancel(s.t.Context()) + e2e.RequireEnvCanceled(s.t, env) +} + func (s ClickHouseSuite) Test_Geometric_Types() { if _, ok := s.source.(*e2e.PostgresSource); !ok { s.t.Skip("only applies to postgres") @@ -1286,30 +1769,30 @@ func (s ClickHouseSuite) Test_Geometric_Types() { }{ { point: "POINT(1.000000 2.000000)", - line: "{1 2 3 true}", - lseg: "{[{1 2} {3 4}] true}", - box: "{[{3 4} {1 2}] true}", - path: "{[{1 2} {3 4} {5 6}] true true}", - polygon: "{[{1 2} {3 4} {5 6} {1 2}] true}", - circle: "{{1 2} 3 true}", + line: "{1,2,3}", + lseg: "[(1,2),(3,4)]", + box: "(3,4),(1,2)", + path: "((1,2),(3,4),(5,6))", + polygon: "((1,2),(3,4),(5,6),(1,2))", + circle: "<(1,2),3>", }, { point: "POINT(10.000000 20.000000)", - line: "{10 20 30 true}", - lseg: "{[{10 20} {30 40}] true}", - box: "{[{30 40} {10 20}] true}", - path: "{[{10 20} {30 40} {50 60}] true true}", - polygon: "{[{10 20} {30 40} {50 60} {10 20}] true}", - circle: "{{10 20} 30 true}", + line: "{10,20,30}", + lseg: "[(10,20),(30,40)]", + box: "(30,40),(10,20)", + path: "((10,20),(30,40),(50,60))", + polygon: "((10,20),(30,40),(50,60),(10,20))", + circle: "<(10,20),30>", }, { point: "POINT(100.000000 200.000000)", - line: "{100 200 300 true}", - lseg: "{[{100 200} {300 400}] true}", - box: "{[{300 400} {100 200}] true}", - path: "{[{100 200} {300 400} {500 600}] true true}", - polygon: "{[{100 200} {300 400} {500 600} {100 200}] true}", - circle: "{{100 200} 300 true}", + 
line: "{100,200,300}", + lseg: "[(100,200),(300,400)]", + box: "(300,400),(100,200)", + path: "((100,200),(300,400),(500,600))", + polygon: "((100,200),(300,400),(500,600),(100,200))", + circle: "<(100,200),300>", }, } @@ -1465,12 +1948,13 @@ func (s ClickHouseSuite) Test_Extra_CH_Columns() { } func (s ClickHouseSuite) Test_NullEngine() { + chPeer := s.Peer().GetClickhouseConfig() srcTableName := "test_nullengine" srcFullName := s.attachSchemaSuffix(srcTableName) dstTableName := "test_nullengine" require.NoError(s.t, s.source.Exec(s.t.Context(), - fmt.Sprintf(`CREATE TABLE IF NOT EXISTS %s (id INT PRIMARY KEY, "key" TEXT NOT NULL)`, srcFullName))) + fmt.Sprintf(`CREATE TABLE IF NOT EXISTS %s (id INT PRIMARY KEY, "key" TEXT NOT NULL, val TEXT)`, srcFullName))) connectionGen := e2e.FlowConnectionGenerationConfig{ FlowJobName: s.attachSuffix("clickhouse_nullengine"), @@ -1487,7 +1971,7 @@ func (s ClickHouseSuite) Test_NullEngine() { env := e2e.ExecutePeerflow(s.t.Context(), tc, peerflow.CDCFlowWorkflow, flowConnConfig, nil) e2e.SetupCDCFlowStatusQuery(s.t, env, flowConnConfig) - ch, err := connclickhouse.Connect(s.t.Context(), nil, s.Peer().GetClickhouseConfig()) + ch, err := connclickhouse.Connect(s.t.Context(), nil, chPeer) require.NoError(s.t, err) require.NoError(s.t, ch.Exec(s.t.Context(), `create table nulltarget (id Int32, "key" String, _peerdb_is_deleted Int8) engine = ReplacingMergeTree() order by id`)) @@ -1496,9 +1980,154 @@ func (s ClickHouseSuite) Test_NullEngine() { require.NoError(s.t, ch.Close()) require.NoError(s.t, s.source.Exec(s.t.Context(), - fmt.Sprintf(`insert into %s values (1, 'cdc')`, srcFullName))) + fmt.Sprintf(`insert into %s values (1, 'cdc', 'val')`, srcFullName))) e2e.EnvWaitForEqualTablesWithNames(env, s, "null insert", srcTableName, "nulltarget", "id,\"key\"") + env.Cancel(s.t.Context()) + e2e.RequireEnvCanceled(s.t, env) + env = e2e.ExecuteWorkflow(s.t.Context(), tc, shared.PeerFlowTaskQueue, peerflow.DropFlowWorkflow, &protos.DropFlowInput{ + FlowJobName: flowConnConfig.FlowJobName, + DropFlowStats: false, + FlowConnectionConfigs: flowConnConfig, + }) + e2e.EnvWaitForFinished(s.t, env, 3*time.Minute) + + require.NoError(s.t, s.source.Exec(s.t.Context(), fmt.Sprintf("ALTER TABLE %s DROP COLUMN val", srcFullName))) + + ch, err = connclickhouse.Connect(s.t.Context(), nil, chPeer) + require.NoError(s.t, err) + require.NoError(s.t, ch.Exec(s.t.Context(), "TRUNCATE TABLE nulltarget")) + require.NoError(s.t, ch.Close()) + flowConnConfig.DoInitialSnapshot = true + flowConnConfig.Resync = true + env = e2e.ExecutePeerflow(s.t.Context(), tc, peerflow.CDCFlowWorkflow, flowConnConfig, nil) + e2e.SetupCDCFlowStatusQuery(s.t, env, flowConnConfig) + e2e.EnvWaitForEqualTablesWithNames(env, s, "waiting on initial", srcTableName, "nulltarget", "id,\"key\"") + + var count uint64 + ch, err = connclickhouse.Connect(s.t.Context(), nil, chPeer) + require.NoError(s.t, err) + row := ch.QueryRow(s.t.Context(), + fmt.Sprintf("select count(*) from system.columns where database = '%s' and table = 'test_nullengine'", chPeer.Database)) + require.NoError(s.t, row.Err()) + require.NoError(s.t, row.Scan(&count)) + require.NoError(s.t, ch.Close()) + require.Equal(s.t, uint64(5), count) + + env.Cancel(s.t.Context()) + e2e.RequireEnvCanceled(s.t, env) +} + +func (s ClickHouseSuite) Test_Partition_Key_Integer() { + srcTableName := "test_partition_key_integer" + srcFullName := s.attachSchemaSuffix(srcTableName) + dstTableName := "test_partition_key_integer" + + require.NoError(s.t, 
s.source.Exec(s.t.Context(), + fmt.Sprintf(`CREATE TABLE IF NOT EXISTS %s ( + id INT PRIMARY KEY, + myname TEXT NOT NULL, + updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP)`, srcFullName))) + + for i := 1; i <= 100; i++ { + if _, ok := s.source.(*e2e.PostgresSource); ok { + require.NoError(s.t, s.source.Exec(s.t.Context(), + fmt.Sprintf(`INSERT INTO %s (id,myname,updated_at) + VALUES (%d,'init_%d',CURRENT_TIMESTAMP + INTERVAL '%d seconds')`, + srcFullName, i, i, i))) + } else { + require.NoError(s.t, s.source.Exec(s.t.Context(), + fmt.Sprintf(`INSERT INTO %s (id,myname,updated_at) + VALUES (%d,'init_%d',CURRENT_TIMESTAMP + INTERVAL %d SECOND)`, + srcFullName, i, i, i))) + } + } + + connectionGen := e2e.FlowConnectionGenerationConfig{ + FlowJobName: s.attachSuffix("clickhouse_partition_key_integer"), + TableMappings: []*protos.TableMapping{{ + SourceTableIdentifier: srcFullName, + DestinationTableIdentifier: dstTableName, + PartitionKey: "id", + }}, + Destination: s.Peer().Name, + } + + flowConnConfig := connectionGen.GenerateFlowConnectionConfigs(s) + flowConnConfig.DoInitialSnapshot = true + flowConnConfig.SnapshotMaxParallelWorkers = 4 + flowConnConfig.SnapshotNumRowsPerPartition = 10 + tc := e2e.NewTemporalClient(s.t) + env := e2e.ExecutePeerflow(s.t.Context(), tc, peerflow.CDCFlowWorkflow, flowConnConfig, nil) + e2e.SetupCDCFlowStatusQuery(s.t, env, flowConnConfig) + + e2e.EnvWaitForEqualTablesWithNames(env, s, "waiting on initial", srcTableName, dstTableName, "id,myname,updated_at") + + countRow := s.Conn().QueryRow(s.t.Context(), + `SELECT COUNT(*) FROM peerdb_stats.qrep_partitions WHERE parent_mirror_name = $1`, + flowConnConfig.FlowJobName) + + var partitionCount int + require.NoError(s.t, countRow.Scan(&partitionCount), "failed to get partition count") + require.GreaterOrEqual(s.t, partitionCount, 10, "expected at least 10 partitions to be created") + + env.Cancel(s.t.Context()) + e2e.RequireEnvCanceled(s.t, env) +} + +func (s ClickHouseSuite) Test_Partition_Key_Timestamp() { + srcTableName := "test_partition_key_timestamp" + srcFullName := s.attachSchemaSuffix(srcTableName) + dstTableName := "test_partition_key_timestamp" + + require.NoError(s.t, s.source.Exec(s.t.Context(), + fmt.Sprintf(`CREATE TABLE IF NOT EXISTS %s ( + id INT PRIMARY KEY, + myname TEXT NOT NULL, + updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP)`, srcFullName))) + + for i := 1; i <= 100; i++ { + if _, ok := s.source.(*e2e.PostgresSource); ok { + require.NoError(s.t, s.source.Exec(s.t.Context(), + fmt.Sprintf(`INSERT INTO %s (id,myname,updated_at) + VALUES (%d,'init_%d',CURRENT_TIMESTAMP + INTERVAL '%d seconds')`, + srcFullName, i, i, i))) + } else { + require.NoError(s.t, s.source.Exec(s.t.Context(), + fmt.Sprintf(`INSERT INTO %s (id,myname,updated_at) + VALUES (%d,'init_%d',CURRENT_TIMESTAMP + INTERVAL %d SECOND)`, + srcFullName, i, i, i))) + } + } + + connectionGen := e2e.FlowConnectionGenerationConfig{ + FlowJobName: s.attachSuffix("clickhouse_partition_key_timestamp"), + TableMappings: []*protos.TableMapping{{ + SourceTableIdentifier: srcFullName, + DestinationTableIdentifier: dstTableName, + PartitionKey: "updated_at", + }}, + Destination: s.Peer().Name, + } + + flowConnConfig := connectionGen.GenerateFlowConnectionConfigs(s) + flowConnConfig.DoInitialSnapshot = true + flowConnConfig.SnapshotMaxParallelWorkers = 4 + flowConnConfig.SnapshotNumRowsPerPartition = 10 + tc := e2e.NewTemporalClient(s.t) + env := e2e.ExecutePeerflow(s.t.Context(), tc, peerflow.CDCFlowWorkflow, flowConnConfig, nil) + 
e2e.SetupCDCFlowStatusQuery(s.t, env, flowConnConfig) + + e2e.EnvWaitForEqualTablesWithNames(env, s, "waiting on initial", srcTableName, dstTableName, "id,myname,updated_at") + + countRow := s.Conn().QueryRow(s.t.Context(), + `SELECT COUNT(*) FROM peerdb_stats.qrep_partitions WHERE parent_mirror_name = $1`, + flowConnConfig.FlowJobName) + + var partitionCount int + require.NoError(s.t, countRow.Scan(&partitionCount), "failed to get partition count") + require.GreaterOrEqual(s.t, partitionCount, 10, "expected at least 10 partitions to be created") + env.Cancel(s.t.Context()) e2e.RequireEnvCanceled(s.t, env) } diff --git a/flow/e2e/congen.go b/flow/e2e/congen.go index 7b77ed6c9e..8e332102e7 100644 --- a/flow/e2e/congen.go +++ b/flow/e2e/congen.go @@ -11,6 +11,7 @@ import ( "github.com/PeerDB-io/peerdb/flow/generated/protos" "github.com/PeerDB-io/peerdb/flow/internal" "github.com/PeerDB-io/peerdb/flow/model" + "github.com/PeerDB-io/peerdb/flow/shared" ) type SuiteSource interface { @@ -74,6 +75,7 @@ func (c *FlowConnectionGenerationConfig) GenerateFlowConnectionConfigs(s Suite) DestinationName: c.Destination, SyncedAtColName: "_PEERDB_SYNCED_AT", IdleTimeoutSeconds: 15, + Version: shared.InternalVersion_Latest, } if c.SoftDelete { ret.SoftDeleteColName = "_PEERDB_IS_DELETED" diff --git a/flow/e2e/eventhub/eventhub.go b/flow/e2e/eventhub/eventhub.go index d046c2b4bf..2c8bbc30e8 100644 --- a/flow/e2e/eventhub/eventhub.go +++ b/flow/e2e/eventhub/eventhub.go @@ -6,7 +6,7 @@ import ( "time" "github.com/Azure/azure-sdk-for-go/sdk/azidentity" - "github.com/Azure/azure-sdk-for-go/sdk/messaging/azeventhubs" + "github.com/Azure/azure-sdk-for-go/sdk/messaging/azeventhubs/v2" "github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/eventhub/armeventhub" "github.com/PeerDB-io/peerdb/flow/generated/protos" diff --git a/flow/e2e/generic/generic_test.go b/flow/e2e/generic/generic_test.go index 7fc150016e..f88c9b1fdb 100644 --- a/flow/e2e/generic/generic_test.go +++ b/flow/e2e/generic/generic_test.go @@ -17,8 +17,8 @@ import ( e2e_snowflake "github.com/PeerDB-io/peerdb/flow/e2e/snowflake" "github.com/PeerDB-io/peerdb/flow/e2eshared" "github.com/PeerDB-io/peerdb/flow/generated/protos" - "github.com/PeerDB-io/peerdb/flow/model/qvalue" "github.com/PeerDB-io/peerdb/flow/shared" + "github.com/PeerDB-io/peerdb/flow/shared/types" peerflow "github.com/PeerDB-io/peerdb/flow/workflows" ) @@ -219,27 +219,27 @@ func (s Generic) Test_Simple_Schema_Changes() { Columns: []*protos.FieldDescription{ { Name: e2e.ExpectedDestinationIdentifier(s, "id"), - Type: string(qvalue.QValueKindNumeric), + Type: string(types.QValueKindNumeric), TypeModifier: -1, }, { Name: e2e.ExpectedDestinationIdentifier(s, "c1"), - Type: string(qvalue.QValueKindNumeric), + Type: string(types.QValueKindNumeric), TypeModifier: -1, }, { Name: "_PEERDB_IS_DELETED", - Type: string(qvalue.QValueKindBoolean), + Type: string(types.QValueKindBoolean), TypeModifier: -1, }, { Name: "_PEERDB_SYNCED_AT", - Type: string(qvalue.QValueKindTimestamp), + Type: string(types.QValueKindTimestamp), TypeModifier: -1, }, }, } - output, err := destinationSchemaConnector.GetTableSchema(t.Context(), nil, protos.TypeSystem_Q, + output, err := destinationSchemaConnector.GetTableSchema(t.Context(), nil, shared.InternalVersion_Latest, protos.TypeSystem_Q, []*protos.TableMapping{{SourceTableIdentifier: dstTableName}}) e2e.EnvNoError(t, env, err) e2e.EnvTrue(t, env, e2e.CompareTableSchemas(expectedTableSchema, output[dstTableName])) @@ -255,27 +255,27 @@ func (s Generic) 
Test_Simple_Schema_Changes() { Columns: []*protos.FieldDescription{ { Name: e2e.ExpectedDestinationIdentifier(s, "id"), - Type: string(qvalue.QValueKindNumeric), + Type: string(types.QValueKindNumeric), TypeModifier: -1, }, { Name: e2e.ExpectedDestinationIdentifier(s, "c1"), - Type: string(qvalue.QValueKindNumeric), + Type: string(types.QValueKindNumeric), TypeModifier: -1, }, { Name: "_PEERDB_SYNCED_AT", - Type: string(qvalue.QValueKindTimestamp), + Type: string(types.QValueKindTimestamp), TypeModifier: -1, }, { Name: e2e.ExpectedDestinationIdentifier(s, "c2"), - Type: string(qvalue.QValueKindNumeric), + Type: string(types.QValueKindNumeric), TypeModifier: -1, }, }, } - output, err = destinationSchemaConnector.GetTableSchema(t.Context(), nil, protos.TypeSystem_Q, + output, err = destinationSchemaConnector.GetTableSchema(t.Context(), nil, shared.InternalVersion_Latest, protos.TypeSystem_Q, []*protos.TableMapping{{SourceTableIdentifier: dstTableName}}) e2e.EnvNoError(t, env, err) e2e.EnvTrue(t, env, e2e.CompareTableSchemas(expectedTableSchema, output[dstTableName])) @@ -292,32 +292,32 @@ func (s Generic) Test_Simple_Schema_Changes() { Columns: []*protos.FieldDescription{ { Name: e2e.ExpectedDestinationIdentifier(s, "id"), - Type: string(qvalue.QValueKindNumeric), + Type: string(types.QValueKindNumeric), TypeModifier: -1, }, { Name: e2e.ExpectedDestinationIdentifier(s, "c1"), - Type: string(qvalue.QValueKindNumeric), + Type: string(types.QValueKindNumeric), TypeModifier: -1, }, { Name: "_PEERDB_SYNCED_AT", - Type: string(qvalue.QValueKindTimestamp), + Type: string(types.QValueKindTimestamp), TypeModifier: -1, }, { Name: e2e.ExpectedDestinationIdentifier(s, "c2"), - Type: string(qvalue.QValueKindNumeric), + Type: string(types.QValueKindNumeric), TypeModifier: -1, }, { Name: e2e.ExpectedDestinationIdentifier(s, "c3"), - Type: string(qvalue.QValueKindNumeric), + Type: string(types.QValueKindNumeric), TypeModifier: -1, }, }, } - output, err = destinationSchemaConnector.GetTableSchema(t.Context(), nil, protos.TypeSystem_Q, + output, err = destinationSchemaConnector.GetTableSchema(t.Context(), nil, shared.InternalVersion_Latest, protos.TypeSystem_Q, []*protos.TableMapping{{SourceTableIdentifier: dstTableName}}) e2e.EnvNoError(t, env, err) e2e.EnvTrue(t, env, e2e.CompareTableSchemas(expectedTableSchema, output[dstTableName])) @@ -334,32 +334,32 @@ func (s Generic) Test_Simple_Schema_Changes() { Columns: []*protos.FieldDescription{ { Name: e2e.ExpectedDestinationIdentifier(s, "id"), - Type: string(qvalue.QValueKindNumeric), + Type: string(types.QValueKindNumeric), TypeModifier: -1, }, { Name: e2e.ExpectedDestinationIdentifier(s, "c1"), - Type: string(qvalue.QValueKindNumeric), + Type: string(types.QValueKindNumeric), TypeModifier: -1, }, { Name: "_PEERDB_SYNCED_AT", - Type: string(qvalue.QValueKindTimestamp), + Type: string(types.QValueKindTimestamp), TypeModifier: -1, }, { Name: e2e.ExpectedDestinationIdentifier(s, "c2"), - Type: string(qvalue.QValueKindNumeric), + Type: string(types.QValueKindNumeric), TypeModifier: -1, }, { Name: e2e.ExpectedDestinationIdentifier(s, "c3"), - Type: string(qvalue.QValueKindNumeric), + Type: string(types.QValueKindNumeric), TypeModifier: -1, }, }, } - output, err = destinationSchemaConnector.GetTableSchema(t.Context(), nil, protos.TypeSystem_Q, + output, err = destinationSchemaConnector.GetTableSchema(t.Context(), nil, shared.InternalVersion_Latest, protos.TypeSystem_Q, []*protos.TableMapping{{SourceTableIdentifier: dstTableName}}) e2e.EnvNoError(t, env, err) 
e2e.EnvTrue(t, env, e2e.CompareTableSchemas(expectedTableSchema, output[dstTableName])) @@ -475,27 +475,27 @@ func (s Generic) Test_Schema_Changes_Cutoff_Bug() { Columns: []*protos.FieldDescription{ { Name: e2e.ExpectedDestinationIdentifier(s, "id"), - Type: string(qvalue.QValueKindNumeric), + Type: string(types.QValueKindNumeric), TypeModifier: -1, }, { Name: e2e.ExpectedDestinationIdentifier(s, "c1"), - Type: string(qvalue.QValueKindNumeric), + Type: string(types.QValueKindNumeric), TypeModifier: -1, }, { Name: e2e.ExpectedDestinationIdentifier(s, "c2"), - Type: string(qvalue.QValueKindNumeric), + Type: string(types.QValueKindNumeric), TypeModifier: -1, }, { Name: "_PEERDB_IS_DELETED", - Type: string(qvalue.QValueKindBoolean), + Type: string(types.QValueKindBoolean), TypeModifier: -1, }, { Name: "_PEERDB_SYNCED_AT", - Type: string(qvalue.QValueKindTimestamp), + Type: string(types.QValueKindTimestamp), TypeModifier: -1, }, }, @@ -505,27 +505,27 @@ func (s Generic) Test_Schema_Changes_Cutoff_Bug() { Columns: []*protos.FieldDescription{ { Name: e2e.ExpectedDestinationIdentifier(s, "id"), - Type: string(qvalue.QValueKindNumeric), + Type: string(types.QValueKindNumeric), TypeModifier: -1, }, { Name: e2e.ExpectedDestinationIdentifier(s, "c1"), - Type: string(qvalue.QValueKindNumeric), + Type: string(types.QValueKindNumeric), TypeModifier: -1, }, { Name: "_PEERDB_IS_DELETED", - Type: string(qvalue.QValueKindBoolean), + Type: string(types.QValueKindBoolean), TypeModifier: -1, }, { Name: "_PEERDB_SYNCED_AT", - Type: string(qvalue.QValueKindTimestamp), + Type: string(types.QValueKindTimestamp), TypeModifier: -1, }, }, } - output, err := destinationSchemaConnector.GetTableSchema(t.Context(), nil, protos.TypeSystem_Q, + output, err := destinationSchemaConnector.GetTableSchema(t.Context(), nil, shared.InternalVersion_Latest, protos.TypeSystem_Q, []*protos.TableMapping{{SourceTableIdentifier: dstTableName1}, {SourceTableIdentifier: dstTableName2}}) e2e.EnvNoError(t, env, err) e2e.EnvTrue(t, env, e2e.CompareTableSchemas(expectedTableSchema1, output[dstTableName1])) @@ -537,7 +537,7 @@ func (s Generic) Test_Schema_Changes_Cutoff_Bug() { // verify we got our two rows, if schema did not match up it will error. 
e2e.EnvWaitForEqualTablesWithNames(env, s, "table1 added column", srcTable1, dstTable1, "id,c1,coalesce(c2,0) c2") e2e.EnvWaitForEqualTablesWithNames(env, s, "table2 added column", srcTable2, dstTable2, "id,c1,coalesce(c2,0) c2") - output, err = destinationSchemaConnector.GetTableSchema(t.Context(), nil, protos.TypeSystem_Q, + output, err = destinationSchemaConnector.GetTableSchema(t.Context(), nil, shared.InternalVersion_Latest, protos.TypeSystem_Q, []*protos.TableMapping{{SourceTableIdentifier: dstTableName1}, {SourceTableIdentifier: dstTableName2}}) e2e.EnvNoError(t, env, err) e2e.EnvTrue(t, env, e2e.CompareTableSchemas(expectedTableSchema1, output[dstTableName1])) diff --git a/flow/e2e/mongo/mongo.go b/flow/e2e/mongo/mongo.go new file mode 100644 index 0000000000..6c6c2374c5 --- /dev/null +++ b/flow/e2e/mongo/mongo.go @@ -0,0 +1,79 @@ +package e2e_mongo + +import ( + "context" + "errors" + "testing" + + "go.mongodb.org/mongo-driver/v2/bson" + + "github.com/PeerDB-io/peerdb/flow/connectors" + connmongo "github.com/PeerDB-io/peerdb/flow/connectors/mongo" + "github.com/PeerDB-io/peerdb/flow/e2e" + "github.com/PeerDB-io/peerdb/flow/generated/protos" + "github.com/PeerDB-io/peerdb/flow/model" +) + +type MongoSource struct { + conn *connmongo.MongoConnector + config *protos.MongoConfig +} + +func (s *MongoSource) GeneratePeer(t *testing.T) *protos.Peer { + t.Helper() + peer := &protos.Peer{ + Name: "mongo", + Type: protos.DBType_MONGO, + Config: &protos.Peer_MongoConfig{ + MongoConfig: s.config, + }, + } + e2e.CreatePeer(t, peer) + return peer +} + +func (s *MongoSource) Teardown(t *testing.T, ctx context.Context, suffix string) { + t.Helper() + db := s.conn.Client().Database(GetTestDatabase(suffix)) + _ = db.Drop(t.Context()) +} + +func (s *MongoSource) Connector() connectors.Connector { + return s.conn +} + +func (s *MongoSource) Exec(ctx context.Context, sql string) error { + return errors.ErrUnsupported +} + +func (s *MongoSource) GetRows(ctx context.Context, suffix, table, cols string) (*model.QRecordBatch, error) { + collection := s.conn.Client().Database(GetTestDatabase(suffix)).Collection(table) + cursor, err := collection.Find(ctx, bson.D{}) + if err != nil { + return nil, err + } + + recordBatch := &model.QRecordBatch{ + Schema: connmongo.GetDefaultSchema(), + Records: nil, + } + + for cursor.Next(ctx) { + var doc bson.D + err := cursor.Decode(&doc) + if err != nil { + return nil, err + } + record, _, err := connmongo.QValuesFromDocument(doc) + if err != nil { + return nil, err + } + recordBatch.Records = append(recordBatch.Records, record) + } + + return recordBatch, nil +} + +func GetTestDatabase(suffix string) string { + return "e2e_test_" + suffix +} diff --git a/flow/e2e/mongo/mongo_test.go b/flow/e2e/mongo/mongo_test.go new file mode 100644 index 0000000000..42f0db8b92 --- /dev/null +++ b/flow/e2e/mongo/mongo_test.go @@ -0,0 +1,211 @@ +package e2e_mongo + +import ( + "fmt" + "os" + "strings" + "testing" + + "github.com/stretchr/testify/require" + "go.mongodb.org/mongo-driver/v2/bson" + "go.mongodb.org/mongo-driver/v2/mongo/options" + + connmongo "github.com/PeerDB-io/peerdb/flow/connectors/mongo" + "github.com/PeerDB-io/peerdb/flow/e2e" + e2e_clickhouse "github.com/PeerDB-io/peerdb/flow/e2e/clickhouse" + "github.com/PeerDB-io/peerdb/flow/e2eshared" + "github.com/PeerDB-io/peerdb/flow/generated/protos" + "github.com/PeerDB-io/peerdb/flow/shared" + peerflow "github.com/PeerDB-io/peerdb/flow/workflows" +) + +type MongoClickhouseSuite struct { + e2e.GenericSuite +} + +func 
TestMongoClickhouseSuite(t *testing.T) { + e2eshared.RunSuite(t, SetupMongoClickhouseSuite) +} + +func SetupMongoClickhouseSuite(t *testing.T) MongoClickhouseSuite { + t.Helper() + return MongoClickhouseSuite{e2e_clickhouse.SetupSuite(t, func(t *testing.T) (*MongoSource, string, error) { + t.Helper() + suffix := "mongoch_" + strings.ToLower(shared.RandomString(8)) + source, err := SetupMongo(t, suffix) + return source, suffix, err + })(t)} +} + +func SetupMongo(t *testing.T, suffix string) (*MongoSource, error) { + t.Helper() + + mongoVersion := os.Getenv("CI_MONGO_VERSION") + require.NotEmpty(t, mongoVersion, "missing CI_MONGO_VERSION env var") + + mongoUri := os.Getenv("CI_MONGO_URI") + require.NotEmpty(t, mongoUri, "missing CI_MONGO_URI env var") + + mongoConfig := &protos.MongoConfig{Uri: mongoUri} + + mongoConn, err := connmongo.NewMongoConnector(t.Context(), mongoConfig) + require.NoError(t, err, "failed to setup mongo connector") + + testDb := GetTestDatabase(suffix) + db := mongoConn.Client().Database(testDb) + _ = db.Drop(t.Context()) + + return &MongoSource{conn: mongoConn, config: mongoConfig}, err +} + +func (s MongoClickhouseSuite) Test_Simple_Flow() { + t := s.T() + srcDatabase := GetTestDatabase(s.Suffix()) + srcTable := "test_simple" + dstTable := "test_simple_dst" + + connectionGen := e2e.FlowConnectionGenerationConfig{ + FlowJobName: e2e.AddSuffix(s, srcTable), + TableMappings: e2e.TableMappings(s, srcTable, dstTable), + Destination: s.Peer().Name, + } + flowConnConfig := connectionGen.GenerateFlowConnectionConfigs(s) + flowConnConfig.DoInitialSnapshot = true + + client := s.Source().Connector().(*connmongo.MongoConnector).Client() + collection := client.Database(srcDatabase).Collection(srcTable) + // insert 10 rows into the source table for initial load + for i := range 10 { + testKey := fmt.Sprintf("init_key_%d", i) + testValue := fmt.Sprintf("init_value_%d", i) + res, err := collection.InsertOne(t.Context(), bson.D{bson.E{Key: testKey, Value: testValue}}, options.InsertOne()) + require.NoError(t, err) + require.True(t, res.Acknowledged) + } + + tc := e2e.NewTemporalClient(t) + env := e2e.ExecutePeerflow(t.Context(), tc, peerflow.CDCFlowWorkflow, flowConnConfig, nil) + + e2e.EnvWaitForEqualTablesWithNames(env, s, "initial load to match", srcTable, dstTable, "_id,_full_document") + + e2e.SetupCDCFlowStatusQuery(t, env, flowConnConfig) + // insert 10 rows into the source table for cdc + for i := range 10 { + testKey := fmt.Sprintf("test_key_%d", i) + testValue := fmt.Sprintf("test_value_%d", i) + res, err := collection.InsertOne(t.Context(), bson.D{bson.E{Key: testKey, Value: testValue}}, options.InsertOne()) + require.NoError(t, err) + require.True(t, res.Acknowledged) + } + + e2e.EnvWaitForEqualTablesWithNames(env, s, "cdc events to match", srcTable, dstTable, "_id,_full_document") + env.Cancel(t.Context()) + e2e.RequireEnvCanceled(t, env) +} + +func (s MongoClickhouseSuite) Test_Inconsistent_Schema() { + t := s.T() + + srcDatabase := GetTestDatabase(s.Suffix()) + srcTable := "test_schema_change" + dstTable := "test_schema_change_dst" + + connectionGen := e2e.FlowConnectionGenerationConfig{ + FlowJobName: e2e.AddSuffix(s, srcTable), + TableMappings: e2e.TableMappings(s, srcTable, dstTable), + Destination: s.Peer().Name, + } + flowConnConfig := connectionGen.GenerateFlowConnectionConfigs(s) + flowConnConfig.DoInitialSnapshot = true + + client := s.Source().Connector().(*connmongo.MongoConnector).Client() + collection := client.Database(srcDatabase).Collection(srcTable) + 
+ // adding/removing fields should work + docs := []bson.D{ + {bson.E{Key: "field1", Value: 1}}, + {bson.E{Key: "field1", Value: 2}, bson.E{Key: "field2", Value: "v1"}}, + {bson.E{Key: "field2", Value: "v2"}}, + } + for _, doc := range docs { + res, err := collection.InsertOne(t.Context(), doc, options.InsertOne()) + require.NoError(t, err) + require.True(t, res.Acknowledged) + } + + tc := e2e.NewTemporalClient(t) + env := e2e.ExecutePeerflow(t.Context(), tc, peerflow.CDCFlowWorkflow, flowConnConfig, nil) + e2e.EnvWaitForEqualTablesWithNames(env, s, "initial load to match", srcTable, dstTable, "_id,_full_document") + + e2e.SetupCDCFlowStatusQuery(t, env, flowConnConfig) + + // inconsistent data type for a given field should work + docs = []bson.D{ + {bson.E{Key: "field3", Value: 3}}, + {bson.E{Key: "field3", Value: "3"}}, + } + for _, doc := range docs { + res, err := collection.InsertOne(t.Context(), doc, options.InsertOne()) + require.NoError(t, err) + require.True(t, res.Acknowledged) + } + e2e.EnvWaitForEqualTablesWithNames(env, s, "cdc events to match", srcTable, dstTable, "_id,_full_document") + + env.Cancel(t.Context()) + e2e.RequireEnvCanceled(t, env) +} + +func (s MongoClickhouseSuite) Test_Update_Replace_Delete_Events() { + t := s.T() + + srcDatabase := GetTestDatabase(s.Suffix()) + srcTable := "test_update_replace_delete" + dstTable := "test_update_replace_delete_dst" + + connectionGen := e2e.FlowConnectionGenerationConfig{ + FlowJobName: e2e.AddSuffix(s, srcTable), + TableMappings: e2e.TableMappings(s, srcTable, dstTable), + Destination: s.Peer().Name, + } + flowConnConfig := connectionGen.GenerateFlowConnectionConfigs(s) + flowConnConfig.DoInitialSnapshot = true + + client := s.Source().Connector().(*connmongo.MongoConnector).Client() + collection := client.Database(srcDatabase).Collection(srcTable) + + insertRes, err := collection.InsertOne(t.Context(), bson.D{bson.E{Key: "key", Value: 1}}, options.InsertOne()) + require.NoError(t, err) + require.True(t, insertRes.Acknowledged) + + tc := e2e.NewTemporalClient(t) + env := e2e.ExecutePeerflow(t.Context(), tc, peerflow.CDCFlowWorkflow, flowConnConfig, nil) + e2e.EnvWaitForEqualTablesWithNames(env, s, "initial load", srcTable, dstTable, "_id,_full_document") + + e2e.SetupCDCFlowStatusQuery(t, env, flowConnConfig) + + updateRes, err := collection.UpdateOne( + t.Context(), + bson.D{bson.E{Key: "key", Value: 1}}, + bson.D{bson.E{Key: "$set", Value: bson.D{bson.E{Key: "key", Value: 2}}}}, + options.UpdateOne()) + require.NoError(t, err) + require.Equal(t, int64(1), updateRes.ModifiedCount) + e2e.EnvWaitForEqualTablesWithNames(env, s, "update event", srcTable, dstTable, "_id,_full_document") + + replaceRes, err := collection.ReplaceOne( + t.Context(), + bson.D{bson.E{Key: "key", Value: 2}}, + bson.D{bson.E{Key: "key", Value: 3}}, + options.Replace()) + require.NoError(t, err) + require.Equal(t, int64(1), replaceRes.ModifiedCount) + e2e.EnvWaitForEqualTablesWithNames(env, s, "replace event", srcTable, dstTable, "_id,_full_document") + + deleteRes, err := collection.DeleteOne(t.Context(), bson.D{bson.E{Key: "key", Value: 3}}, options.DeleteOne()) + require.NoError(t, err) + require.Equal(t, int64(1), deleteRes.DeletedCount) + e2e.EnvWaitForEqualTablesWithNames(env, s, "delete event", srcTable, dstTable, "_id,_full_document") + + env.Cancel(t.Context()) + e2e.RequireEnvCanceled(t, env) +} diff --git a/flow/e2e/mysql.go b/flow/e2e/mysql.go index 6047f97219..5175dcbd07 100644 --- a/flow/e2e/mysql.go +++ b/flow/e2e/mysql.go @@ -12,7 +12,8 
@@ import ( "github.com/PeerDB-io/peerdb/flow/connectors/utils" "github.com/PeerDB-io/peerdb/flow/generated/protos" "github.com/PeerDB-io/peerdb/flow/model" - "github.com/PeerDB-io/peerdb/flow/model/qvalue" + "github.com/PeerDB-io/peerdb/flow/shared" + "github.com/PeerDB-io/peerdb/flow/shared/types" ) type MySqlSource struct { @@ -167,7 +168,8 @@ func (s *MySqlSource) GetRows(ctx context.Context, suffix string, table string, } tableName := fmt.Sprintf("e2e_test_%s.%s", suffix, table) - tableSchemas, err := s.GetTableSchema(ctx, nil, protos.TypeSystem_Q, []*protos.TableMapping{{SourceTableIdentifier: tableName}}) + tableSchemas, err := s.GetTableSchema(ctx, nil, shared.InternalVersion_Latest, protos.TypeSystem_Q, + []*protos.TableMapping{{SourceTableIdentifier: tableName}}) if err != nil { return nil, err } @@ -183,9 +185,9 @@ func (s *MySqlSource) GetRows(ctx context.Context, suffix string, table string, } for _, row := range rs.Values { - record := make([]qvalue.QValue, 0, len(row)) + record := make([]types.QValue, 0, len(row)) for idx, val := range row { - qv, err := connmysql.QValueFromMysqlFieldValue(schema.Fields[idx].Type, val) + qv, err := connmysql.QValueFromMysqlFieldValue(schema.Fields[idx].Type, rs.Fields[idx].Type, val) if err != nil { return nil, err } diff --git a/flow/e2e/pg.go b/flow/e2e/pg.go index 8a8bf2df9f..aeebe18ce9 100644 --- a/flow/e2e/pg.go +++ b/flow/e2e/pg.go @@ -17,6 +17,7 @@ import ( "github.com/PeerDB-io/peerdb/flow/generated/protos" "github.com/PeerDB-io/peerdb/flow/internal" "github.com/PeerDB-io/peerdb/flow/model" + "github.com/PeerDB-io/peerdb/flow/shared" ) func cleanPostgres(ctx context.Context, conn *pgx.Conn, suffix string) error { @@ -194,7 +195,7 @@ func (s *PostgresSource) Exec(ctx context.Context, sql string) error { } func (s *PostgresSource) GetRows(ctx context.Context, suffix string, table string, cols string) (*model.QRecordBatch, error) { - pgQueryExecutor, err := s.PostgresConnector.NewQRepQueryExecutor(ctx, "testflow", "testpart") + pgQueryExecutor, err := s.PostgresConnector.NewQRepQueryExecutor(ctx, shared.InternalVersion_Latest, "testflow", "testpart") if err != nil { return nil, err } @@ -207,7 +208,7 @@ func (s *PostgresSource) GetRows(ctx context.Context, suffix string, table strin // to avoid fetching rows from "child" tables ala Postgres table inheritance func (s *PostgresSource) GetRowsOnly(ctx context.Context, suffix string, table string, cols string) (*model.QRecordBatch, error) { - pgQueryExecutor, err := s.PostgresConnector.NewQRepQueryExecutor(ctx, "testflow", "testpart") + pgQueryExecutor, err := s.PostgresConnector.NewQRepQueryExecutor(ctx, shared.InternalVersion_Latest, "testflow", "testpart") if err != nil { return nil, err } @@ -242,3 +243,24 @@ func RevokePermissionForTableColumns(ctx context.Context, conn *pgx.Conn, tableI return nil } + +func (s *PostgresSource) Query(ctx context.Context, query string) (*model.QRecordBatch, error) { + pgQueryExecutor, err := s.PostgresConnector.NewQRepQueryExecutor(ctx, shared.InternalVersion_Latest, "testflow", "testpart") + if err != nil { + return nil, err + } + + return pgQueryExecutor.ExecuteAndProcessQuery(ctx, query) +} + +func (s *PostgresSource) GetLogCount(ctx context.Context, flowJobName, errorType, pattern string) (int, error) { + rows, err := s.Query(ctx, fmt.Sprintf(` + SELECT COUNT(*) FROM peerdb_stats.flow_errors + WHERE error_type='%s' AND position('%s' in flow_name) > 0 + AND error_message ILIKE '%%%s%%'`, errorType, flowJobName, pattern)) + if err != nil { + return 0, 
err + } + + return int(rows.Records[0][0].Value().(int64)), nil +} diff --git a/flow/e2e/postgres/peer_flow_pg_test.go b/flow/e2e/postgres/peer_flow_pg_test.go index b2acace287..1cde326f6e 100644 --- a/flow/e2e/postgres/peer_flow_pg_test.go +++ b/flow/e2e/postgres/peer_flow_pg_test.go @@ -96,7 +96,7 @@ func (s PeerFlowE2ETestSuitePG) Test_Geospatial_PG() { e2e.RequireEnvCanceled(s.t, env) } -func (s PeerFlowE2ETestSuitePG) Test_Types_PG() { +func (s PeerFlowE2ETestSuitePG) Test_Types() { tc := e2e.NewTemporalClient(s.t) srcTableName := s.attachSchemaSuffix("test_types_pg") @@ -153,18 +153,53 @@ func (s PeerFlowE2ETestSuitePG) Test_Types_PG() { "c7", "c8", "c32", "c42", "c43", "c44", "c45", "c46", "c47", "c48", "c49", "c50", }, ",") e2e.EnvWaitFor(s.t, env, 3*time.Minute, "normalize types", func() bool { - return s.comparePGTables(srcTableName, dstTableName, allCols) == nil + err := s.comparePGTables(srcTableName, dstTableName, allCols) + if err != nil { + s.t.Log("mismatch", err) + } + return err == nil }) - // c36 lost tz info so does not compare equal + // c36 converted to UTC, losing tz info, so does not compare equal var c36 string require.NoError(s.t, s.Conn().QueryRow(s.t.Context(), "select c36 from "+dstTableName).Scan(&c36)) - require.Equal(s.t, "09:25:00+00", c36) + require.Equal(s.t, "06:25:00+00", c36) + + env.Cancel(s.t.Context()) + e2e.RequireEnvCanceled(s.t, env) +} + +func (s PeerFlowE2ETestSuitePG) Test_PgVector() { + srcTableName := "pg_pgvector" + srcFullName := s.attachSchemaSuffix(srcTableName) + dstTableName := "pg_pgvector_dst" + + require.NoError(s.t, s.Exec(s.t.Context(), + fmt.Sprintf(`CREATE TABLE IF NOT EXISTS %s (id SERIAL PRIMARY KEY, v1 vector, hv halfvec, sv sparsevec)`, srcFullName))) + require.NoError(s.t, s.Exec(s.t.Context(), + fmt.Sprintf(`insert into %s (v1,hv,sv) values ('[1.5,2,3]','[1,2.5,3]','{1:1.5,3:3.5}/5')`, srcFullName))) + + connectionGen := e2e.FlowConnectionGenerationConfig{ + FlowJobName: e2e.AddSuffix(s, srcTableName), + TableMappings: e2e.TableMappings(s, srcTableName, dstTableName), + Destination: s.Peer().Name, + } + flowConnConfig := connectionGen.GenerateFlowConnectionConfigs(s) + flowConnConfig.DoInitialSnapshot = true + + tc := e2e.NewTemporalClient(s.t) + env := e2e.ExecutePeerflow(s.t.Context(), tc, peerflow.CDCFlowWorkflow, flowConnConfig, nil) + e2e.SetupCDCFlowStatusQuery(s.t, env, flowConnConfig) + e2e.EnvWaitForEqualTablesWithNames(env, s, "check comparable types 1", srcTableName, dstTableName, "id,v1,hv,sv") + + require.NoError(s.t, s.Exec(s.t.Context(), + fmt.Sprintf(`insert into %s (v1,hv,sv) values ('[1.5,2,3.5]','[1,2,3.5]','{2:2.5,3:3.5}/5')`, srcFullName))) + e2e.EnvWaitForEqualTablesWithNames(env, s, "check comparable types 2", srcTableName, dstTableName, "id,v1,hv,sv") env.Cancel(s.t.Context()) e2e.RequireEnvCanceled(s.t, env) } -func (s PeerFlowE2ETestSuitePG) Test_Enums_PG() { +func (s PeerFlowE2ETestSuitePG) Test_Enums() { tc := e2e.NewTemporalClient(s.t) srcTableName := s.attachSchemaSuffix("test_enum_flow") @@ -205,7 +240,6 @@ func (s PeerFlowE2ETestSuitePG) Test_Enums_PG() { }) env.Cancel(s.t.Context()) - e2e.RequireEnvCanceled(s.t, env) } diff --git a/flow/e2e/postgres/postgres.go b/flow/e2e/postgres/postgres.go index 7b0f192357..d846664a74 100644 --- a/flow/e2e/postgres/postgres.go +++ b/flow/e2e/postgres/postgres.go @@ -57,9 +57,14 @@ func (s PeerFlowE2ETestSuitePG) DestinationTable(table string) string { return e2e.AttachSchema(s, table) } +func (s PeerFlowE2ETestSuitePG) Exec(ctx context.Context, sql 
string) error { + _, err := s.conn.Conn().Exec(ctx, sql) + return err +} + func (s PeerFlowE2ETestSuitePG) GetRows(table string, cols string) (*model.QRecordBatch, error) { s.t.Helper() - pgQueryExecutor, err := s.conn.NewQRepQueryExecutor(s.t.Context(), "testflow", "testpart") + pgQueryExecutor, err := s.conn.NewQRepQueryExecutor(s.t.Context(), shared.InternalVersion_Latest, "testflow", "testpart") if err != nil { return nil, err } diff --git a/flow/e2e/postgres/qrep_flow_pg_test.go b/flow/e2e/postgres/qrep_flow_pg_test.go index 1a4d98ad76..c4aa155f53 100644 --- a/flow/e2e/postgres/qrep_flow_pg_test.go +++ b/flow/e2e/postgres/qrep_flow_pg_test.go @@ -238,8 +238,7 @@ func (s PeerFlowE2ETestSuitePG) Test_PG_TypeSystemQRep() { e2e.EnvWaitForFinished(s.t, env, 3*time.Minute) require.NoError(s.t, env.Error(s.t.Context())) - err = s.comparePGTables(srcSchemaQualified, dstSchemaQualified, "*") - require.NoError(s.t, err) + require.NoError(s.t, s.comparePGTables(srcSchemaQualified, dstSchemaQualified, "*")) } func (s PeerFlowE2ETestSuitePG) Test_PeerDB_Columns_QRep_PG() { @@ -523,8 +522,7 @@ func (s PeerFlowE2ETestSuitePG) TestTransform() { tc := e2e.NewTemporalClient(s.t) env := e2e.RunQRepFlowWorkflow(s.t.Context(), tc, qrepConfig) e2e.EnvWaitFor(s.t, env, 3*time.Minute, "waiting for first sync to complete", func() bool { - err := s.compareCounts(dstSchemaQualified, int64(numRows)) - return err == nil + return s.compareCounts(dstSchemaQualified, int64(numRows)) == nil }) require.NoError(s.t, env.Error(s.t.Context())) diff --git a/flow/e2e/s3/cdc_s3_test.go b/flow/e2e/s3/cdc_s3_test.go index 9f3d3042da..5567e7ca57 100644 --- a/flow/e2e/s3/cdc_s3_test.go +++ b/flow/e2e/s3/cdc_s3_test.go @@ -60,20 +60,22 @@ func (s PeerFlowE2ETestSuiteS3) Test_Complete_Simple_Flow_S3() { ctx, cancel := context.WithTimeout(s.t.Context(), 25*time.Second) defer cancel() files, err := s.s3Helper.ListAllFiles(ctx, flowJobName) - s.t.Logf("Files in Test_Complete_Simple_Flow_S3 %s: %d", flowJobName, len(files)) e2e.EnvNoError(s.t, env, err) + s.t.Logf("Files in Test_Complete_Simple_Flow_S3 %s: %d", flowJobName, len(files)) return len(files) == 4 }) - // s3 normalize is nop, so check peerdb_stats directly that batch finalized pool, err := internal.GetCatalogConnectionPoolFromEnv(s.t.Context()) require.NoError(s.t, err) - var count int64 - require.NoError(s.t, pool.QueryRow(s.t.Context(), - "select count(*) from peerdb_stats.cdc_batches where flow_name = $1 and end_time is not null", - flowJobName, - ).Scan(&count)) - require.Equal(s.t, int64(4), count) + e2e.EnvWaitFor(s.t, env, time.Minute, "waiting for cdc batch completion", func() bool { + // s3 normalize is nop, so check peerdb_stats directly that batch finalized + var count int64 + require.NoError(s.t, pool.QueryRow(s.t.Context(), + "select count(*) from peerdb_stats.cdc_batches where flow_name = $1 and end_time is not null", + flowJobName, + ).Scan(&count)) + return count == 4 + }) env.Cancel(s.t.Context()) e2e.RequireEnvCanceled(s.t, env) diff --git a/flow/e2e/s3/qrep_flow_s3_test.go b/flow/e2e/s3/qrep_flow_s3_test.go index 48143321ad..e77c138ba3 100644 --- a/flow/e2e/s3/qrep_flow_s3_test.go +++ b/flow/e2e/s3/qrep_flow_s3_test.go @@ -116,6 +116,11 @@ func SetupSuiteMinIO(t *testing.T) PeerFlowE2ETestSuiteS3 { return setupSuite(t, Minio) } +func SetupSuiteMinIO_TLS(t *testing.T) PeerFlowE2ETestSuiteS3 { + t.Helper() + return setupSuite(t, MinioTls) +} + func (s PeerFlowE2ETestSuiteS3) Test_Complete_QRep_Flow_S3() { if s.s3Helper == nil { s.t.Skip("Skipping S3 test") 
diff --git a/flow/e2e/s3/s3_helper.go b/flow/e2e/s3/s3_helper.go index 4524d5eaf0..1a76301fd0 100644 --- a/flow/e2e/s3/s3_helper.go +++ b/flow/e2e/s3/s3_helper.go @@ -2,12 +2,12 @@ package e2e_s3 import ( "context" + "encoding/base64" "encoding/json" "fmt" "os" "time" - "github.com/aws/aws-sdk-go-v2/aws" "github.com/aws/aws-sdk-go-v2/service/s3" s3types "github.com/aws/aws-sdk-go-v2/service/s3/types" @@ -26,17 +26,29 @@ type S3TestHelper struct { type S3Environment int +type S3PeerCredentials struct { + AccessKeyID string `json:"accessKeyId"` + SecretAccessKey string `json:"secretAccessKey"` + AwsRoleArn string `json:"awsRoleArn"` + SessionToken string `json:"sessionToken"` + Region string `json:"region"` + Endpoint string `json:"endpoint"` +} + const ( Aws S3Environment = iota Gcs Minio + MinioTls ) func NewS3TestHelper(ctx context.Context, s3environment S3Environment) (*S3TestHelper, error) { - var config utils.S3PeerCredentials + var config S3PeerCredentials var endpoint string var credsPath string var bucketName string + var rootCA *string + var tlsHost string switch s3environment { case Aws: credsPath = os.Getenv("TEST_S3_CREDS") @@ -51,6 +63,18 @@ func NewS3TestHelper(ctx context.Context, s3environment S3Environment) (*S3TestH config.AccessKeyID = os.Getenv("AWS_ACCESS_KEY_ID") config.SecretAccessKey = os.Getenv("AWS_SECRET_ACCESS_KEY") config.Region = os.Getenv("AWS_REGION") + case MinioTls: + bucketName = "peerdb" + endpoint = os.Getenv("AWS_ENDPOINT_URL_S3_TLS") + config.AccessKeyID = os.Getenv("AWS_ACCESS_KEY_ID") + config.SecretAccessKey = os.Getenv("AWS_SECRET_ACCESS_KEY") + config.Region = os.Getenv("AWS_REGION") + bytes, err := e2eshared.ReadFileToBytes("./certs/cert.crt") + if err != nil { + return nil, err + } + rootCA = shared.Ptr(base64.StdEncoding.EncodeToString(bytes)) + tlsHost = "minio.local" default: panic(fmt.Sprintf("invalid s3environment %d", s3environment)) } @@ -66,35 +90,32 @@ func NewS3TestHelper(ctx context.Context, s3environment S3Environment) (*S3TestH } } - var endpointUrlPtr *string - if endpoint != "" { - endpointUrlPtr = &endpoint + prefix := fmt.Sprintf("peerdb_test/%d_%s", time.Now().Unix(), shared.RandomString(6)) + + s3config := &protos.S3Config{ + Url: fmt.Sprintf("s3://%s/%s", bucketName, prefix), + AccessKeyId: &config.AccessKeyID, + SecretAccessKey: &config.SecretAccessKey, + Region: &config.Region, + Endpoint: shared.Ptr(endpoint), + RootCa: rootCA, + TlsHost: tlsHost, + } + + provider, err := utils.GetAWSCredentialsProvider(ctx, "ci", utils.NewPeerAWSCredentials(s3config)) + if err != nil { + return nil, err } - provider := utils.NewStaticAWSCredentialsProvider(utils.AWSCredentials{ - AWS: aws.Credentials{ - AccessKeyID: config.AccessKeyID, - SecretAccessKey: config.SecretAccessKey, - SessionToken: config.SessionToken, - }, - EndpointUrl: endpointUrlPtr, - }, config.Region) client, err := utils.CreateS3Client(ctx, provider) if err != nil { return nil, err } - prefix := fmt.Sprintf("peerdb_test/%d_%s", time.Now().Unix(), shared.RandomString(6)) return &S3TestHelper{ - client, - &protos.S3Config{ - Url: fmt.Sprintf("s3://%s/%s", bucketName, prefix), - AccessKeyId: &config.AccessKeyID, - SecretAccessKey: &config.SecretAccessKey, - Region: &config.Region, - Endpoint: endpointUrlPtr, - }, - bucketName, - prefix, + client: client, + S3Config: s3config, + BucketName: bucketName, + prefix: prefix, }, nil } diff --git a/flow/e2e/snowflake/peer_flow_sf_test.go b/flow/e2e/snowflake/peer_flow_sf_test.go index f540848b62..5de557eb24 100644 --- 
a/flow/e2e/snowflake/peer_flow_sf_test.go +++ b/flow/e2e/snowflake/peer_flow_sf_test.go @@ -460,7 +460,8 @@ func (s PeerFlowE2ETestSuiteSF) Test_Types_SF() { c33 TIMESTAMP,c34 TIMESTAMPTZ,c35 TIME, c36 TIMETZ,c37 TSQUERY,c38 TSVECTOR, c39 TXID_SNAPSHOT,c40 UUID,c41 XML, c42 GEOMETRY(POINT), c43 GEOGRAPHY(POINT), c44 GEOGRAPHY(POLYGON), c45 GEOGRAPHY(LINESTRING), c46 GEOMETRY(LINESTRING), c47 GEOMETRY(POLYGON), - c48 mood, c49 HSTORE, c50 DATE[], c51 TIMESTAMPTZ[], c52 TIMESTAMP[], c53 BOOLEAN[],c54 SMALLINT[]); + c48 mood, c49 HSTORE, c50 DATE[], c51 TIMESTAMPTZ[], c52 TIMESTAMP[], c53 BOOLEAN[],c54 SMALLINT[], + c55 NUMERIC(16,5)[], c56 NUMERIC[]); `, srcTableName)) require.NoError(s.t, err) @@ -495,7 +496,8 @@ func (s PeerFlowE2ETestSuiteSF) Test_Types_SF() { '{"2020-01-01 01:01:01+00", "2020-01-02 01:01:01+00"}'::timestamptz[], '{"2020-01-01 01:01:01", "2020-01-02 01:01:01"}'::timestamp[], '{true, false}'::boolean[], - '{1,2}'::smallint[]; + '{1,2}'::smallint[], + '{1.2, 1.23, null}'::numeric(16,5)[], '{1.2, 1.23, null}'::numeric[]; `, srcTableName)) e2e.EnvNoError(s.t, env, err) @@ -505,6 +507,7 @@ func (s PeerFlowE2ETestSuiteSF) Test_Types_SF() { "c14", "c15", "c16", "c17", "c18", "c21", "c22", "c23", "c24", "c28", "c29", "c30", "c31", "c33", "c34", "c35", "c36", "c37", "c38", "c7", "c8", "c32", "c42", "c43", "c44", "c45", "c46", "c47", "c48", "c49", "c50", "c51", "c52", "c53", "c54", + "c55", "c56", }) if err != nil { return false diff --git a/flow/e2e/snowflake/snowflake_helper.go b/flow/e2e/snowflake/snowflake_helper.go index 5c9e9ff635..15494ca19d 100644 --- a/flow/e2e/snowflake/snowflake_helper.go +++ b/flow/e2e/snowflake/snowflake_helper.go @@ -13,7 +13,7 @@ import ( "github.com/PeerDB-io/peerdb/flow/e2eshared" "github.com/PeerDB-io/peerdb/flow/generated/protos" "github.com/PeerDB-io/peerdb/flow/model" - "github.com/PeerDB-io/peerdb/flow/model/qvalue" + "github.com/PeerDB-io/peerdb/flow/shared/types" ) type SnowflakeTestHelper struct { @@ -138,11 +138,11 @@ func (s *SnowflakeTestHelper) RunIntQuery(ctx context.Context, query string) (in } switch v := rec[0].(type) { - case qvalue.QValueInt32: + case types.QValueInt32: return int(v.Val), nil - case qvalue.QValueInt64: + case types.QValueInt64: return int(v.Val), nil - case qvalue.QValueNumeric: + case types.QValueNumeric: return int(v.Val.IntPart()), nil default: return 0, fmt.Errorf("failed to execute query: %s, returned value of type %s", query, rec[0].Kind()) @@ -157,7 +157,7 @@ func (s *SnowflakeTestHelper) checkSyncedAt(ctx context.Context, query string) e for _, record := range recordBatch.Records { for _, entry := range record { - _, ok := entry.(qvalue.QValueTimestamp) + _, ok := entry.(types.QValueTimestamp) if !ok { return errors.New("synced_at column failed: _PEERDB_SYNCED_AT is not a timestamp") } @@ -175,7 +175,7 @@ func (s *SnowflakeTestHelper) checkIsDeleted(ctx context.Context, query string) for _, record := range recordBatch.Records { for _, entry := range record { - _, ok := entry.(qvalue.QValueBoolean) + _, ok := entry.(types.QValueBoolean) if !ok { return errors.New("is_deleted column failed: _PEERDB_IS_DELETED is not a boolean") } diff --git a/flow/e2e/snowflake/snowflake_schema_delta_test.go b/flow/e2e/snowflake/snowflake_schema_delta_test.go index cfbfcd3d63..ef8444a79d 100644 --- a/flow/e2e/snowflake/snowflake_schema_delta_test.go +++ b/flow/e2e/snowflake/snowflake_schema_delta_test.go @@ -10,7 +10,7 @@ import ( connsnowflake "github.com/PeerDB-io/peerdb/flow/connectors/snowflake" 
"github.com/PeerDB-io/peerdb/flow/e2eshared" "github.com/PeerDB-io/peerdb/flow/generated/protos" - "github.com/PeerDB-io/peerdb/flow/model/qvalue" + "github.com/PeerDB-io/peerdb/flow/shared/types" ) const ( @@ -59,14 +59,14 @@ func (s SnowflakeSchemaDeltaTestSuite) TestSimpleAddColumn() { AddedColumns: []*protos.FieldDescription{ { Name: "HI", - Type: string(qvalue.QValueKindJSON), + Type: string(types.QValueKindJSON), TypeModifier: -1, }, }, }}) require.NoError(s.t, err) - output, err := s.connector.GetTableSchema(s.t.Context(), nil, protos.TypeSystem_Q, + output, err := s.connector.GetTableSchema(s.t.Context(), nil, 0, protos.TypeSystem_Q, []*protos.TableMapping{{SourceTableIdentifier: tableName}}) require.NoError(s.t, err) require.Equal(s.t, &protos.TableSchema{ @@ -74,12 +74,12 @@ func (s SnowflakeSchemaDeltaTestSuite) TestSimpleAddColumn() { Columns: []*protos.FieldDescription{ { Name: "ID", - Type: string(qvalue.QValueKindString), + Type: string(types.QValueKindString), TypeModifier: -1, }, { Name: "HI", - Type: string(qvalue.QValueKindJSON), + Type: string(types.QValueKindJSON), TypeModifier: -1, }, }, @@ -96,57 +96,57 @@ func (s SnowflakeSchemaDeltaTestSuite) TestAddAllColumnTypes() { Columns: []*protos.FieldDescription{ { Name: "ID", - Type: string(qvalue.QValueKindString), + Type: string(types.QValueKindString), TypeModifier: -1, }, { Name: "C1", - Type: string(qvalue.QValueKindBoolean), + Type: string(types.QValueKindBoolean), TypeModifier: -1, }, { Name: "C2", - Type: string(qvalue.QValueKindBytes), + Type: string(types.QValueKindBytes), TypeModifier: -1, }, { Name: "C3", - Type: string(qvalue.QValueKindDate), + Type: string(types.QValueKindDate), TypeModifier: -1, }, { Name: "C4", - Type: string(qvalue.QValueKindFloat64), + Type: string(types.QValueKindFloat64), TypeModifier: -1, }, { Name: "C5", - Type: string(qvalue.QValueKindJSON), + Type: string(types.QValueKindJSON), TypeModifier: -1, }, { Name: "C6", - Type: string(qvalue.QValueKindNumeric), + Type: string(types.QValueKindNumeric), TypeModifier: numericAddedColumnTypeModifier, // Numeric(16,7) }, { Name: "C7", - Type: string(qvalue.QValueKindString), + Type: string(types.QValueKindString), TypeModifier: -1, }, { Name: "C8", - Type: string(qvalue.QValueKindTime), + Type: string(types.QValueKindTime), TypeModifier: -1, }, { Name: "C9", - Type: string(qvalue.QValueKindTimestamp), + Type: string(types.QValueKindTimestamp), TypeModifier: -1, }, { Name: "C10", - Type: string(qvalue.QValueKindTimestampTZ), + Type: string(types.QValueKindTimestampTZ), TypeModifier: -1, }, }, @@ -156,7 +156,7 @@ func (s SnowflakeSchemaDeltaTestSuite) TestAddAllColumnTypes() { if column.Name != "ID" { var typeModifierOfAddedCol int32 typeModifierOfAddedCol = -1 - if column.Type == string(qvalue.QValueKindNumeric) { + if column.Type == string(types.QValueKindNumeric) { typeModifierOfAddedCol = numericAddedColumnTypeModifier } addedColumns = append(addedColumns, &protos.FieldDescription{ @@ -175,7 +175,7 @@ func (s SnowflakeSchemaDeltaTestSuite) TestAddAllColumnTypes() { }}) require.NoError(s.t, err) - output, err := s.connector.GetTableSchema(s.t.Context(), nil, protos.TypeSystem_Q, + output, err := s.connector.GetTableSchema(s.t.Context(), nil, 0, protos.TypeSystem_Q, []*protos.TableMapping{{SourceTableIdentifier: tableName}}) require.NoError(s.t, err) require.Equal(s.t, expectedTableSchema, output[tableName]) @@ -191,47 +191,47 @@ func (s SnowflakeSchemaDeltaTestSuite) TestAddTrickyColumnNames() { Columns: []*protos.FieldDescription{ { Name: 
"ID", - Type: string(qvalue.QValueKindString), + Type: string(types.QValueKindString), TypeModifier: -1, }, { Name: "C1", - Type: string(qvalue.QValueKindString), + Type: string(types.QValueKindString), TypeModifier: -1, }, { Name: "C 1", - Type: string(qvalue.QValueKindString), + Type: string(types.QValueKindString), TypeModifier: -1, }, { Name: "RIGHT", - Type: string(qvalue.QValueKindString), + Type: string(types.QValueKindString), TypeModifier: -1, }, { Name: "SELECT", - Type: string(qvalue.QValueKindString), + Type: string(types.QValueKindString), TypeModifier: -1, }, { Name: "XMIN", - Type: string(qvalue.QValueKindString), + Type: string(types.QValueKindString), TypeModifier: -1, }, { Name: "CARIÑO", - Type: string(qvalue.QValueKindString), + Type: string(types.QValueKindString), TypeModifier: -1, }, { Name: "±ªÞ³§", - Type: string(qvalue.QValueKindString), + Type: string(types.QValueKindString), TypeModifier: -1, }, { Name: "カラム", - Type: string(qvalue.QValueKindString), + Type: string(types.QValueKindString), TypeModifier: -1, }, }, @@ -255,7 +255,7 @@ func (s SnowflakeSchemaDeltaTestSuite) TestAddTrickyColumnNames() { }}) require.NoError(s.t, err) - output, err := s.connector.GetTableSchema(s.t.Context(), nil, protos.TypeSystem_Q, + output, err := s.connector.GetTableSchema(s.t.Context(), nil, 0, protos.TypeSystem_Q, []*protos.TableMapping{{SourceTableIdentifier: tableName}}) require.NoError(s.t, err) require.Equal(s.t, expectedTableSchema, output[tableName]) @@ -271,22 +271,22 @@ func (s SnowflakeSchemaDeltaTestSuite) TestAddWhitespaceColumnNames() { Columns: []*protos.FieldDescription{ { Name: " ", - Type: string(qvalue.QValueKindString), + Type: string(types.QValueKindString), TypeModifier: -1, }, { Name: " ", - Type: string(qvalue.QValueKindString), + Type: string(types.QValueKindString), TypeModifier: -1, }, { Name: " ", - Type: string(qvalue.QValueKindTime), + Type: string(types.QValueKindTime), TypeModifier: -1, }, { Name: "\t", - Type: string(qvalue.QValueKindDate), + Type: string(types.QValueKindDate), TypeModifier: -1, }, }, @@ -311,7 +311,7 @@ func (s SnowflakeSchemaDeltaTestSuite) TestAddWhitespaceColumnNames() { }}) require.NoError(s.t, err) - output, err := s.connector.GetTableSchema(s.t.Context(), nil, protos.TypeSystem_Q, + output, err := s.connector.GetTableSchema(s.t.Context(), nil, 0, protos.TypeSystem_Q, []*protos.TableMapping{{SourceTableIdentifier: tableName}}) require.NoError(s.t, err) require.Equal(s.t, expectedTableSchema, output[tableName]) diff --git a/flow/e2e/test_utils.go b/flow/e2e/test_utils.go index 3cedf1d1a6..c8ff225f30 100644 --- a/flow/e2e/test_utils.go +++ b/flow/e2e/test_utils.go @@ -28,8 +28,8 @@ import ( "github.com/PeerDB-io/peerdb/flow/generated/protos" "github.com/PeerDB-io/peerdb/flow/internal" "github.com/PeerDB-io/peerdb/flow/model" - "github.com/PeerDB-io/peerdb/flow/model/qvalue" "github.com/PeerDB-io/peerdb/flow/shared" + "github.com/PeerDB-io/peerdb/flow/shared/types" peerflow "github.com/PeerDB-io/peerdb/flow/workflows" ) @@ -479,6 +479,7 @@ func CreateQRepWorkflowConfig( SyncedAtColName: syncedAtCol, SetupWatermarkTableOnDestination: setupDst, SoftDeleteColName: isDeletedCol, + Version: shared.InternalVersion_Latest, } } @@ -490,59 +491,59 @@ func RunXminFlowWorkflow(ctx context.Context, tc client.Client, config *protos.Q return ExecutePeerflow(ctx, tc, peerflow.XminFlowWorkflow, config, nil) } -func GetOwnersSchema() *qvalue.QRecordSchema { - return &qvalue.QRecordSchema{ - Fields: []qvalue.QField{ - {Name: "id", Type: 
qvalue.QValueKindString, Nullable: true}, - {Name: "card_id", Type: qvalue.QValueKindString, Nullable: true}, - {Name: "from", Type: qvalue.QValueKindTimestamp, Nullable: true}, - {Name: "price", Type: qvalue.QValueKindNumeric, Nullable: true}, - {Name: "created_at", Type: qvalue.QValueKindTimestamp, Nullable: true}, - {Name: "updated_at", Type: qvalue.QValueKindTimestamp, Nullable: true}, - {Name: "transaction_hash", Type: qvalue.QValueKindBytes, Nullable: true}, - {Name: "ownerable_type", Type: qvalue.QValueKindString, Nullable: true}, - {Name: "ownerable_id", Type: qvalue.QValueKindString, Nullable: true}, - {Name: "user_nonce", Type: qvalue.QValueKindInt64, Nullable: true}, - {Name: "transfer_type", Type: qvalue.QValueKindInt64, Nullable: true}, - {Name: "blockchain", Type: qvalue.QValueKindInt64, Nullable: true}, - {Name: "deal_type", Type: qvalue.QValueKindString, Nullable: true}, - {Name: "deal_id", Type: qvalue.QValueKindString, Nullable: true}, - {Name: "ethereum_transaction_id", Type: qvalue.QValueKindString, Nullable: true}, - {Name: "ignore_price", Type: qvalue.QValueKindBoolean, Nullable: true}, - {Name: "card_eth_value", Type: qvalue.QValueKindFloat64, Nullable: true}, - {Name: "paid_eth_price", Type: qvalue.QValueKindFloat64, Nullable: true}, - {Name: "card_bought_notified", Type: qvalue.QValueKindBoolean, Nullable: true}, - {Name: "address", Type: qvalue.QValueKindNumeric, Nullable: true}, - {Name: "account_id", Type: qvalue.QValueKindString, Nullable: true}, - {Name: "asset_id", Type: qvalue.QValueKindNumeric, Nullable: true}, - {Name: "status", Type: qvalue.QValueKindInt64, Nullable: true}, - {Name: "transaction_id", Type: qvalue.QValueKindString, Nullable: true}, - {Name: "settled_at", Type: qvalue.QValueKindTimestamp, Nullable: true}, - {Name: "reference_id", Type: qvalue.QValueKindString, Nullable: true}, - {Name: "settle_at", Type: qvalue.QValueKindTimestamp, Nullable: true}, - {Name: "settlement_delay_reason", Type: qvalue.QValueKindInt64, Nullable: true}, - {Name: "f1", Type: qvalue.QValueKindArrayString, Nullable: true}, - {Name: "f2", Type: qvalue.QValueKindArrayInt64, Nullable: true}, - {Name: "f3", Type: qvalue.QValueKindArrayInt32, Nullable: true}, - {Name: "f4", Type: qvalue.QValueKindArrayString, Nullable: true}, - {Name: "f5", Type: qvalue.QValueKindJSON, Nullable: true}, - {Name: "f6", Type: qvalue.QValueKindJSON, Nullable: true}, - {Name: "f7", Type: qvalue.QValueKindJSON, Nullable: true}, - {Name: "f8", Type: qvalue.QValueKindInt16, Nullable: true}, - {Name: "f13", Type: qvalue.QValueKindArrayInt16, Nullable: true}, - {Name: "my_date", Type: qvalue.QValueKindDate, Nullable: true}, - {Name: "old_date", Type: qvalue.QValueKindDate, Nullable: true}, - {Name: "my_time", Type: qvalue.QValueKindTime, Nullable: true}, - {Name: "my_mood", Type: qvalue.QValueKindString, Nullable: true}, - {Name: "geometryPoint", Type: qvalue.QValueKindGeometry, Nullable: true}, - {Name: "geometry_linestring", Type: qvalue.QValueKindGeometry, Nullable: true}, - {Name: "geometry_polygon", Type: qvalue.QValueKindGeometry, Nullable: true}, - {Name: "geography_point", Type: qvalue.QValueKindGeography, Nullable: true}, - {Name: "geography_linestring", Type: qvalue.QValueKindGeography, Nullable: true}, - {Name: "geography_polygon", Type: qvalue.QValueKindGeography, Nullable: true}, - {Name: "myreal", Type: qvalue.QValueKindFloat32, Nullable: true}, - {Name: "myreal2", Type: qvalue.QValueKindFloat32, Nullable: true}, - {Name: "myreal3", Type: qvalue.QValueKindFloat32, Nullable: true}, 
+func GetOwnersSchema() *types.QRecordSchema { + return &types.QRecordSchema{ + Fields: []types.QField{ + {Name: "id", Type: types.QValueKindString, Nullable: true}, + {Name: "card_id", Type: types.QValueKindString, Nullable: true}, + {Name: "from", Type: types.QValueKindTimestamp, Nullable: true}, + {Name: "price", Type: types.QValueKindNumeric, Nullable: true}, + {Name: "created_at", Type: types.QValueKindTimestamp, Nullable: true}, + {Name: "updated_at", Type: types.QValueKindTimestamp, Nullable: true}, + {Name: "transaction_hash", Type: types.QValueKindBytes, Nullable: true}, + {Name: "ownerable_type", Type: types.QValueKindString, Nullable: true}, + {Name: "ownerable_id", Type: types.QValueKindString, Nullable: true}, + {Name: "user_nonce", Type: types.QValueKindInt64, Nullable: true}, + {Name: "transfer_type", Type: types.QValueKindInt64, Nullable: true}, + {Name: "blockchain", Type: types.QValueKindInt64, Nullable: true}, + {Name: "deal_type", Type: types.QValueKindString, Nullable: true}, + {Name: "deal_id", Type: types.QValueKindString, Nullable: true}, + {Name: "ethereum_transaction_id", Type: types.QValueKindString, Nullable: true}, + {Name: "ignore_price", Type: types.QValueKindBoolean, Nullable: true}, + {Name: "card_eth_value", Type: types.QValueKindFloat64, Nullable: true}, + {Name: "paid_eth_price", Type: types.QValueKindFloat64, Nullable: true}, + {Name: "card_bought_notified", Type: types.QValueKindBoolean, Nullable: true}, + {Name: "address", Type: types.QValueKindNumeric, Nullable: true}, + {Name: "account_id", Type: types.QValueKindString, Nullable: true}, + {Name: "asset_id", Type: types.QValueKindNumeric, Nullable: true}, + {Name: "status", Type: types.QValueKindInt64, Nullable: true}, + {Name: "transaction_id", Type: types.QValueKindString, Nullable: true}, + {Name: "settled_at", Type: types.QValueKindTimestamp, Nullable: true}, + {Name: "reference_id", Type: types.QValueKindString, Nullable: true}, + {Name: "settle_at", Type: types.QValueKindTimestamp, Nullable: true}, + {Name: "settlement_delay_reason", Type: types.QValueKindInt64, Nullable: true}, + {Name: "f1", Type: types.QValueKindArrayString, Nullable: true}, + {Name: "f2", Type: types.QValueKindArrayInt64, Nullable: true}, + {Name: "f3", Type: types.QValueKindArrayInt32, Nullable: true}, + {Name: "f4", Type: types.QValueKindArrayString, Nullable: true}, + {Name: "f5", Type: types.QValueKindJSON, Nullable: true}, + {Name: "f6", Type: types.QValueKindJSON, Nullable: true}, + {Name: "f7", Type: types.QValueKindJSON, Nullable: true}, + {Name: "f8", Type: types.QValueKindInt16, Nullable: true}, + {Name: "f13", Type: types.QValueKindArrayInt16, Nullable: true}, + {Name: "my_date", Type: types.QValueKindDate, Nullable: true}, + {Name: "old_date", Type: types.QValueKindDate, Nullable: true}, + {Name: "my_time", Type: types.QValueKindTime, Nullable: true}, + {Name: "my_mood", Type: types.QValueKindString, Nullable: true}, + {Name: "geometryPoint", Type: types.QValueKindGeometry, Nullable: true}, + {Name: "geometry_linestring", Type: types.QValueKindGeometry, Nullable: true}, + {Name: "geometry_polygon", Type: types.QValueKindGeometry, Nullable: true}, + {Name: "geography_point", Type: types.QValueKindGeography, Nullable: true}, + {Name: "geography_linestring", Type: types.QValueKindGeography, Nullable: true}, + {Name: "geography_polygon", Type: types.QValueKindGeography, Nullable: true}, + {Name: "myreal", Type: types.QValueKindFloat32, Nullable: true}, + {Name: "myreal2", Type: types.QValueKindFloat32, Nullable: 
true}, + {Name: "myreal3", Type: types.QValueKindFloat32, Nullable: true}, }, } } diff --git a/flow/e2eshared/e2eshared.go b/flow/e2eshared/e2eshared.go index 1b262e7fe3..dbcb9c4404 100644 --- a/flow/e2eshared/e2eshared.go +++ b/flow/e2eshared/e2eshared.go @@ -12,6 +12,7 @@ import ( "github.com/PeerDB-io/peerdb/flow/model" "github.com/PeerDB-io/peerdb/flow/model/qvalue" + "github.com/PeerDB-io/peerdb/flow/shared/types" ) type Suite interface { @@ -79,7 +80,7 @@ func ReadFileToBytes(path string) ([]byte, error) { } // checks if two QRecords are identical -func CheckQRecordEquality(t *testing.T, q []qvalue.QValue, other []qvalue.QValue) bool { +func CheckQRecordEquality(t *testing.T, q []types.QValue, other []types.QValue) bool { t.Helper() if len(q) != len(other) { diff --git a/flow/e2eshared/e2eshared_test.go b/flow/e2eshared/e2eshared_test.go index 7a894e7b12..fd90f24f4b 100644 --- a/flow/e2eshared/e2eshared_test.go +++ b/flow/e2eshared/e2eshared_test.go @@ -6,7 +6,7 @@ import ( "testing" "github.com/PeerDB-io/peerdb/flow/model" - "github.com/PeerDB-io/peerdb/flow/model/qvalue" + "github.com/PeerDB-io/peerdb/flow/shared/types" ) func TestReadMissingFileToBytes(t *testing.T) { @@ -17,8 +17,8 @@ func TestReadMissingFileToBytes(t *testing.T) { func TestInequalRecordCountsInequal(t *testing.T) { if CheckQRecordEquality(t, - []qvalue.QValue{qvalue.QValueNull(qvalue.QValueKindString), qvalue.QValueNull(qvalue.QValueKindString)}, - []qvalue.QValue{qvalue.QValueNull(qvalue.QValueKindString)}, + []types.QValue{types.QValueNull(types.QValueKindString), types.QValueNull(types.QValueKindString)}, + []types.QValue{types.QValueNull(types.QValueKindString)}, ) { t.Error("2 records should not be equal to 1 record") } @@ -26,22 +26,22 @@ func TestInequalRecordCountsInequal(t *testing.T) { func TestInequalRecordSchemasInequal(t *testing.T) { if CheckEqualRecordBatches(t, - &model.QRecordBatch{Schema: qvalue.QRecordSchema{ - Fields: []qvalue.QField{{Name: "name"}}, + &model.QRecordBatch{Schema: types.QRecordSchema{ + Fields: []types.QField{{Name: "name"}}, }}, - &model.QRecordBatch{Schema: qvalue.QRecordSchema{ - Fields: []qvalue.QField{{Name: "different"}}, + &model.QRecordBatch{Schema: types.QRecordSchema{ + Fields: []types.QField{{Name: "different"}}, }}, ) { t.Error("schemas with differing column names should be non-equal") } if !CheckEqualRecordBatches(t, - &model.QRecordBatch{Schema: qvalue.QRecordSchema{ - Fields: []qvalue.QField{{Name: "name"}}, + &model.QRecordBatch{Schema: types.QRecordSchema{ + Fields: []types.QField{{Name: "name"}}, }}, - &model.QRecordBatch{Schema: qvalue.QRecordSchema{ - Fields: []qvalue.QField{{Name: "name"}}, + &model.QRecordBatch{Schema: types.QRecordSchema{ + Fields: []types.QField{{Name: "name"}}, }}, ) { t.Error("empty batches with same schema should be equal") diff --git a/flow/go.mod b/flow/go.mod index dc95fc6929..1aca06874c 100644 --- a/flow/go.mod +++ b/flow/go.mod @@ -7,53 +7,54 @@ require ( cloud.google.com/go/bigquery v1.69.0 cloud.google.com/go/pubsub v1.49.0 cloud.google.com/go/storage v1.55.0 - github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.10.0 - github.com/Azure/azure-sdk-for-go/sdk/messaging/azeventhubs v1.3.2 + github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.10.1 + github.com/Azure/azure-sdk-for-go/sdk/messaging/azeventhubs/v2 v2.0.0 github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/eventhub/armeventhub v1.3.0 github.com/ClickHouse/ch-go v0.66.0 - github.com/ClickHouse/clickhouse-go/v2 v2.35.0 + github.com/ClickHouse/clickhouse-go/v2 v2.37.1 
github.com/PeerDB-io/glua64 v1.0.1 github.com/PeerDB-io/gluabit32 v1.0.2 github.com/PeerDB-io/gluajson v1.0.2 github.com/PeerDB-io/gluamsgpack v1.0.4 github.com/PeerDB-io/gluautf8 v1.0.0 - github.com/aws/aws-sdk-go-v2 v1.36.3 - github.com/aws/aws-sdk-go-v2/config v1.29.14 - github.com/aws/aws-sdk-go-v2/credentials v1.17.67 - github.com/aws/aws-sdk-go-v2/feature/rds/auth v1.5.11 - github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.17.77 - github.com/aws/aws-sdk-go-v2/service/kms v1.38.3 - github.com/aws/aws-sdk-go-v2/service/s3 v1.80.0 - github.com/aws/aws-sdk-go-v2/service/ses v1.30.2 - github.com/aws/aws-sdk-go-v2/service/sns v1.34.4 - github.com/aws/aws-sdk-go-v2/service/sts v1.33.19 - github.com/aws/smithy-go v1.22.3 + github.com/aws/aws-sdk-go-v2 v1.36.5 + github.com/aws/aws-sdk-go-v2/config v1.29.17 + github.com/aws/aws-sdk-go-v2/credentials v1.17.70 + github.com/aws/aws-sdk-go-v2/feature/rds/auth v1.5.13 + github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.17.81 + github.com/aws/aws-sdk-go-v2/service/kms v1.41.1 + github.com/aws/aws-sdk-go-v2/service/s3 v1.81.0 + github.com/aws/aws-sdk-go-v2/service/ses v1.30.4 + github.com/aws/aws-sdk-go-v2/service/sns v1.34.7 + github.com/aws/aws-sdk-go-v2/service/sts v1.34.0 + github.com/aws/smithy-go v1.22.4 github.com/cockroachdb/pebble v1.1.5 - github.com/elastic/go-elasticsearch/v8 v8.18.0 + github.com/elastic/go-elasticsearch/v8 v8.18.1 github.com/go-mysql-org/go-mysql v1.12.0 github.com/google/uuid v1.6.0 - github.com/grpc-ecosystem/grpc-gateway/v2 v2.26.3 - github.com/hamba/avro/v2 v2.28.0 + github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.0 + github.com/hamba/avro/v2 v2.29.0 github.com/jackc/pgerrcode v0.0.0-20240316143900-6e2875d9b438 github.com/jackc/pglogrepl v0.0.0-20250509230407-a9884f6bd75a github.com/jackc/pgx/v5 v5.7.5 github.com/joho/godotenv v1.5.1 github.com/lestrrat-go/jwx/v2 v2.1.6 - github.com/lib/pq v1.10.9 github.com/orcaman/concurrent-map/v2 v2.0.1 + github.com/pgvector/pgvector-go v0.3.0 github.com/pingcap/tidb v0.0.0-20250130070702-43f2fb91d740 - github.com/pingcap/tidb/pkg/parser v0.0.0-20250531022214-e7b038b99132 + github.com/pingcap/tidb/pkg/parser v0.0.0-20250623120500-dfc0a21a9c60 github.com/shopspring/decimal v1.4.0 - github.com/slack-go/slack v0.17.0 + github.com/slack-go/slack v0.17.1 github.com/snowflakedb/gosnowflake v1.14.1 github.com/stretchr/testify v1.10.0 - github.com/twmb/franz-go v1.19.4 + github.com/twmb/franz-go v1.19.5 github.com/twmb/franz-go/pkg/kadm v1.16.0 github.com/twmb/franz-go/plugin/kslog v1.0.0 github.com/twpayne/go-geos v0.20.1 - github.com/urfave/cli/v3 v3.3.3 + github.com/urfave/cli/v3 v3.3.8 github.com/youmark/pkcs8 v0.0.0-20240726163527-a2c0da244d78 github.com/yuin/gopher-lua v1.1.1 + go.mongodb.org/mongo-driver/v2 v2.2.2 go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.61.0 go.opentelemetry.io/otel v1.36.0 go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.36.0 @@ -61,23 +62,23 @@ require ( go.opentelemetry.io/otel/metric v1.36.0 go.opentelemetry.io/otel/sdk v1.36.0 go.opentelemetry.io/otel/sdk/metric v1.36.0 - go.temporal.io/api v1.49.1 + go.temporal.io/api v1.50.0 go.temporal.io/sdk v1.34.0 go.temporal.io/sdk/contrib/opentelemetry v0.6.0 go.uber.org/automaxprocs v1.6.0 - golang.org/x/crypto v0.38.0 - golang.org/x/sync v0.14.0 - google.golang.org/api v0.235.0 - google.golang.org/genproto/googleapis/api v0.0.0-20250528174236-200df99c418a - google.golang.org/grpc v1.72.2 + golang.org/x/crypto v0.39.0 + golang.org/x/sync v0.15.0 + 
google.golang.org/api v0.238.0 + google.golang.org/genproto/googleapis/api v0.0.0-20250603155806-513f23925822 + google.golang.org/grpc v1.73.0 google.golang.org/protobuf v1.36.6 - k8s.io/apimachinery v0.33.1 - k8s.io/client-go v0.33.1 + k8s.io/apimachinery v0.33.2 + k8s.io/client-go v0.33.2 ) require ( cel.dev/expr v0.24.0 // indirect - cloud.google.com/go/auth v0.16.1 // indirect + cloud.google.com/go/auth v0.16.2 // indirect cloud.google.com/go/auth/oauth2adapt v0.2.8 // indirect cloud.google.com/go/monitoring v1.24.2 // indirect filippo.io/edwards25519 v1.1.0 // indirect @@ -85,22 +86,22 @@ require ( github.com/99designs/keyring v1.2.2 // indirect github.com/BurntSushi/toml v1.5.0 // indirect github.com/DataDog/zstd v1.5.7 // indirect - github.com/GoogleCloudPlatform/opentelemetry-operations-go/detectors/gcp v1.28.0 // indirect - github.com/GoogleCloudPlatform/opentelemetry-operations-go/exporter/metric v0.52.0 // indirect - github.com/GoogleCloudPlatform/opentelemetry-operations-go/internal/resourcemapping v0.52.0 // indirect + github.com/GoogleCloudPlatform/opentelemetry-operations-go/detectors/gcp v1.29.0 // indirect + github.com/GoogleCloudPlatform/opentelemetry-operations-go/exporter/metric v0.53.0 // indirect + github.com/GoogleCloudPlatform/opentelemetry-operations-go/internal/resourcemapping v0.53.0 // indirect github.com/Masterminds/semver v1.5.0 // indirect github.com/VividCortex/ewma v1.2.0 // indirect - github.com/apache/arrow-go/v18 v18.3.0 // indirect + github.com/apache/arrow-go/v18 v18.3.1 // indirect github.com/apache/arrow/go/v15 v15.0.2 // indirect github.com/apache/thrift v0.22.0 // indirect - github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.16.30 // indirect + github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.16.32 // indirect github.com/aws/aws-sdk-go-v2/internal/ini v1.8.3 // indirect - github.com/aws/aws-sdk-go-v2/service/sso v1.25.3 // indirect - github.com/aws/aws-sdk-go-v2/service/ssooidc v1.30.1 // indirect + github.com/aws/aws-sdk-go-v2/service/sso v1.25.5 // indirect + github.com/aws/aws-sdk-go-v2/service/ssooidc v1.30.3 // indirect github.com/beorn7/perks v1.0.1 // indirect github.com/cenkalti/backoff/v5 v5.0.2 // indirect github.com/cespare/xxhash/v2 v2.3.0 // indirect - github.com/cloudfoundry/gosigar v1.3.92 // indirect + github.com/cloudfoundry/gosigar v1.3.94 // indirect github.com/cncf/xds/go v0.0.0-20250501225837-2ac532fd4443 // indirect github.com/cockroachdb/errors v1.12.0 // indirect github.com/cockroachdb/fifo v0.0.0-20240816210425-c5d0cb0b6fc0 // indirect @@ -132,6 +133,7 @@ require ( github.com/go-openapi/jsonpointer v0.21.1 // indirect github.com/go-openapi/jsonreference v0.21.0 // indirect github.com/go-openapi/swag v0.23.1 // indirect + github.com/go-viper/mapstructure/v2 v2.3.0 // indirect github.com/godbus/dbus v0.0.0-20190726142602-4481cbc300e2 // indirect github.com/golang/protobuf v1.5.4 // indirect github.com/google/btree v1.1.3 // indirect @@ -145,7 +147,7 @@ require ( github.com/klauspost/compress v1.18.0 // indirect github.com/kr/pretty v0.3.1 // indirect github.com/kr/text v0.2.0 // indirect - github.com/lestrrat-go/blackmagic v1.0.3 // indirect + github.com/lestrrat-go/blackmagic v1.0.4 // indirect github.com/lestrrat-go/httpcc v1.0.1 // indirect github.com/lestrrat-go/httprc v1.0.6 // indirect github.com/lestrrat-go/iter v1.0.2 // indirect @@ -154,7 +156,6 @@ require ( github.com/mailru/easyjson v0.9.0 // indirect github.com/minio/asm2plan9s v0.0.0-20200509001527-cdd76441f9d8 // indirect github.com/minio/c2goasm 
v0.0.0-20190812172519-36a3d3bbc4f3 // indirect - github.com/mitchellh/mapstructure v1.5.0 // indirect github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect github.com/modern-go/reflect2 v1.0.2 // indirect github.com/mtibben/percent v0.2.1 // indirect @@ -163,9 +164,9 @@ require ( github.com/opentracing/basictracer-go v1.1.0 // indirect github.com/opentracing/opentracing-go v1.2.0 // indirect github.com/paulmach/orb v0.11.1 // indirect - github.com/pingcap/errors v0.11.5-0.20240318064555-6bd07397691f // indirect + github.com/pingcap/errors v0.11.5-0.20250523034308-74f78ae071ee // indirect github.com/pingcap/failpoint v0.0.0-20240528011301-b51a646c7c86 // indirect - github.com/pingcap/kvproto v0.0.0-20250530091117-d581b50ffdbd // indirect + github.com/pingcap/kvproto v0.0.0-20250616075548-d951fb623bb3 // indirect github.com/pingcap/log v1.1.1-0.20241212030209-7e3ff8601a2a // indirect github.com/pingcap/sysutil v1.0.1-0.20240311050922-ae81ee01f3a5 // indirect github.com/pkg/errors v0.9.1 // indirect @@ -173,7 +174,7 @@ require ( github.com/power-devops/perfstat v0.0.0-20240221224432-82ca36839d55 // indirect github.com/prometheus/client_golang v1.22.0 // indirect github.com/prometheus/client_model v0.6.2 // indirect - github.com/prometheus/common v0.64.0 // indirect + github.com/prometheus/common v0.65.0 // indirect github.com/prometheus/procfs v0.16.1 // indirect github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect github.com/rogpeppe/go-internal v1.14.1 // indirect @@ -184,17 +185,20 @@ require ( github.com/spiffe/go-spiffe/v2 v2.5.0 // indirect github.com/tiancaiamao/gp v0.0.0-20230126082955-4f9e4f1ed9b5 // indirect github.com/tikv/client-go/v2 v2.0.8-0.20250117034919-61e09c6539bd // indirect - github.com/tikv/pd/client v0.0.0-20250530134205-67b518dbb195 // indirect + github.com/tikv/pd/client v0.0.0-20250623084542-60788950a745 // indirect github.com/tklauser/go-sysconf v0.3.15 // indirect github.com/tklauser/numcpus v0.10.0 // indirect github.com/twmb/franz-go/pkg/kmsg v1.11.2 // indirect github.com/twmb/murmur3 v1.1.8 // indirect github.com/x448/float16 v0.8.4 // indirect + github.com/xdg-go/pbkdf2 v1.0.0 // indirect + github.com/xdg-go/scram v1.1.2 // indirect + github.com/xdg-go/stringprep v1.0.4 // indirect github.com/yusufpapurcu/wmi v1.2.4 // indirect github.com/zeebo/errs v1.4.0 // indirect - go.etcd.io/etcd/api/v3 v3.6.0 // indirect - go.etcd.io/etcd/client/pkg/v3 v3.6.0 // indirect - go.etcd.io/etcd/client/v3 v3.6.0 // indirect + go.etcd.io/etcd/api/v3 v3.6.1 // indirect + go.etcd.io/etcd/client/pkg/v3 v3.6.1 // indirect + go.etcd.io/etcd/client/v3 v3.6.1 // indirect go.opentelemetry.io/auto/sdk v1.1.0 // indirect go.opentelemetry.io/contrib/detectors/gcp v1.36.0 // indirect go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.61.0 // indirect @@ -203,15 +207,15 @@ require ( go.uber.org/atomic v1.11.0 // indirect go.uber.org/multierr v1.11.0 // indirect go.uber.org/zap v1.27.0 // indirect - golang.org/x/mod v0.24.0 // indirect + golang.org/x/mod v0.25.0 // indirect golang.org/x/term v0.32.0 // indirect gopkg.in/evanphx/json-patch.v4 v4.12.0 // indirect gopkg.in/inf.v0 v0.9.1 // indirect gopkg.in/natefinch/lumberjack.v2 v2.2.1 // indirect - k8s.io/api v0.33.1 // indirect + k8s.io/api v0.33.2 // indirect k8s.io/klog/v2 v2.130.1 // indirect - k8s.io/kube-openapi v0.0.0-20250318190949-c8a335a9a2ff // indirect - k8s.io/utils v0.0.0-20250502105355-0f33e8f1c979 // indirect + k8s.io/kube-openapi 
v0.0.0-20250610211856-8b98d1ed966a // indirect + k8s.io/utils v0.0.0-20250604170112-4c0f3b243397 // indirect sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8 // indirect sigs.k8s.io/randfill v1.0.0 // indirect sigs.k8s.io/structured-merge-diff/v4 v4.7.0 // indirect @@ -227,17 +231,15 @@ require ( github.com/Azure/go-amqp v1.4.0 // indirect github.com/AzureAD/microsoft-authentication-library-for-go v1.4.2 // indirect github.com/andybalholm/brotli v1.1.1 // indirect - github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.6.10 // indirect - github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.34 // indirect - github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.34 // indirect - github.com/aws/aws-sdk-go-v2/internal/v4a v1.3.34 // indirect - github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.12.3 // indirect - github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.7.2 // indirect - github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.12.15 // indirect - github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.18.15 // indirect + github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.6.11 // indirect + github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.36 // indirect + github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.36 // indirect + github.com/aws/aws-sdk-go-v2/internal/v4a v1.3.36 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.12.4 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.7.4 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.12.17 // indirect + github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.18.17 // indirect github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect - github.com/djherbis/buffer v1.2.0 - github.com/djherbis/nio/v3 v3.0.1 github.com/facebookgo/clock v0.0.0-20150410010913-600d898af40a // indirect github.com/goccy/go-json v0.10.5 // indirect github.com/gogo/protobuf v1.3.2 // indirect @@ -263,16 +265,16 @@ require ( github.com/stretchr/objx v0.5.2 // indirect github.com/zeebo/xxh3 v1.0.2 // indirect go.opencensus.io v0.24.0 // indirect - golang.org/x/exp v0.0.0-20250531010427-b6e5de432a8b - golang.org/x/net v0.40.0 // indirect + golang.org/x/exp v0.0.0-20250620022241-b7579e27df2b + golang.org/x/net v0.41.0 // indirect golang.org/x/oauth2 v0.30.0 // indirect golang.org/x/sys v0.33.0 // indirect - golang.org/x/text v0.25.0 // indirect - golang.org/x/time v0.11.0 // indirect - golang.org/x/tools v0.33.0 // indirect + golang.org/x/text v0.26.0 // indirect + golang.org/x/time v0.12.0 // indirect + golang.org/x/tools v0.34.0 // indirect golang.org/x/xerrors v0.0.0-20240903120638-7835f813f4da // indirect - google.golang.org/genproto v0.0.0-20250528174236-200df99c418a // indirect - google.golang.org/genproto/googleapis/rpc v0.0.0-20250528174236-200df99c418a // indirect + google.golang.org/genproto v0.0.0-20250603155806-513f23925822 // indirect + google.golang.org/genproto/googleapis/rpc v0.0.0-20250603155806-513f23925822 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect ) diff --git a/flow/go.sum b/flow/go.sum index 5264a6f916..2c66571b73 100644 --- a/flow/go.sum +++ b/flow/go.sum @@ -3,8 +3,8 @@ cel.dev/expr v0.24.0/go.mod h1:hLPLo1W4QUmuYdA72RBX06QTs6MXw941piREPl3Yfiw= cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= cloud.google.com/go v0.121.2 h1:v2qQpN6Dx9x2NmwrqlesOt3Ys4ol5/lFZ6Mg1B7OJCg= cloud.google.com/go v0.121.2/go.mod h1:nRFlrHq39MNVWu+zESP2PosMWA0ryJw8KUBZ2iZpxbw= 
-cloud.google.com/go/auth v0.16.1 h1:XrXauHMd30LhQYVRHLGvJiYeczweKQXZxsTbV9TiguU= -cloud.google.com/go/auth v0.16.1/go.mod h1:1howDHJ5IETh/LwYs3ZxvlkXF48aSqqJUM+5o02dNOI= +cloud.google.com/go/auth v0.16.2 h1:QvBAGFPLrDeoiNjyfVunhQ10HKNYuOwZ5noee0M5df4= +cloud.google.com/go/auth v0.16.2/go.mod h1:sRBas2Y1fB1vZTdurouM0AzuYQBMZinrUYL8EufhtEA= cloud.google.com/go/auth/oauth2adapt v0.2.8 h1:keo8NaayQZ6wimpNSmW5OPc283g65QNIiLpZnkHRbnc= cloud.google.com/go/auth/oauth2adapt v0.2.8/go.mod h1:XQ9y31RkqZCcwJWNSx2Xvric3RrU88hAYYbjDWYDL+c= cloud.google.com/go/bigquery v1.69.0 h1:rZvHnjSUs5sHK3F9awiuFk2PeOaB8suqNuim21GbaTc= @@ -29,6 +29,8 @@ cloud.google.com/go/storage v1.55.0 h1:NESjdAToN9u1tmhVqhXCaCwYBuvEhZLLv0gBr+2zn cloud.google.com/go/storage v1.55.0/go.mod h1:ztSmTTwzsdXe5syLVS0YsbFxXuvEmEyZj7v7zChEmuY= cloud.google.com/go/trace v1.11.6 h1:2O2zjPzqPYAHrn3OKl029qlqG6W8ZdYaOWRyr8NgMT4= cloud.google.com/go/trace v1.11.6/go.mod h1:GA855OeDEBiBMzcckLPE2kDunIpC72N+Pq8WFieFjnI= +entgo.io/ent v0.14.3 h1:wokAV/kIlH9TeklJWGGS7AYJdVckr0DloWjIcO9iIIQ= +entgo.io/ent v0.14.3/go.mod h1:aDPE/OziPEu8+OWbzy4UlvWmD2/kbRuWfK2A40hcxJM= filippo.io/edwards25519 v1.1.0 h1:FNf4tywRC1HmFuKW5xopWpigGjJKiJSV0Cqo0cJWDaA= filippo.io/edwards25519 v1.1.0/go.mod h1:BxyFTGdWcka3PhytdK4V28tE5sGfRvvvRV7EaN4VDT4= github.com/99designs/go-keychain v0.0.0-20191008050251-8e49817e8af4 h1:/vQbFIOMbk2FiG/kXiLl8BRyzTWDw7gX/Hz7Dd5eDMs= @@ -37,14 +39,14 @@ github.com/99designs/keyring v1.2.2 h1:pZd3neh/EmUzWONb35LxQfvuY7kiSXAq3HQd97+XB github.com/99designs/keyring v1.2.2/go.mod h1:wes/FrByc8j7lFOAGLGSNEg8f/PaI3cgTBqhFkHUrPk= github.com/Azure/azure-sdk-for-go/sdk/azcore v1.18.0 h1:Gt0j3wceWMwPmiazCa8MzMA0MfhmPIz0Qp0FJ6qcM0U= github.com/Azure/azure-sdk-for-go/sdk/azcore v1.18.0/go.mod h1:Ot/6aikWnKWi4l9QB7qVSwa8iMphQNqkWALMoNT3rzM= -github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.10.0 h1:j8BorDEigD8UFOSZQiSqAMOOleyQOOQPnUAwV+Ls1gA= -github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.10.0/go.mod h1:JdM5psgjfBf5fo2uWOZhflPWyDBZ/O/CNAH9CtsuZE4= +github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.10.1 h1:B+blDbyVIG3WaikNxPnhPiJ1MThR03b3vKGtER95TP4= +github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.10.1/go.mod h1:JdM5psgjfBf5fo2uWOZhflPWyDBZ/O/CNAH9CtsuZE4= github.com/Azure/azure-sdk-for-go/sdk/azidentity/cache v0.3.2 h1:yz1bePFlP5Vws5+8ez6T3HWXPmwOK7Yvq8QxDBD3SKY= github.com/Azure/azure-sdk-for-go/sdk/azidentity/cache v0.3.2/go.mod h1:Pa9ZNPuoNu/GztvBSKk9J1cDJW6vk/n0zLtV4mgd8N8= github.com/Azure/azure-sdk-for-go/sdk/internal v1.11.1 h1:FPKJS1T+clwv+OLGt13a8UjqeRuh0O4SJ3lUriThc+4= github.com/Azure/azure-sdk-for-go/sdk/internal v1.11.1/go.mod h1:j2chePtV91HrC22tGoRX3sGY42uF13WzmmV80/OdVAA= -github.com/Azure/azure-sdk-for-go/sdk/messaging/azeventhubs v1.3.2 h1:Hr35UBihxDesuh4JDMu/PcgAyIEmvoUl1IPfQnrK0YI= -github.com/Azure/azure-sdk-for-go/sdk/messaging/azeventhubs v1.3.2/go.mod h1:PNuUXQzL07VmB7IR63Qkh0htSOBzmuYYmu2cWVneFDY= +github.com/Azure/azure-sdk-for-go/sdk/messaging/azeventhubs/v2 v2.0.0 h1:h7gH6+/PUP+flGgkDUmIzXfsCnZXlv/g9SjlbWovQ04= +github.com/Azure/azure-sdk-for-go/sdk/messaging/azeventhubs/v2 v2.0.0/go.mod h1:EEyRbPfkzkEmV8AJrYTZ/5of9l5aoarWGm5200n3/oY= github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/eventhub/armeventhub v1.3.0 h1:4hGvxD72TluuFIXVr8f4XkKZfqAa7Pj61t0jmQ7+kes= github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/eventhub/armeventhub v1.3.0/go.mod h1:TSH7DcFItwAufy0Lz+Ft2cyopExCpxbOxI5SkH4dRNo= github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/internal/v3 v3.1.0 
h1:2qsIIvxVT+uE6yrNldntJKlLRgxGbZ85kgtz5SNBhMw= @@ -68,18 +70,18 @@ github.com/BurntSushi/toml v1.5.0 h1:W5quZX/G/csjUnuI8SUYlsHs9M38FC7znL0lIO+DvMg github.com/BurntSushi/toml v1.5.0/go.mod h1:ukJfTF/6rtPPRCnwkur4qwRxa8vTRFBF0uk2lLoLwho= github.com/ClickHouse/ch-go v0.66.0 h1:hLslxxAVb2PHpbHr4n0d6aP8CEIpUYGMVT1Yj/Q5Img= github.com/ClickHouse/ch-go v0.66.0/go.mod h1:noiHWyLMJAZ5wYuq3R/K0TcRhrNA8h7o1AqHX0klEhM= -github.com/ClickHouse/clickhouse-go/v2 v2.35.0 h1:ZMLZqxu+NiW55f4JS32kzyEbMb7CthGn3ziCcULOvSE= -github.com/ClickHouse/clickhouse-go/v2 v2.35.0/go.mod h1:O2FFT/rugdpGEW2VKyEGyMUWyQU0ahmenY9/emxLPxs= +github.com/ClickHouse/clickhouse-go/v2 v2.37.1 h1:AvNJQW0QJudpl6JjH8WyMfu2s3ruWxtp0E1WZKmZXLc= +github.com/ClickHouse/clickhouse-go/v2 v2.37.1/go.mod h1:1KKjGFSWu2R/oa7DKWJLlhTOtyCld7VJDEtXTe+2QKU= github.com/DataDog/zstd v1.5.7 h1:ybO8RBeh29qrxIhCA9E8gKY6xfONU9T6G6aP9DTKfLE= github.com/DataDog/zstd v1.5.7/go.mod h1:g4AWEaM3yOg3HYfnJ3YIawPnVdXJh9QME85blwSAmyw= -github.com/GoogleCloudPlatform/opentelemetry-operations-go/detectors/gcp v1.28.0 h1:VaFXBL0NJpiFBtw4aVJpKHeKULVTcHpD+/G0ibZkcBw= -github.com/GoogleCloudPlatform/opentelemetry-operations-go/detectors/gcp v1.28.0/go.mod h1:JXkPazkEc/dZTHzOlzv2vT1DlpWSTbSLmu/1KY6Ly0I= -github.com/GoogleCloudPlatform/opentelemetry-operations-go/exporter/metric v0.52.0 h1:QFgWzcdmJlgEAwJz/zePYVJQxfoJGRtgIqZfIUFg5oQ= -github.com/GoogleCloudPlatform/opentelemetry-operations-go/exporter/metric v0.52.0/go.mod h1:ayYHuYU7iNcNtEs1K9k6D/Bju7u1VEHMQm5qQ1n3GtM= -github.com/GoogleCloudPlatform/opentelemetry-operations-go/internal/cloudmock v0.52.0 h1:0l8ynskVvq1dvIn5vJbFMf/a/3TqFpRmCMrruFbzlvk= -github.com/GoogleCloudPlatform/opentelemetry-operations-go/internal/cloudmock v0.52.0/go.mod h1:f/ad5NuHnYz8AOZGuR0cY+l36oSCstdxD73YlIchr6I= -github.com/GoogleCloudPlatform/opentelemetry-operations-go/internal/resourcemapping v0.52.0 h1:wbMd4eG/fOhsCa6+IP8uEDvWF5vl7rNoUWmP5f72Tbs= -github.com/GoogleCloudPlatform/opentelemetry-operations-go/internal/resourcemapping v0.52.0/go.mod h1:gdIm9TxRk5soClCwuB0FtdXsbqtw0aqPwBEurK9tPkw= +github.com/GoogleCloudPlatform/opentelemetry-operations-go/detectors/gcp v1.29.0 h1:UQUsRi8WTzhZntp5313l+CHIAT95ojUI2lpP/ExlZa4= +github.com/GoogleCloudPlatform/opentelemetry-operations-go/detectors/gcp v1.29.0/go.mod h1:Cz6ft6Dkn3Et6l2v2a9/RpN7epQ1GtDlO6lj8bEcOvw= +github.com/GoogleCloudPlatform/opentelemetry-operations-go/exporter/metric v0.53.0 h1:owcC2UnmsZycprQ5RfRgjydWhuoxg71LUfyiQdijZuM= +github.com/GoogleCloudPlatform/opentelemetry-operations-go/exporter/metric v0.53.0/go.mod h1:ZPpqegjbE99EPKsu3iUWV22A04wzGPcAY/ziSIQEEgs= +github.com/GoogleCloudPlatform/opentelemetry-operations-go/internal/cloudmock v0.53.0 h1:4LP6hvB4I5ouTbGgWtixJhgED6xdf67twf9PoY96Tbg= +github.com/GoogleCloudPlatform/opentelemetry-operations-go/internal/cloudmock v0.53.0/go.mod h1:jUZ5LYlw40WMd07qxcQJD5M40aUxrfwqQX1g7zxYnrQ= +github.com/GoogleCloudPlatform/opentelemetry-operations-go/internal/resourcemapping v0.53.0 h1:Ron4zCA/yk6U7WOBXhTJcDpsUBG9npumK6xw2auFltQ= +github.com/GoogleCloudPlatform/opentelemetry-operations-go/internal/resourcemapping v0.53.0/go.mod h1:cSgYe11MCNYunTnRXrKiR/tHc0eoKjICUuWpNZoVCOo= github.com/Masterminds/semver v1.5.0 h1:H65muMkzWKEuNDnfl9d70GUjFniHKHRbFPGBuZ3QEww= github.com/Masterminds/semver v1.5.0/go.mod h1:MB6lktGJrhw8PrUyiEoblNEGEQ+RzHPF078ddwwvV3Y= github.com/PeerDB-io/glua64 v1.0.1 h1:biXLlFF/L5pnJCwDon7hkWkuQPozC8NjKS3J7Wzi69I= @@ -102,60 +104,60 @@ github.com/alecthomas/repr v0.4.0 h1:GhI2A8MACjfegCPVq9f1FLvIBS+DrQ2KQBFZP1iFzXc 
github.com/alecthomas/repr v0.4.0/go.mod h1:Fr0507jx4eOXV7AlPV6AVZLYrLIuIeSOWtW57eE/O/4= github.com/andybalholm/brotli v1.1.1 h1:PR2pgnyFznKEugtsUo0xLdDop5SKXd5Qf5ysW+7XdTA= github.com/andybalholm/brotli v1.1.1/go.mod h1:05ib4cKhjx3OQYUY22hTVd34Bc8upXjOLL2rKwwZBoA= -github.com/apache/arrow-go/v18 v18.3.0 h1:Xq4A6dZj9Nu33sqZibzn012LNnewkTUlfKVUFD/RX/I= -github.com/apache/arrow-go/v18 v18.3.0/go.mod h1:eEM1DnUTHhgGAjf/ChvOAQbUQ+EPohtDrArffvUjPg8= +github.com/apache/arrow-go/v18 v18.3.1 h1:oYZT8FqONiK74JhlH3WKVv+2NKYoyZ7C2ioD4Dj3ixk= +github.com/apache/arrow-go/v18 v18.3.1/go.mod h1:12QBya5JZT6PnBihi5NJTzbACrDGXYkrgjujz3MRQXU= github.com/apache/arrow/go/v15 v15.0.2 h1:60IliRbiyTWCWjERBCkO1W4Qun9svcYoZrSLcyOsMLE= github.com/apache/arrow/go/v15 v15.0.2/go.mod h1:DGXsR3ajT524njufqf95822i+KTh+yea1jass9YXgjA= github.com/apache/thrift v0.22.0 h1:r7mTJdj51TMDe6RtcmNdQxgn9XcyfGDOzegMDRg47uc= github.com/apache/thrift v0.22.0/go.mod h1:1e7J/O1Ae6ZQMTYdy9xa3w9k+XHWPfRvdPyJeynQ+/g= github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2 h1:DklsrG3dyBCFEj5IhUbnKptjxatkF07cF2ak3yi77so= github.com/asaskevich/govalidator v0.0.0-20230301143203-a9d515a09cc2/go.mod h1:WaHUgvxTVq04UNunO+XhnAqY/wQc+bxr74GqbsZ/Jqw= -github.com/aws/aws-sdk-go-v2 v1.36.3 h1:mJoei2CxPutQVxaATCzDUjcZEjVRdpsiiXi2o38yqWM= -github.com/aws/aws-sdk-go-v2 v1.36.3/go.mod h1:LLXuLpgzEbD766Z5ECcRmi8AzSwfZItDtmABVkRLGzg= -github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.6.10 h1:zAybnyUQXIZ5mok5Jqwlf58/TFE7uvd3IAsa1aF9cXs= -github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.6.10/go.mod h1:qqvMj6gHLR/EXWZw4ZbqlPbQUyenf4h82UQUlKc+l14= -github.com/aws/aws-sdk-go-v2/config v1.29.14 h1:f+eEi/2cKCg9pqKBoAIwRGzVb70MRKqWX4dg1BDcSJM= -github.com/aws/aws-sdk-go-v2/config v1.29.14/go.mod h1:wVPHWcIFv3WO89w0rE10gzf17ZYy+UVS1Geq8Iei34g= -github.com/aws/aws-sdk-go-v2/credentials v1.17.67 h1:9KxtdcIA/5xPNQyZRgUSpYOE6j9Bc4+D7nZua0KGYOM= -github.com/aws/aws-sdk-go-v2/credentials v1.17.67/go.mod h1:p3C44m+cfnbv763s52gCqrjaqyPikj9Sg47kUVaNZQQ= -github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.16.30 h1:x793wxmUWVDhshP8WW2mlnXuFrO4cOd3HLBroh1paFw= -github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.16.30/go.mod h1:Jpne2tDnYiFascUEs2AWHJL9Yp7A5ZVy3TNyxaAjD6M= -github.com/aws/aws-sdk-go-v2/feature/rds/auth v1.5.11 h1:qDk85oQdhwP4NR1RpkN+t40aN46/K96hF9J1vDRrkKM= -github.com/aws/aws-sdk-go-v2/feature/rds/auth v1.5.11/go.mod h1:f3MkXuZsT+wY24nLIP+gFUuIVQkpVopxbpUD/GUZK0Q= -github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.17.77 h1:xaRN9fags7iJznsMEjtcEuON1hGfCZ0y5MVfEMKtrx8= -github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.17.77/go.mod h1:lolsiGkT47AZ3DWqtxgEQM/wVMpayi7YWNjl3wHSRx8= -github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.34 h1:ZK5jHhnrioRkUNOc+hOgQKlUL5JeC3S6JgLxtQ+Rm0Q= -github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.34/go.mod h1:p4VfIceZokChbA9FzMbRGz5OV+lekcVtHlPKEO0gSZY= -github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.34 h1:SZwFm17ZUNNg5Np0ioo/gq8Mn6u9w19Mri8DnJ15Jf0= -github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.34/go.mod h1:dFZsC0BLo346mvKQLWmoJxT+Sjp+qcVR1tRVHQGOH9Q= +github.com/aws/aws-sdk-go-v2 v1.36.5 h1:0OF9RiEMEdDdZEMqF9MRjevyxAQcf6gY+E7vwBILFj0= +github.com/aws/aws-sdk-go-v2 v1.36.5/go.mod h1:EYrzvCCN9CMUTa5+6lf6MM4tq3Zjp8UhSGR/cBsjai0= +github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.6.11 h1:12SpdwU8Djs+YGklkinSSlcrPyj3H4VifVsKf78KbwA= +github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.6.11/go.mod h1:dd+Lkp6YmMryke+qxW/VnKyhMBDTYP41Q2Bb+6gNZgY= 
+github.com/aws/aws-sdk-go-v2/config v1.29.17 h1:jSuiQ5jEe4SAMH6lLRMY9OVC+TqJLP5655pBGjmnjr0= +github.com/aws/aws-sdk-go-v2/config v1.29.17/go.mod h1:9P4wwACpbeXs9Pm9w1QTh6BwWwJjwYvJ1iCt5QbCXh8= +github.com/aws/aws-sdk-go-v2/credentials v1.17.70 h1:ONnH5CM16RTXRkS8Z1qg7/s2eDOhHhaXVd72mmyv4/0= +github.com/aws/aws-sdk-go-v2/credentials v1.17.70/go.mod h1:M+lWhhmomVGgtuPOhO85u4pEa3SmssPTdcYpP/5J/xc= +github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.16.32 h1:KAXP9JSHO1vKGCr5f4O6WmlVKLFFXgWYAGoJosorxzU= +github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.16.32/go.mod h1:h4Sg6FQdexC1yYG9RDnOvLbW1a/P986++/Y/a+GyEM8= +github.com/aws/aws-sdk-go-v2/feature/rds/auth v1.5.13 h1:bJoSh9iQrFpt/u1A0fiSEwhrFkzhhQIvoa+mLkoNbVI= +github.com/aws/aws-sdk-go-v2/feature/rds/auth v1.5.13/go.mod h1:RxLhhGmjEidlLTRZyk1BLMigHONURhQakw2//prq+DA= +github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.17.81 h1:E5ff1vZlAudg24j5lF6F6/gBpln2LjWxGdQDBSLfVe4= +github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.17.81/go.mod h1:hHBLCuhHI4Aokvs5vdVoCDBzmFy86yxs5J7LEPQwQEM= +github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.36 h1:SsytQyTMHMDPspp+spo7XwXTP44aJZZAC7fBV2C5+5s= +github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.36/go.mod h1:Q1lnJArKRXkenyog6+Y+zr7WDpk4e6XlR6gs20bbeNo= +github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.36 h1:i2vNHQiXUvKhs3quBR6aqlgJaiaexz/aNvdCktW/kAM= +github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.36/go.mod h1:UdyGa7Q91id/sdyHPwth+043HhmP6yP9MBHgbZM0xo8= github.com/aws/aws-sdk-go-v2/internal/ini v1.8.3 h1:bIqFDwgGXXN1Kpp99pDOdKMTTb5d2KyU5X/BZxjOkRo= github.com/aws/aws-sdk-go-v2/internal/ini v1.8.3/go.mod h1:H5O/EsxDWyU+LP/V8i5sm8cxoZgc2fdNR9bxlOFrQTo= -github.com/aws/aws-sdk-go-v2/internal/v4a v1.3.34 h1:ZNTqv4nIdE/DiBfUUfXcLZ/Spcuz+RjeziUtNJackkM= -github.com/aws/aws-sdk-go-v2/internal/v4a v1.3.34/go.mod h1:zf7Vcd1ViW7cPqYWEHLHJkS50X0JS2IKz9Cgaj6ugrs= -github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.12.3 h1:eAh2A4b5IzM/lum78bZ590jy36+d/aFLgKF/4Vd1xPE= -github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.12.3/go.mod h1:0yKJC/kb8sAnmlYa6Zs3QVYqaC8ug2AbnNChv5Ox3uA= -github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.7.2 h1:BCG7DCXEXpNCcpwCxg1oi9pkJWH2+eZzTn9MY56MbVw= -github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.7.2/go.mod h1:iu6FSzgt+M2/x3Dk8zhycdIcHjEFb36IS8HVUVFoMg0= -github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.12.15 h1:dM9/92u2F1JbDaGooxTq18wmmFzbJRfXfVfy96/1CXM= -github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.12.15/go.mod h1:SwFBy2vjtA0vZbjjaFtfN045boopadnoVPhu4Fv66vY= -github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.18.15 h1:moLQUoVq91LiqT1nbvzDukyqAlCv89ZmwaHw/ZFlFZg= -github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.18.15/go.mod h1:ZH34PJUc8ApjBIfgQCFvkWcUDBtl/WTD+uiYHjd8igA= -github.com/aws/aws-sdk-go-v2/service/kms v1.38.3 h1:RivOtUH3eEu6SWnUMFHKAW4MqDOzWn1vGQ3S38Y5QMg= -github.com/aws/aws-sdk-go-v2/service/kms v1.38.3/go.mod h1:cQn6tAF77Di6m4huxovNM7NVAozWTZLsDRp9t8Z/WYk= -github.com/aws/aws-sdk-go-v2/service/s3 v1.80.0 h1:fV4XIU5sn/x8gjRouoJpDVHj+ExJaUk4prYF+eb6qTs= -github.com/aws/aws-sdk-go-v2/service/s3 v1.80.0/go.mod h1:qbn305Je/IofWBJ4bJz/Q7pDEtnnoInw/dGt71v6rHE= -github.com/aws/aws-sdk-go-v2/service/ses v1.30.2 h1:idN+0zMCMQw0VtCHavmq0n/uaNeLi851q3XTa86oxHE= -github.com/aws/aws-sdk-go-v2/service/ses v1.30.2/go.mod h1:eZW5lSNTE1tQfMpl6crr/YVJYgEcnk2JQoodg6E63qM= -github.com/aws/aws-sdk-go-v2/service/sns v1.34.4 
h1:ihddI5wufQQCJiujUgAvWRqZcfDmSKIfXlAuX7T95cg= -github.com/aws/aws-sdk-go-v2/service/sns v1.34.4/go.mod h1:PJtxxMdj747j8DeZENRTTYAz/lx/pADn/U0k7YNNiUY= -github.com/aws/aws-sdk-go-v2/service/sso v1.25.3 h1:1Gw+9ajCV1jogloEv1RRnvfRFia2cL6c9cuKV2Ps+G8= -github.com/aws/aws-sdk-go-v2/service/sso v1.25.3/go.mod h1:qs4a9T5EMLl/Cajiw2TcbNt2UNo/Hqlyp+GiuG4CFDI= -github.com/aws/aws-sdk-go-v2/service/ssooidc v1.30.1 h1:hXmVKytPfTy5axZ+fYbR5d0cFmC3JvwLm5kM83luako= -github.com/aws/aws-sdk-go-v2/service/ssooidc v1.30.1/go.mod h1:MlYRNmYu/fGPoxBQVvBYr9nyr948aY/WLUvwBMBJubs= -github.com/aws/aws-sdk-go-v2/service/sts v1.33.19 h1:1XuUZ8mYJw9B6lzAkXhqHlJd/XvaX32evhproijJEZY= -github.com/aws/aws-sdk-go-v2/service/sts v1.33.19/go.mod h1:cQnB8CUnxbMU82JvlqjKR2HBOm3fe9pWorWBza6MBJ4= -github.com/aws/smithy-go v1.22.3 h1:Z//5NuZCSW6R4PhQ93hShNbyBbn8BWCmCVCt+Q8Io5k= -github.com/aws/smithy-go v1.22.3/go.mod h1:t1ufH5HMublsJYulve2RKmHDC15xu1f26kHCp/HgceI= +github.com/aws/aws-sdk-go-v2/internal/v4a v1.3.36 h1:GMYy2EOWfzdP3wfVAGXBNKY5vK4K8vMET4sYOYltmqs= +github.com/aws/aws-sdk-go-v2/internal/v4a v1.3.36/go.mod h1:gDhdAV6wL3PmPqBhiPbnlS447GoWs8HTTOYef9/9Inw= +github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.12.4 h1:CXV68E2dNqhuynZJPB80bhPQwAKqBWVer887figW6Jc= +github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.12.4/go.mod h1:/xFi9KtvBXP97ppCz1TAEvU1Uf66qvid89rbem3wCzQ= +github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.7.4 h1:nAP2GYbfh8dd2zGZqFRSMlq+/F6cMPBUuCsGAMkN074= +github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.7.4/go.mod h1:LT10DsiGjLWh4GbjInf9LQejkYEhBgBCjLG5+lvk4EE= +github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.12.17 h1:t0E6FzREdtCsiLIoLCWsYliNsRBgyGD/MCK571qk4MI= +github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.12.17/go.mod h1:ygpklyoaypuyDvOM5ujWGrYWpAK3h7ugnmKCU/76Ys4= +github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.18.17 h1:qcLWgdhq45sDM9na4cvXax9dyLitn8EYBRl8Ak4XtG4= +github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.18.17/go.mod h1:M+jkjBFZ2J6DJrjMv2+vkBbuht6kxJYtJiwoVgX4p4U= +github.com/aws/aws-sdk-go-v2/service/kms v1.41.1 h1:dkaX98cOXw4EgqpDXPqrVVLjsPR9T24wA2TcjrQiank= +github.com/aws/aws-sdk-go-v2/service/kms v1.41.1/go.mod h1:Pqd9k4TuespkireN206cK2QBsaBTL6X+VPAez5Qcijk= +github.com/aws/aws-sdk-go-v2/service/s3 v1.81.0 h1:1GmCadhKR3J2sMVKs2bAYq9VnwYeCqfRyZzD4RASGlA= +github.com/aws/aws-sdk-go-v2/service/s3 v1.81.0/go.mod h1:kUklwasNoCn5YpyAqC/97r6dzTA1SRKJfKq16SXeoDU= +github.com/aws/aws-sdk-go-v2/service/ses v1.30.4 h1:VT+yYtHKQiDJrNAsvoO2ExMUN3KxWsFRt+S5j1MdFGk= +github.com/aws/aws-sdk-go-v2/service/ses v1.30.4/go.mod h1:Zftob00wu8O9xWSN1pdczm1U+E6yXk9znf+4lkt+3aQ= +github.com/aws/aws-sdk-go-v2/service/sns v1.34.7 h1:OBuZE9Wt8h2imuRktu+WfjiTGrnYdCIJg8IX92aalHE= +github.com/aws/aws-sdk-go-v2/service/sns v1.34.7/go.mod h1:4WYoZAhHt+dWYpoOQUgkUKfuQbE6Gg/hW4oXE0pKS9U= +github.com/aws/aws-sdk-go-v2/service/sso v1.25.5 h1:AIRJ3lfb2w/1/8wOOSqYb9fUKGwQbtysJ2H1MofRUPg= +github.com/aws/aws-sdk-go-v2/service/sso v1.25.5/go.mod h1:b7SiVprpU+iGazDUqvRSLf5XmCdn+JtT1on7uNL6Ipc= +github.com/aws/aws-sdk-go-v2/service/ssooidc v1.30.3 h1:BpOxT3yhLwSJ77qIY3DoHAQjZsc4HEGfMCE4NGy3uFg= +github.com/aws/aws-sdk-go-v2/service/ssooidc v1.30.3/go.mod h1:vq/GQR1gOFLquZMSrxUK/cpvKCNVYibNyJ1m7JrU88E= +github.com/aws/aws-sdk-go-v2/service/sts v1.34.0 h1:NFOJ/NXEGV4Rq//71Hs1jC/NvPs1ezajK+yQmkwnPV0= +github.com/aws/aws-sdk-go-v2/service/sts v1.34.0/go.mod h1:7ph2tGpfQvwzgistp2+zga9f+bCjlQJPkPUmMgDSD7w= +github.com/aws/smithy-go 
v1.22.4 h1:uqXzVZNuNexwc/xrh6Tb56u89WDlJY6HS+KC0S4QSjw= +github.com/aws/smithy-go v1.22.4/go.mod h1:t1ufH5HMublsJYulve2RKmHDC15xu1f26kHCp/HgceI= github.com/benbjohnson/clock v1.1.0/go.mod h1:J11/hYXuz8f4ySSvYwY0FKfm+ezbsZBKZxNJlLklBHA= github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= @@ -167,8 +169,8 @@ github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= -github.com/cloudfoundry/gosigar v1.3.92 h1:XWSX71qvUMPq+GVPTb3jszOTrb5fcHcbK60LZPaTfbY= -github.com/cloudfoundry/gosigar v1.3.92/go.mod h1:u8AAA406V/2T5BmE8LiLVjLFWLMXT7S65VHeQWnWMOY= +github.com/cloudfoundry/gosigar v1.3.94 h1:jT9cluVqkqPIqohL5c+LL48t+gv1JtbEytKncz9F7p4= +github.com/cloudfoundry/gosigar v1.3.94/go.mod h1:+67J31661uUAzzH3Y1UEM8hyKMCfyWbDW1blbsc/KYQ= github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc= github.com/cncf/xds/go v0.0.0-20250501225837-2ac532fd4443 h1:aQ3y1lwWyqYPiWZThqv1aFbZMiM9vblcSArJRf2Irls= github.com/cncf/xds/go v0.0.0-20250501225837-2ac532fd4443/go.mod h1:W+zGtBO5Y1IgJhy4+A9GOqVhqLpfZi+vwmdNXUehLA8= @@ -217,11 +219,6 @@ github.com/dgryski/go-farm v0.0.0-20240924180020-3414d57e47da h1:aIftn67I1fkbMa5 github.com/dgryski/go-farm v0.0.0-20240924180020-3414d57e47da/go.mod h1:SqUrOPUnsFjfmXRMNPybcSiG0BgUW2AuFH8PAnS2iTw= github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f h1:lO4WD4F/rVNCu3HqELle0jiPLLBs70cWOduZpkS1E78= github.com/dgryski/go-rendezvous v0.0.0-20200823014737-9f7001d12a5f/go.mod h1:cuUVRXasLTGF7a8hSLbxyZXjz+1KgoB3wDUb6vlszIc= -github.com/djherbis/buffer v1.1.0/go.mod h1:VwN8VdFkMY0DCALdY8o00d3IZ6Amz/UNVMWcSaJT44o= -github.com/djherbis/buffer v1.2.0 h1:PH5Dd2ss0C7CRRhQCZ2u7MssF+No9ide8Ye71nPHcrQ= -github.com/djherbis/buffer v1.2.0/go.mod h1:fjnebbZjCUpPinBRD+TDwXSOeNQ7fPQWLfGQqiAiUyE= -github.com/djherbis/nio/v3 v3.0.1 h1:6wxhnuppteMa6RHA4L81Dq7ThkZH8SwnDzXDYy95vB4= -github.com/djherbis/nio/v3 v3.0.1/go.mod h1:Ng4h80pbZFMla1yKzm61cF0tqqilXZYrogmWgZxOcmg= github.com/docker/go-units v0.5.0 h1:69rxXcBk27SvSaaxTtLh/8llcHD8vYHT7WSdRZ/jvr4= github.com/docker/go-units v0.5.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk= github.com/dolthub/maphash v0.1.0 h1:bsQ7JsF4FkkWyrP3oCnFJgrCUAFbFf3kOl4L/QxPDyQ= @@ -234,8 +231,8 @@ github.com/dvsekhvalnov/jose2go v1.8.0 h1:LqkkVKAlHFfH9LOEl5fe4p/zL02OhWE7pCufMB github.com/dvsekhvalnov/jose2go v1.8.0/go.mod h1:QsHjhyTlD/lAVqn/NSbVZmSCGeDehTB/mPZadG+mhXU= github.com/elastic/elastic-transport-go/v8 v8.7.0 h1:OgTneVuXP2uip4BA658Xi6Hfw+PeIOod2rY3GVMGoVE= github.com/elastic/elastic-transport-go/v8 v8.7.0/go.mod h1:YLHer5cj0csTzNFXoNQ8qhtGY1GTvSqPnKWKaqQE3Hk= -github.com/elastic/go-elasticsearch/v8 v8.18.0 h1:ANNq1h7DEiPUaALb8+5w3baQzaS08WfHV0DNzp0VG4M= -github.com/elastic/go-elasticsearch/v8 v8.18.0/go.mod h1:WLqwXsJmQoYkoA9JBFeEwPkQhCfAZuUvfpdU/NvSSf0= +github.com/elastic/go-elasticsearch/v8 v8.18.1 h1:lPsN2Wk6+QqBeD4ckmOax7G/Y8tAZgroDYG8j6/5Ce0= +github.com/elastic/go-elasticsearch/v8 v8.18.1/go.mod h1:F3j9e+BubmKvzvLjNui/1++nJuJxbkhHefbaT0kFKGY= github.com/emicklei/go-restful/v3 v3.12.2 h1:DhwDP0vY3k8ZzE0RunuJy8GhNpPL6zqLkDf9B/a0/xU= github.com/emicklei/go-restful/v3 v3.12.2/go.mod 
h1:6n3XBCmQQb25CM2LCACGz8ukIrRry+4bhvbpWn3mrbc= github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= @@ -292,6 +289,10 @@ github.com/go-openapi/jsonreference v0.21.0 h1:Rs+Y7hSXT83Jacb7kFyjn4ijOuVGSvOdF github.com/go-openapi/jsonreference v0.21.0/go.mod h1:LmZmgsrTkVg9LG4EaHeY8cBDslNPMo06cago5JNLkm4= github.com/go-openapi/swag v0.23.1 h1:lpsStH0n2ittzTnbaSloVZLuB5+fvSY/+hnagBjSNZU= github.com/go-openapi/swag v0.23.1/go.mod h1:STZs8TbRvEQQKUA+JZNAm3EWlgaOBGpyFDqQnDHMef0= +github.com/go-pg/pg/v10 v10.11.0 h1:CMKJqLgTrfpE/aOVeLdybezR2om071Vh38OLZjsyMI0= +github.com/go-pg/pg/v10 v10.11.0/go.mod h1:4BpHRoxE61y4Onpof3x1a2SQvi9c+q1dJnrNdMjsroA= +github.com/go-pg/zerochecker v0.2.0 h1:pp7f72c3DobMWOb2ErtZsnrPaSvHd2W4o9//8HtF4mU= +github.com/go-pg/zerochecker v0.2.0/go.mod h1:NJZ4wKL0NmTtz0GKCoJ8kym6Xn/EQzXRl2OnAe7MmDo= github.com/go-sql-driver/mysql v1.7.1 h1:lUIinVbN1DY0xBg0eMOzmmtGoHwWBbvnWubQUrtU8EI= github.com/go-sql-driver/mysql v1.7.1/go.mod h1:OXbVy3sEdcQ2Doequ6Z5BW6fXNQTmx+9S1MCJN5yJMI= github.com/go-stack/stack v1.8.0/go.mod h1:v0f6uXyyMGvRgIKkXu+yp6POWl0qKG85gN/melR3HDY= @@ -299,6 +300,8 @@ github.com/go-task/slim-sprig/v3 v3.0.0 h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1v github.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZiAzKg9hl15HA8= github.com/go-test/deep v1.1.1 h1:0r/53hagsehfO4bzD2Pgr/+RgHqhmf+k1Bpse2cTu1U= github.com/go-test/deep v1.1.1/go.mod h1:5C2ZWiW0ErCdrYzpqxLbTX7MG14M9iiw8DgHncVwcsE= +github.com/go-viper/mapstructure/v2 v2.3.0 h1:27XbWsHIqhbdR5TIC911OfYvgSaW93HM+dX7970Q7jk= +github.com/go-viper/mapstructure/v2 v2.3.0/go.mod h1:oJDH3BJKyqBA2TXFhDsKDGDTlndYOZ6rGS0BRZIxGhM= github.com/goccy/go-json v0.10.5 h1:Fq85nIqj+gXn/S5ahsiTlK3TmC85qgirsdTP/+DeaC4= github.com/goccy/go-json v0.10.5/go.mod h1:oq7eo15ShAhp70Anwd5lgX2pLfOS3QCiwU/PULtXL6M= github.com/godbus/dbus v0.0.0-20190726142602-4481cbc300e2 h1:ZpnhV/YsD2/4cESfV5+Hoeu/iUR3ruzNvZ+yQfO03a0= @@ -357,8 +360,8 @@ github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= github.com/google/martian/v3 v3.3.3 h1:DIhPTQrbPkgs2yJYdXU/eNACCG5DVQjySNRNlflZ9Fc= github.com/google/martian/v3 v3.3.3/go.mod h1:iEPrYcgCF7jA9OtScMFQyAlZZ4YXTKEtJ1E6RWzmBA0= -github.com/google/pprof v0.0.0-20250501235452-c0086092b71a h1:rDA3FfmxwXR+BVKKdz55WwMJ1pD2hJQNW31d+l3mPk4= -github.com/google/pprof v0.0.0-20250501235452-c0086092b71a/go.mod h1:5hDyRhoBCxViHszMt12TnOpEI4VVi+U8Gm9iphldiMA= +github.com/google/pprof v0.0.0-20250607225305-033d6d78b36a h1://KbezygeMJZCSHH+HgUZiTeSoiuFspbMg1ge+eFj18= +github.com/google/pprof v0.0.0-20250607225305-033d6d78b36a/go.mod h1:5hDyRhoBCxViHszMt12TnOpEI4VVi+U8Gm9iphldiMA= github.com/google/renameio v0.1.0/go.mod h1:KWCgfxg9yswjAJkECMjeO8J8rahYeXnNhOm40UhjYkI= github.com/google/s2a-go v0.1.9 h1:LGD7gtMgezd8a/Xak7mEWL0PjoTQFvpRudN895yqKW0= github.com/google/s2a-go v0.1.9/go.mod h1:YA0Ei2ZQL3acow2O62kdp9UlnvMmU7kA6Eutn0dXayM= @@ -375,12 +378,12 @@ github.com/gorilla/websocket v1.5.4-0.20250319132907-e064f32e3674 h1:JeSE6pjso5T github.com/gorilla/websocket v1.5.4-0.20250319132907-e064f32e3674/go.mod h1:r4w70xmWCQKmi1ONH4KIaBptdivuRPyosB9RmPlGEwA= github.com/grpc-ecosystem/go-grpc-middleware v1.4.0 h1:UH//fgunKIs4JdUbpDl1VZCDaL56wXCB/5+wF6uHfaI= github.com/grpc-ecosystem/go-grpc-middleware v1.4.0/go.mod h1:g5qyo/la0ALbONm6Vbp88Yd8NsDy6rZz+RcrMPxvld8= -github.com/grpc-ecosystem/grpc-gateway/v2 v2.26.3 
h1:5ZPtiqj0JL5oKWmcsq4VMaAW5ukBEgSGXEN89zeH1Jo= -github.com/grpc-ecosystem/grpc-gateway/v2 v2.26.3/go.mod h1:ndYquD05frm2vACXE1nsccT4oJzjhw2arTS2cpUD1PI= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.0 h1:+epNPbD5EqgpEMm5wrl4Hqts3jZt8+kYaqUisuuIGTk= +github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.0/go.mod h1:Zanoh4+gvIgluNqcfMVTJueD4wSS5hT7zTt4Mrutd90= github.com/gsterjov/go-libsecret v0.0.0-20161001094733-a6f4afe4910c h1:6rhixN/i8ZofjG1Y75iExal34USq5p+wiN1tpie8IrU= github.com/gsterjov/go-libsecret v0.0.0-20161001094733-a6f4afe4910c/go.mod h1:NMPJylDgVpX0MLRlPy15sqSwOFv/U1GZ2m21JhFfek0= -github.com/hamba/avro/v2 v2.28.0 h1:E8J5D27biyAulWKNiEBhV85QPc9xRMCUCGJewS0KYCE= -github.com/hamba/avro/v2 v2.28.0/go.mod h1:9TVrlt1cG1kkTUtm9u2eO5Qb7rZXlYzoKqPt8TSH+TA= +github.com/hamba/avro/v2 v2.29.0 h1:fkqoWEPxfygZxrkktgSHEpd0j/P7RKTBTDbcEeMdVEY= +github.com/hamba/avro/v2 v2.29.0/go.mod h1:Pk3T+x74uJoJOFmHrdJ8PRdgSEL/kEKteJ31NytCKxI= github.com/hexops/gotextdiff v1.0.3 h1:gitA9+qJrrTCsiCl7+kh75nPqQt1cx4ZkudSTLoUqJM= github.com/hexops/gotextdiff v1.0.3/go.mod h1:pSWU5MAI3yDq+fZBTazCSJysOMbxWL1BSow5/V2vxeg= github.com/influxdata/tdigest v0.0.1 h1:XpFptwYmnEKUqmkcDjrzffswZ3nvNeevbUSLPP/ZzIY= @@ -399,6 +402,12 @@ github.com/jackc/pgx/v5 v5.7.5 h1:JHGfMnQY+IEtGM63d+NGMjoRpysB2JBwDr5fsngwmJs= github.com/jackc/pgx/v5 v5.7.5/go.mod h1:aruU7o91Tc2q2cFp5h4uP3f6ztExVpyVv88Xl/8Vl8M= github.com/jackc/puddle/v2 v2.2.2 h1:PR8nw+E/1w0GLuRFSmiioY6UooMp6KJv0/61nB7icHo= github.com/jackc/puddle/v2 v2.2.2/go.mod h1:vriiEXHvEE654aYKXXjOvZM39qJ0q+azkZFrfEOc3H4= +github.com/jinzhu/inflection v1.0.0 h1:K317FqzuhWc8YvSVlFMCCUb36O/S9MCKRDI7QkRKD/E= +github.com/jinzhu/inflection v1.0.0/go.mod h1:h+uFLlag+Qp1Va5pdKtLDYj+kHp5pxUVkryuEj+Srlc= +github.com/jinzhu/now v1.1.5 h1:/o9tlHleP7gOFmsnYNz3RGnqzefHA47wQpKrrdTIwXQ= +github.com/jinzhu/now v1.1.5/go.mod h1:d3SSVoowX0Lcu0IBviAWJpolVfI5UJVZZ7cO71lE/z8= +github.com/jmoiron/sqlx v1.3.5 h1:vFFPA71p1o5gAeqtEAwLU4dnX2napprKtHr7PYIcN3g= +github.com/jmoiron/sqlx v1.3.5/go.mod h1:nRVWtLre0KfCLJvgxzCsLVMogSvQ1zNJtpYr2Ccp0mQ= github.com/joho/godotenv v1.5.1 h1:7eLL/+HRGLY0ldzfGMeQkb7vMd0as4CfYvUVzLqw0N0= github.com/joho/godotenv v1.5.1/go.mod h1:f4LDr5Voq0i2e/R5DDNOoa2zzDfwtkZa6DnEwAbqwq4= github.com/josharian/intern v1.0.0 h1:vlS4z54oSdjm0bgjRigI+G1HpF+tI+9rE5LLzOg8HmY= @@ -429,8 +438,8 @@ github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc= github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw= -github.com/lestrrat-go/blackmagic v1.0.3 h1:94HXkVLxkZO9vJI/w2u1T0DAoprShFd13xtnSINtDWs= -github.com/lestrrat-go/blackmagic v1.0.3/go.mod h1:6AWFyKNNj0zEXQYfTMPfZrAXUWUfTIZ5ECEUEJaijtw= +github.com/lestrrat-go/blackmagic v1.0.4 h1:IwQibdnf8l2KoO+qC3uT4OaTWsW7tuRQXy9TRN9QanA= +github.com/lestrrat-go/blackmagic v1.0.4/go.mod h1:6AWFyKNNj0zEXQYfTMPfZrAXUWUfTIZ5ECEUEJaijtw= github.com/lestrrat-go/httpcc v1.0.1 h1:ydWCStUeJLkpYyjLDHihupbn2tYmZ7m22BGkcvZZrIE= github.com/lestrrat-go/httpcc v1.0.1/go.mod h1:qiltp3Mt56+55GPVCbTdM9MlqhvzyuL6W/NMDA8vA5E= github.com/lestrrat-go/httprc v1.0.6 h1:qgmgIRhpvBqexMJjA/PmwSvhNk679oqD1RbovdCGW8k= @@ -452,8 +461,6 @@ github.com/minio/asm2plan9s v0.0.0-20200509001527-cdd76441f9d8 h1:AMFGa4R4MiIpsp github.com/minio/asm2plan9s v0.0.0-20200509001527-cdd76441f9d8/go.mod h1:mC1jAcsrzbxHt8iiaC+zU4b1ylILSosueou12R++wfY= github.com/minio/c2goasm 
v0.0.0-20190812172519-36a3d3bbc4f3 h1:+n/aFZefKZp7spd8DFdX7uMikMLXX4oubIzJF4kv/wI= github.com/minio/c2goasm v0.0.0-20190812172519-36a3d3bbc4f3/go.mod h1:RagcQ7I8IeTMnF8JTXieKnO4Z6JCsikNEzj0DwauVzE= -github.com/mitchellh/mapstructure v1.5.0 h1:jeMsZIYE/09sWLaz43PL7Gy6RuMjD2eJVyuac5Z2hdY= -github.com/mitchellh/mapstructure v1.5.0/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo= github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= @@ -491,21 +498,23 @@ github.com/paulmach/orb v0.11.1/go.mod h1:5mULz1xQfs3bmQm63QEJA6lNGujuRafwA5S/En github.com/paulmach/protoscan v0.2.1/go.mod h1:SpcSwydNLrxUGSDvXvO0P7g7AuhJ7lcKfDlhJCDw2gY= github.com/petermattis/goid v0.0.0-20240813172612-4fcff4a6cae7 h1:Dx7Ovyv/SFnMFw3fD4oEoeorXc6saIiQ23LrGLth0Gw= github.com/petermattis/goid v0.0.0-20240813172612-4fcff4a6cae7/go.mod h1:pxMtw7cyUw6B2bRH0ZBANSPg+AoSud1I1iyJHI69jH4= +github.com/pgvector/pgvector-go v0.3.0 h1:Ij+Yt78R//uYqs3Zk35evZFvr+G0blW0OUN+Q2D1RWc= +github.com/pgvector/pgvector-go v0.3.0/go.mod h1:duFy+PXWfW7QQd5ibqutBO4GxLsUZ9RVXhFZGIBsWSA= github.com/pierrec/lz4/v4 v4.1.22 h1:cKFw6uJDK+/gfw5BcDL0JL5aBsAFdsIT18eRtLj7VIU= github.com/pierrec/lz4/v4 v4.1.22/go.mod h1:gZWDp/Ze/IJXGXf23ltt2EXimqmTUXEy0GFuRQyBid4= github.com/pingcap/badger v1.5.1-0.20241015064302-38533b6cbf8d h1:eHcokyHxm7HVM+7+Qy1zZwC7NhX9wVNX8oQDcSZw1qI= github.com/pingcap/badger v1.5.1-0.20241015064302-38533b6cbf8d/go.mod h1:KiO2zumBCWx7yoVYoFRpb+DNrwEPk1pR1LF7NvOACMQ= github.com/pingcap/errors v0.11.0/go.mod h1:Oi8TUi2kEtXXLMJk9l1cGmz20kV3TaQ0usTwv5KuLY8= github.com/pingcap/errors v0.11.5-0.20190809092503-95897b64e011/go.mod h1:Oi8TUi2kEtXXLMJk9l1cGmz20kV3TaQ0usTwv5KuLY8= -github.com/pingcap/errors v0.11.5-0.20240318064555-6bd07397691f h1:FxA+NgsdHNOv+/hZGxUh8Gb3WuZqgqmxDwztEOiA1v4= -github.com/pingcap/errors v0.11.5-0.20240318064555-6bd07397691f/go.mod h1:X2r9ueLEUZgtx2cIogM0v4Zj5uvvzhuuiu7Pn8HzMPg= +github.com/pingcap/errors v0.11.5-0.20250523034308-74f78ae071ee h1:/IDPbpzkzA97t1/Z1+C3KlxbevjMeaI6BQYxvivu4u8= +github.com/pingcap/errors v0.11.5-0.20250523034308-74f78ae071ee/go.mod h1:X2r9ueLEUZgtx2cIogM0v4Zj5uvvzhuuiu7Pn8HzMPg= github.com/pingcap/failpoint v0.0.0-20240528011301-b51a646c7c86 h1:tdMsjOqUR7YXHoBitzdebTvOjs/swniBTOLy5XiMtuE= github.com/pingcap/failpoint v0.0.0-20240528011301-b51a646c7c86/go.mod h1:exzhVYca3WRtd6gclGNErRWb1qEgff3LYta0LvRmON4= github.com/pingcap/goleveldb v0.0.0-20191226122134-f82aafb29989 h1:surzm05a8C9dN8dIUmo4Be2+pMRb6f55i+UIYrluu2E= github.com/pingcap/goleveldb v0.0.0-20191226122134-f82aafb29989/go.mod h1:O17XtbryoCJhkKGbT62+L2OlrniwqiGLSqrmdHCMzZw= github.com/pingcap/kvproto v0.0.0-20191211054548-3c6b38ea5107/go.mod h1:WWLmULLO7l8IOcQG+t+ItJ3fEcrL5FxF0Wu+HrMy26w= -github.com/pingcap/kvproto v0.0.0-20250530091117-d581b50ffdbd h1:fz7X718PidDSAQsHcv9AbGw++dXY/3cJek00/fuEsy4= -github.com/pingcap/kvproto v0.0.0-20250530091117-d581b50ffdbd/go.mod h1:rXxWk2UnwfUhLXha1jxRWPADw9eMZGWEWCg92Tgmb/8= +github.com/pingcap/kvproto v0.0.0-20250616075548-d951fb623bb3 h1:OcZxUJEwZzFIqY8AkRIHuEK8U1X5OyLfqAwVnhaKsag= +github.com/pingcap/kvproto v0.0.0-20250616075548-d951fb623bb3/go.mod h1:rXxWk2UnwfUhLXha1jxRWPADw9eMZGWEWCg92Tgmb/8= github.com/pingcap/log v0.0.0-20210625125904-98ed8e2eb1c7/go.mod 
h1:8AanEdAHATuRurdGxZXBz0At+9avep+ub7U1AGYLIMM= github.com/pingcap/log v1.1.1-0.20241212030209-7e3ff8601a2a h1:WIhmJBlNGmnCWH6TLMdZfNEDaiU8cFpZe3iaqDbQ0M8= github.com/pingcap/log v1.1.1-0.20241212030209-7e3ff8601a2a/go.mod h1:ORfBOFp1eteu2odzsyaxI+b8TzJwgjwyQcGhI+9SfEA= @@ -513,8 +522,8 @@ github.com/pingcap/sysutil v1.0.1-0.20240311050922-ae81ee01f3a5 h1:T4pXRhBflzDeA github.com/pingcap/sysutil v1.0.1-0.20240311050922-ae81ee01f3a5/go.mod h1:rlimy0GcTvjiJqvD5mXTRr8O2eNZPBrcUgiWVYp9530= github.com/pingcap/tidb v0.0.0-20250130070702-43f2fb91d740 h1:sVzD98TPvQSw0JS0D6u2GfT0yu+U6P4pFFF2l3C6N4Y= github.com/pingcap/tidb v0.0.0-20250130070702-43f2fb91d740/go.mod h1:FCAJgaQPFGKsPOKttMA7jPfdQR9sLyVa9HfdncbuchM= -github.com/pingcap/tidb/pkg/parser v0.0.0-20250531022214-e7b038b99132 h1:jIo9aM90dQhOo11Dn/f+hIENAfF9DtpKzeFTkkh9n0o= -github.com/pingcap/tidb/pkg/parser v0.0.0-20250531022214-e7b038b99132/go.mod h1:+8feuexTKcXHZF/dkDfvCwEyBAmgb4paFc3/WeYV2eE= +github.com/pingcap/tidb/pkg/parser v0.0.0-20250623120500-dfc0a21a9c60 h1:rLxX8nsaQAC7zYpMUVAP+KEFkRUMO6SQEXnsSYGOfD0= +github.com/pingcap/tidb/pkg/parser v0.0.0-20250623120500-dfc0a21a9c60/go.mod h1:mpCcwRdMnmvNkBxcT4AqiE0yuvfJTdmCJs7cfznJw1w= github.com/pingcap/tipb v0.0.0-20241212101007-246f91188357 h1:s58UXyaWMNeaoeuVPZdrkm5Uk7NcODHqICGCUQ3A9s4= github.com/pingcap/tipb v0.0.0-20241212101007-246f91188357/go.mod h1:zrnYy8vReNODg8G0OiYaX9OK+kpq+rK1jHmvd1DnIWw= github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c h1:+mdjkGKdHQG3305AYmdv1U2eRNDiU2ErMBj1gwrq8eQ= @@ -538,8 +547,8 @@ github.com/prometheus/client_golang v1.22.0/go.mod h1:R7ljNsLXhuQXYZYtw6GAE9AZg8 github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= github.com/prometheus/client_model v0.6.2 h1:oBsgwpGs7iVziMvrGhE53c/GrLUsZdHnqNwqPLxwZyk= github.com/prometheus/client_model v0.6.2/go.mod h1:y3m2F6Gdpfy6Ut/GBsUqTWZqCUvMVzSfMLjcu6wAwpE= -github.com/prometheus/common v0.64.0 h1:pdZeA+g617P7oGv1CzdTzyeShxAGrTBsolKNOLQPGO4= -github.com/prometheus/common v0.64.0/go.mod h1:0gZns+BLRQ3V6NdaerOhMbwwRbNh9hkGINtQAsP5GS8= +github.com/prometheus/common v0.65.0 h1:QDwzd+G1twt//Kwj/Ww6E9FQq1iVMmODnILtW1t2VzE= +github.com/prometheus/common v0.65.0/go.mod h1:0gZns+BLRQ3V6NdaerOhMbwwRbNh9hkGINtQAsP5GS8= github.com/prometheus/procfs v0.16.1 h1:hZ15bTNuirocR6u0JZ6BAHHmwS1p8B4P6MRqxtzMyRg= github.com/prometheus/procfs v0.16.1/go.mod h1:teAbpZRB1iIAJYREa1LsoWUXykVXA1KlTmWl8x/U+Is= github.com/qri-io/jsonpointer v0.1.1 h1:prVZBZLL6TW5vsSB9fFHFAMBLI4b0ri5vribQlTJiBA= @@ -572,8 +581,8 @@ github.com/shopspring/decimal v1.4.0/go.mod h1:gawqmDU56v4yIKSwfBSFip1HdCCXN8/+D github.com/sirupsen/logrus v1.4.2/go.mod h1:tLMulIdttU9McNUspp0xgXVQah82FyeX6MwdIuYE2rE= github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ= github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ= -github.com/slack-go/slack v0.17.0 h1:Vqd4GGIcwwgEu80GBs3cXoPPho5bkDGSFnuZbSG0NhA= -github.com/slack-go/slack v0.17.0/go.mod h1:X+UqOufi3LYQHDnMG1vxf0J8asC6+WllXrVrhl8/Prk= +github.com/slack-go/slack v0.17.1 h1:x0Mnc6biHBea5vfxLR+x4JFl/Rm3eIo0iS3xDZenX+o= +github.com/slack-go/slack v0.17.1/go.mod h1:X+UqOufi3LYQHDnMG1vxf0J8asC6+WllXrVrhl8/Prk= github.com/snowflakedb/gosnowflake v1.14.1 h1:FnnlaSAm6Zyq3ujqa0JmeU1Ivj7Iz+A0C2YGV6nbRSw= github.com/snowflakedb/gosnowflake v1.14.1/go.mod h1:+3Eh8swS12G6Fbt/wb5Vcse2Id7VU9HGgKSH8ydiumU= github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= @@ -600,16 +609,18 
@@ github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf github.com/tiancaiamao/gp v0.0.0-20230126082955-4f9e4f1ed9b5 h1:4bvGDLXwsP4edNa9igJz+oU1kmZ6S3PSjrnOFgh5Xwk= github.com/tiancaiamao/gp v0.0.0-20230126082955-4f9e4f1ed9b5/go.mod h1:h4xBhSNtOeEosLJ4P7JyKXX7Cabg7AVkWCK5gV2vOrM= github.com/tidwall/pretty v1.0.0/go.mod h1:XNkn88O1ChpSDQmQeStsy+sBenx6DDtFZJxhVysOjyk= -github.com/tikv/pd/client v0.0.0-20250530134205-67b518dbb195 h1:0DahoGgSZWitxl+t1p0a9Dp8HkiZRylUF8wnDMis2Hs= -github.com/tikv/pd/client v0.0.0-20250530134205-67b518dbb195/go.mod h1:yc63HG/FHgJNvfDPqMOciMtOju1QDYaxajqyN6rnFX0= +github.com/tikv/pd/client v0.0.0-20250623084542-60788950a745 h1:p6kmQprZcw3qC6yljdE/hPzkDULYH65v9BJJJp1doxs= +github.com/tikv/pd/client v0.0.0-20250623084542-60788950a745/go.mod h1:yc63HG/FHgJNvfDPqMOciMtOju1QDYaxajqyN6rnFX0= github.com/tklauser/go-sysconf v0.3.9/go.mod h1:11DU/5sG7UexIrp/O6g35hrWzu0JxlwQ3LSFUzyeuhs= github.com/tklauser/go-sysconf v0.3.15 h1:VE89k0criAymJ/Os65CSn1IXaol+1wrsFHEB8Ol49K4= github.com/tklauser/go-sysconf v0.3.15/go.mod h1:Dmjwr6tYFIseJw7a3dRLJfsHAMXZ3nEnL/aZY+0IuI4= github.com/tklauser/numcpus v0.3.0/go.mod h1:yFGUr7TUHQRAhyqBcEg0Ge34zDBAsIvJJcyE6boqnA8= github.com/tklauser/numcpus v0.10.0 h1:18njr6LDBk1zuna922MgdjQuJFjrdppsZG60sHGfjso= github.com/tklauser/numcpus v0.10.0/go.mod h1:BiTKazU708GQTYF4mB+cmlpT2Is1gLk7XVuEeem8LsQ= -github.com/twmb/franz-go v1.19.4 h1:0ktflzm5YU7+YYdie8RQWFcU9uDJ03xLefplO1iMwO4= -github.com/twmb/franz-go v1.19.4/go.mod h1:4kFJ5tmbbl7asgwAGVuyG1ZMx0NNpYk7EqflvWfPCpM= +github.com/tmthrgd/go-hex v0.0.0-20190904060850-447a3041c3bc h1:9lRDQMhESg+zvGYmW5DyG0UqvY96Bu5QYsTLvCHdrgo= +github.com/tmthrgd/go-hex v0.0.0-20190904060850-447a3041c3bc/go.mod h1:bciPuU6GHm1iF1pBvUfxfsH0Wmnc2VbpgvbI9ZWuIRs= +github.com/twmb/franz-go v1.19.5 h1:W7+o8D0RsQsedqib71OVlLeZ0zI6CbFra7yTYhZTs5Y= +github.com/twmb/franz-go v1.19.5/go.mod h1:4kFJ5tmbbl7asgwAGVuyG1ZMx0NNpYk7EqflvWfPCpM= github.com/twmb/franz-go/pkg/kadm v1.16.0 h1:STMs1t5lYR5mR974PSiwNzE5TvsosByTp+rKXLOhAjE= github.com/twmb/franz-go/pkg/kadm v1.16.0/go.mod h1:MUdcUtnf9ph4SFBLLA/XxE29rvLhWYLM9Ygb8dfSCvw= github.com/twmb/franz-go/pkg/kmsg v1.11.2 h1:hIw75FpwcAjgeyfIGFqivAvwC5uNIOWRGvQgZhH4mhg= @@ -624,13 +635,32 @@ github.com/uber/jaeger-client-go v2.22.1+incompatible h1:NHcubEkVbahf9t3p75TOCR8 github.com/uber/jaeger-client-go v2.22.1+incompatible/go.mod h1:WVhlPFC8FDjOFMMWRy2pZqQJSXxYSwNYOkTr/Z6d3Kk= github.com/uber/jaeger-lib v2.4.1+incompatible h1:td4jdvLcExb4cBISKIpHuGoVXh+dVKhn2Um6rjCsSsg= github.com/uber/jaeger-lib v2.4.1+incompatible/go.mod h1:ComeNDZlWwrWnDv8aPp0Ba6+uUTzImX/AauajbLI56U= -github.com/urfave/cli/v3 v3.3.3 h1:byCBaVdIXuLPIDm5CYZRVG6NvT7tv1ECqdU4YzlEa3I= -github.com/urfave/cli/v3 v3.3.3/go.mod h1:FJSKtM/9AiiTOJL4fJ6TbMUkxBXn7GO9guZqoZtpYpo= +github.com/uptrace/bun v1.1.12 h1:sOjDVHxNTuM6dNGaba0wUuz7KvDE1BmNu9Gqs2gJSXQ= +github.com/uptrace/bun v1.1.12/go.mod h1:NPG6JGULBeQ9IU6yHp7YGELRa5Agmd7ATZdz4tGZ6z0= +github.com/uptrace/bun/dialect/pgdialect v1.1.12 h1:m/CM1UfOkoBTglGO5CUTKnIKKOApOYxkcP2qn0F9tJk= +github.com/uptrace/bun/dialect/pgdialect v1.1.12/go.mod h1:Ij6WIxQILxLlL2frUBxUBOZJtLElD2QQNDcu/PWDHTc= +github.com/uptrace/bun/driver/pgdriver v1.1.12 h1:3rRWB1GK0psTJrHwxzNfEij2MLibggiLdTqjTtfHc1w= +github.com/uptrace/bun/driver/pgdriver v1.1.12/go.mod h1:ssYUP+qwSEgeDDS1xm2XBip9el1y9Mi5mTAvLoiADLM= +github.com/urfave/cli/v3 v3.3.8 h1:BzolUExliMdet9NlJ/u4m5vHSotJ3PzEqSAZ1oPMa/E= +github.com/urfave/cli/v3 v3.3.8/go.mod h1:FJSKtM/9AiiTOJL4fJ6TbMUkxBXn7GO9guZqoZtpYpo= 
+github.com/vmihailenco/bufpool v0.1.11 h1:gOq2WmBrq0i2yW5QJ16ykccQ4wH9UyEsgLm6czKAd94= +github.com/vmihailenco/bufpool v0.1.11/go.mod h1:AFf/MOy3l2CFTKbxwt0mp2MwnqjNEs5H/UxrkA5jxTQ= +github.com/vmihailenco/msgpack/v5 v5.3.5 h1:5gO0H1iULLWGhs2H5tbAHIZTV8/cYafcFOr9znI5mJU= +github.com/vmihailenco/msgpack/v5 v5.3.5/go.mod h1:7xyJ9e+0+9SaZT0Wt1RGleJXzli6Q/V5KbhBonMG9jc= +github.com/vmihailenco/tagparser v0.1.2 h1:gnjoVuB/kljJ5wICEEOpx98oXMWPLj22G67Vbd1qPqc= +github.com/vmihailenco/tagparser v0.1.2/go.mod h1:OeAg3pn3UbLjkWt+rN9oFYB6u/cQgqMEUPoW2WPyhdI= +github.com/vmihailenco/tagparser/v2 v2.0.0 h1:y09buUbR+b5aycVFQs/g70pqKVZNBmxwAhO7/IwNM9g= +github.com/vmihailenco/tagparser/v2 v2.0.0/go.mod h1:Wri+At7QHww0WTrCBeu4J6bNtoV6mEfg5OIWRZA9qds= github.com/x448/float16 v0.8.4 h1:qLwI1I70+NjRFUR3zs1JPUCgaCXSh3SW62uAKT1mSBM= github.com/x448/float16 v0.8.4/go.mod h1:14CWIYCyZA/cWjXOioeEpHeN/83MdbZDRQHoFcYsOfg= +github.com/xdg-go/pbkdf2 v1.0.0 h1:Su7DPu48wXMwC3bs7MCNG+z4FhcyEuz5dlvchbq0B0c= github.com/xdg-go/pbkdf2 v1.0.0/go.mod h1:jrpuAogTd400dnrH08LKmI/xc1MbPOebTwRqcT5RDeI= github.com/xdg-go/scram v1.1.1/go.mod h1:RaEWvsqvNKKvBPvcKeFjrG2cJqOkHTiyTpzz23ni57g= +github.com/xdg-go/scram v1.1.2 h1:FHX5I5B4i4hKRVRBCFRxq1iQRej7WO3hhBuJf+UUySY= +github.com/xdg-go/scram v1.1.2/go.mod h1:RT/sEzTbU5y00aCK8UOx6R7YryM0iF1N2MOmC3kKLN4= github.com/xdg-go/stringprep v1.0.3/go.mod h1:W3f5j4i+9rC0kuIEJL0ky1VpHXQU3ocBgklLGvcBnW8= +github.com/xdg-go/stringprep v1.0.4 h1:XLI/Ng3O1Atzq0oBs3TWm+5ZVgkq2aqdlvP9JtoZ6c8= +github.com/xdg-go/stringprep v1.0.4/go.mod h1:mPGuuIYwz7CmR2bT9j4GbQqutWS1zV24gijq1dTyGkM= github.com/xyproto/randomstring v1.0.5 h1:YtlWPoRdgMu3NZtP45drfy1GKoojuR7hmRcnhZqKjWU= github.com/xyproto/randomstring v1.0.5/go.mod h1:rgmS5DeNXLivK7YprL0pY+lTuhNQW3iGxZ18UQApw/E= github.com/youmark/pkcs8 v0.0.0-20181117223130-1be2e3e5546d/go.mod h1:rHwXgn7JulP+udvsHwJoVG1YGAP6VLg4y9I5dyZdqmA= @@ -639,6 +669,7 @@ github.com/youmark/pkcs8 v0.0.0-20240726163527-a2c0da244d78/go.mod h1:aL8wCCfTfS github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k= +github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY= github.com/yuin/gopher-lua v1.1.1 h1:kYKnWBjvbNP4XLT3+bPEwAXJx262OhaHDWDVOPjL46M= github.com/yuin/gopher-lua v1.1.1/go.mod h1:GBR0iDaNXjAgGg9zfCvksxSRnQx76gclCIb7kdAd1Pw= github.com/yusufpapurcu/wmi v1.2.2/go.mod h1:SBZ9tNy3G9/m5Oi98Zks0QjeHVDvuK0qfxQmPyzfmi0= @@ -652,13 +683,15 @@ github.com/zeebo/xxh3 v1.0.2 h1:xZmwmqxHZA8AI603jOQ0tMqmBr9lPeFwGg6d+xy9DC0= github.com/zeebo/xxh3 v1.0.2/go.mod h1:5NWz9Sef7zIDm2JHfFlcQvNekmcEl9ekUZQQKCYaDcA= go.einride.tech/aip v0.68.1 h1:16/AfSxcQISGN5z9C5lM+0mLYXihrHbQ1onvYTr93aQ= go.einride.tech/aip v0.68.1/go.mod h1:XaFtaj4HuA3Zwk9xoBtTWgNubZ0ZZXv9BZJCkuKuWbg= -go.etcd.io/etcd/api/v3 v3.6.0 h1:vdbkcUBGLf1vfopoGE/uS3Nv0KPyIpUV/HM6w9yx2kM= -go.etcd.io/etcd/api/v3 v3.6.0/go.mod h1:Wt5yZqEmxgTNJGHob7mTVBJDZNXiHPtXTcPab37iFOw= -go.etcd.io/etcd/client/pkg/v3 v3.6.0 h1:nchnPqpuxvv3UuGGHaz0DQKYi5EIW5wOYsgUNRc365k= -go.etcd.io/etcd/client/pkg/v3 v3.6.0/go.mod h1:Jv5SFWMnGvIBn8o3OaBq/PnT0jjsX8iNokAUessNjoA= -go.etcd.io/etcd/client/v3 v3.6.0 h1:/yjKzD+HW5v/3DVj9tpwFxzNbu8hjcKID183ug9duWk= -go.etcd.io/etcd/client/v3 v3.6.0/go.mod h1:Jzk/Knqe06pkOZPHXsQ0+vNDvMQrgIqJ0W8DwPdMJMg= +go.etcd.io/etcd/api/v3 v3.6.1 h1:yJ9WlDih9HT457QPuHt/TH/XtsdN2tubyxyQHSHPsEo= 
+go.etcd.io/etcd/api/v3 v3.6.1/go.mod h1:lnfuqoGsXMlZdTJlact3IB56o3bWp1DIlXPIGKRArto= +go.etcd.io/etcd/client/pkg/v3 v3.6.1 h1:CxDVv8ggphmamrXM4Of8aCC8QHzDM4tGcVr9p2BSoGk= +go.etcd.io/etcd/client/pkg/v3 v3.6.1/go.mod h1:aTkCp+6ixcVTZmrJGa7/Mc5nMNs59PEgBbq+HCmWyMc= +go.etcd.io/etcd/client/v3 v3.6.1 h1:KelkcizJGsskUXlsxjVrSmINvMMga0VWwFF0tSPGEP0= +go.etcd.io/etcd/client/v3 v3.6.1/go.mod h1:fCbPUdjWNLfx1A6ATo9syUmFVxqHH9bCnPLBZmnLmMY= go.mongodb.org/mongo-driver v1.11.4/go.mod h1:PTSz5yu21bkT/wXpkS7WR5f0ddqw5quethTUn9WM+2g= +go.mongodb.org/mongo-driver/v2 v2.2.2 h1:9cYuS3fl1Xhqwpfazso10V7BHQD58kCgtzhfAmJYz9c= +go.mongodb.org/mongo-driver/v2 v2.2.2/go.mod h1:qQkDMhCGWl3FN509DfdPd4GRBLU/41zqF/k8eTRceps= go.opencensus.io v0.24.0 h1:y73uSU6J157QMP2kn2r30vwW1A2W2WFwSCGnAVxeaD0= go.opencensus.io v0.24.0/go.mod h1:vNK8G9p7aAivkbmorf4v+7Hgx+Zs0yY+0fOtgBfjQKo= go.opentelemetry.io/auto/sdk v1.1.0 h1:cH53jehLUN6UFLY71z+NDOiNJqDdPRaXzTel0sJySYA= @@ -687,8 +720,8 @@ go.opentelemetry.io/otel/trace v1.36.0 h1:ahxWNuqZjpdiFAyrIoQ4GIiAIhxAunQR6MUoKr go.opentelemetry.io/otel/trace v1.36.0/go.mod h1:gQ+OnDZzrybY4k4seLzPAWNwVBBVlF2szhehOBB/tGA= go.opentelemetry.io/proto/otlp v1.7.0 h1:jX1VolD6nHuFzOYso2E73H85i92Mv8JQYk0K9vz09os= go.opentelemetry.io/proto/otlp v1.7.0/go.mod h1:fSKjH6YJ7HDlwzltzyMj036AJ3ejJLCgCSHGj4efDDo= -go.temporal.io/api v1.49.1 h1:CdiIohibamF4YP9k261DjrzPVnuomRoh1iC//gZ1puA= -go.temporal.io/api v1.49.1/go.mod h1:iaxoP/9OXMJcQkETTECfwYq4cw/bj4nwov8b3ZLVnXM= +go.temporal.io/api v1.50.0 h1:7s8Cn+fKfNx9G0v2Ge9We6X2WiCA3JvJ9JryeNbx1Bc= +go.temporal.io/api v1.50.0/go.mod h1:iaxoP/9OXMJcQkETTECfwYq4cw/bj4nwov8b3ZLVnXM= go.temporal.io/sdk v1.34.0 h1:VLg/h6ny7GvLFVoQPqz2NcC93V9yXboQwblkRvZ1cZE= go.temporal.io/sdk v1.34.0/go.mod h1:iE4U5vFrH3asOhqpBBphpj9zNtw8btp8+MSaf5A0D3w= go.temporal.io/sdk/contrib/opentelemetry v0.6.0 h1:rNBArDj5iTUkcMwKocUShoAW59o6HdS7Nq4CTp4ldj8= @@ -725,12 +758,13 @@ golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACk golang.org/x/crypto v0.0.0-20190510104115-cbcb75029529/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= +golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= golang.org/x/crypto v0.0.0-20220622213112-05595931fe9d/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4= -golang.org/x/crypto v0.38.0 h1:jt+WWG8IZlBnVbomuhg2Mdq0+BBQaHbtqHEFEigjUV8= -golang.org/x/crypto v0.38.0/go.mod h1:MvrbAqul58NNYPKnOra203SB9vpuZW0e+RRZV+Ggqjw= +golang.org/x/crypto v0.39.0 h1:SHs+kF4LP+f+p14esP5jAoDpHU8Gu/v9lFRK6IT5imM= +golang.org/x/crypto v0.39.0/go.mod h1:L+Xg3Wf6HoL4Bn4238Z6ft6KfEpN0tJGo53AAPC632U= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= -golang.org/x/exp v0.0.0-20250531010427-b6e5de432a8b h1:QoALfVG9rhQ/M7vYDScfPdWjGL9dlsVVM5VGh7aKoAA= -golang.org/x/exp v0.0.0-20250531010427-b6e5de432a8b/go.mod h1:U6Lno4MTRCDY+Ba7aCcauB9T60gsv5s4ralQzP72ZoQ= +golang.org/x/exp v0.0.0-20250620022241-b7579e27df2b h1:M2rDM6z3Fhozi9O7NWsxAkg/yqS/lQJ6PmkyIV3YP+o= +golang.org/x/exp v0.0.0-20250620022241-b7579e27df2b/go.mod h1:3//PLf8L/X+8b4vuAfHzxeRUl04Adcb341+IGKfnqS8= golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= golang.org/x/lint 
v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU= golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= @@ -741,8 +775,9 @@ golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod h1:QqPTAvyqsEbceGzB golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= -golang.org/x/mod v0.24.0 h1:ZfthKaKaT4NrhGVZHO1/WDTwGES4De8KtWO0SIbNJMU= -golang.org/x/mod v0.24.0/go.mod h1:IXM97Txy2VM4PJ3gI61r1YEk/gAj6zAHN3AdZt6S9Ww= +golang.org/x/mod v0.6.0-dev.0.20220419223038-86c51ed26bb4/go.mod h1:jJ57K6gSWd91VN4djpZkiMVwK6gcyfeH4XE8wZrZaV4= +golang.org/x/mod v0.25.0 h1:n7a+ZbQKQA/Ysbyb0/6IbB1H/X41mKgbhfv7AfG/44w= +golang.org/x/mod v0.25.0/go.mod h1:IXM97Txy2VM4PJ3gI61r1YEk/gAj6zAHN3AdZt6S9Ww= golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20181005035420-146acd28ed58/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= @@ -754,10 +789,12 @@ golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLL golang.org/x/net v0.0.0-20200421231249-e086a090c8fd/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= golang.org/x/net v0.0.0-20201110031124-69a78807bb2b/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= +golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= golang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4/go.mod h1:p54w0d4576C0XHj96bSt6lcn1PtDYWL6XObtHCRCNQM= golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= -golang.org/x/net v0.40.0 h1:79Xs7wF06Gbdcg4kdCCIQArK11Z1hr5POQ6+fIYHNuY= -golang.org/x/net v0.40.0/go.mod h1:y0hY0exeL2Pku80/zKK7tpntoX23cqL3Oa6njdgRtds= +golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c= +golang.org/x/net v0.41.0 h1:vBTly1HeNPEn3wtREYfy4GZ/NECgw2Cnl+nK6Nz3uvw= +golang.org/x/net v0.41.0/go.mod h1:B/K4NNqkfmg07DQYrbwvSluqCJOOXwUjeb/5lOisjbA= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.30.0 h1:dnDm7JmhM45NNpd8FDDeLhK6FwqbOf4MLCM9zb1BOHI= golang.org/x/oauth2 v0.30.0/go.mod h1:B++QgG3ZKulg6sRPGD/mqlHQs5rB3Ml9erfeDY7xKlU= @@ -768,8 +805,9 @@ golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJ golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.14.0 h1:woo0S4Yywslg6hp4eUFjTVOyKt0RookbpAHG4c1HmhQ= -golang.org/x/sync v0.14.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA= +golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.15.0 h1:KWH3jNZsfyT6xfAfKiz6MRNmd46ByHDYaZ7KSkCtdW8= +golang.org/x/sync v0.15.0/go.mod 
h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA= golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -786,21 +824,25 @@ golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBc golang.org/x/sys v0.0.0-20210816074244-15123e1e1f71/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20211013075003-97ac67df715c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20211025201205-69cdffdb9359/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.33.0 h1:q3i8TbbEz+JRD9ywIRlyRAQbM0qF7hu24q3teo2hbuw= golang.org/x/sys v0.33.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= +golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/term v0.32.0 h1:DR4lr0TjUs3epypdhTOkMmuF5CDFJ/8pOnbzMZPQ7bg= golang.org/x/term v0.32.0/go.mod h1:uZG1FhGx848Sqfsq4/DlJr3xGGsYMu/L5GW4abiaEPQ= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= -golang.org/x/text v0.25.0 h1:qVyWApTSYLk/drJRO5mDlNYskwQznZmkpV2c8q9zls4= -golang.org/x/text v0.25.0/go.mod h1:WEdwpYrmk1qmdHvhkSTNPm3app7v4rsT8F2UD6+VHIA= -golang.org/x/time v0.11.0 h1:/bpjEDfN9tkoN/ryeYHnv5hcMlc8ncjMcM4XBk5NWV0= -golang.org/x/time v0.11.0/go.mod h1:CDIdPxbZBQxdj6cxyCIdrNogrJKMJ7pr37NYpMcMDSg= +golang.org/x/text v0.3.8/go.mod h1:E6s5w1FMmriuDzIBO73fBruAKo1PCIq6d2Q6DHfQ8WQ= +golang.org/x/text v0.26.0 h1:P42AVeLghgTYr4+xUnTRKDMqpar+PtX7KWuNQL21L8M= +golang.org/x/text v0.26.0/go.mod h1:QK15LZJUUQVJxhz7wXgxSy/CJaTFjd0G+YLonydOVQA= +golang.org/x/time v0.12.0 h1:ScB/8o8olJvc+CQPWrK3fPZNfh7qgwCrY0zJmoEQLSE= +golang.org/x/time v0.12.0/go.mod h1:CDIdPxbZBQxdj6cxyCIdrNogrJKMJ7pr37NYpMcMDSg= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20181030221726-6c7e314b6563/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= @@ -816,8 +858,9 @@ golang.org/x/tools v0.0.0-20200130002326-2f3ba24bd6e7/go.mod h1:TB2adYChydJhpapK golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE= golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= golang.org/x/tools v0.1.1/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= -golang.org/x/tools v0.33.0 h1:4qz2S3zmRxbGIhDIAgjxvFutSvH5EfnsYrRBj0UI0bc= 
-golang.org/x/tools v0.33.0/go.mod h1:CIJMaWEY88juyUfo7UbgPqbC8rU2OqfAV1h2Qp0oMYI= +golang.org/x/tools v0.1.12/go.mod h1:hNGJHUnrk76NpqgfD5Aqm5Crs+Hm0VOH/i9J2+nxYbc= +golang.org/x/tools v0.34.0 h1:qIpSLOxeCYGg9TrcJokLBG4KFA6d795g0xkBkiESGlo= +golang.org/x/tools v0.34.0/go.mod h1:pAP9OwEaY1CAW3HOmg3hLZC5Z0CCmzjAF2UQMSqNARg= golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= @@ -826,8 +869,8 @@ golang.org/x/xerrors v0.0.0-20240903120638-7835f813f4da h1:noIWHXmPHxILtqtCOPIhS golang.org/x/xerrors v0.0.0-20240903120638-7835f813f4da/go.mod h1:NDW/Ps6MPRej6fsCIbMTohpP40sJ/P/vI1MoTEGwX90= gonum.org/v1/gonum v0.16.0 h1:5+ul4Swaf3ESvrOnidPp4GZbzf0mxVQpDCYUQE7OJfk= gonum.org/v1/gonum v0.16.0/go.mod h1:fef3am4MQ93R2HHpKnLk4/Tbh/s0+wqD5nfa6Pnwy4E= -google.golang.org/api v0.235.0 h1:C3MkpQSRxS1Jy6AkzTGKKrpSCOd2WOGrezZ+icKSkKo= -google.golang.org/api v0.235.0/go.mod h1:QpeJkemzkFKe5VCE/PMv7GsUfn9ZF+u+q1Q7w6ckxTg= +google.golang.org/api v0.238.0 h1:+EldkglWIg/pWjkq97sd+XxH7PxakNYoe/rkSTbnvOs= +google.golang.org/api v0.238.0/go.mod h1:cOVEm2TpdAGHL2z+UwyS+kmlGr3bVWQQ6sYEqkKje50= google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= @@ -835,12 +878,12 @@ google.golang.org/genproto v0.0.0-20181004005441-af9cb2a35e7f/go.mod h1:JiN7NxoA google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc= google.golang.org/genproto v0.0.0-20200423170343-7949de9c1215/go.mod h1:55QSHmfGQM9UVYDPBsyGGes0y52j32PQ3BqQfXhyH3c= google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo= -google.golang.org/genproto v0.0.0-20250528174236-200df99c418a h1:KXuwdBmgjb4T3l4ZzXhP6HxxFKXD9FcK5/8qfJI4WwU= -google.golang.org/genproto v0.0.0-20250528174236-200df99c418a/go.mod h1:Nlk93rrS2X7rV8hiC2gh2A/AJspZhElz9Oh2KGsjLEY= -google.golang.org/genproto/googleapis/api v0.0.0-20250528174236-200df99c418a h1:SGktgSolFCo75dnHJF2yMvnns6jCmHFJ0vE4Vn2JKvQ= -google.golang.org/genproto/googleapis/api v0.0.0-20250528174236-200df99c418a/go.mod h1:a77HrdMjoeKbnd2jmgcWdaS++ZLZAEq3orIOAEIKiVw= -google.golang.org/genproto/googleapis/rpc v0.0.0-20250528174236-200df99c418a h1:v2PbRU4K3llS09c7zodFpNePeamkAwG3mPrAery9VeE= -google.golang.org/genproto/googleapis/rpc v0.0.0-20250528174236-200df99c418a/go.mod h1:qQ0YXyHHx3XkvlzUtpXDkS29lDSafHMZBAZDc03LQ3A= +google.golang.org/genproto v0.0.0-20250603155806-513f23925822 h1:rHWScKit0gvAPuOnu87KpaYtjK5zBMLcULh7gxkCXu4= +google.golang.org/genproto v0.0.0-20250603155806-513f23925822/go.mod h1:HubltRL7rMh0LfnQPkMH4NPDFEWp0jw3vixw7jEM53s= +google.golang.org/genproto/googleapis/api v0.0.0-20250603155806-513f23925822 h1:oWVWY3NzT7KJppx2UKhKmzPq4SRe0LdCijVRwvGeikY= +google.golang.org/genproto/googleapis/api v0.0.0-20250603155806-513f23925822/go.mod h1:h3c4v36UTKzUiuaOKQ6gr3S+0hovBtUrXzTG/i3+XEc= +google.golang.org/genproto/googleapis/rpc v0.0.0-20250603155806-513f23925822 h1:fc6jSaCT0vBduLYZHYrBBNY4dsWuvgyff9noRNDdBeE= +google.golang.org/genproto/googleapis/rpc 
v0.0.0-20250603155806-513f23925822/go.mod h1:qQ0YXyHHx3XkvlzUtpXDkS29lDSafHMZBAZDc03LQ3A= google.golang.org/grpc v0.0.0-20180607172857-7a6a684ca69e/go.mod h1:yo6s7OP7yaDglbqo1J04qKzAhqBH6lvTonzMVmEdcZw= google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg= @@ -848,8 +891,8 @@ google.golang.org/grpc v1.25.1/go.mod h1:c3i+UQWmh7LiEpx4sFZnkU36qjEYZ0imhYfXVyQ google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= google.golang.org/grpc v1.29.1/go.mod h1:itym6AZVZYACWQqET3MqgPpjcuV5QH3BxFS3IjizoKk= google.golang.org/grpc v1.33.2/go.mod h1:JMHMWHQWaTccqQQlmk3MJZS+GWXOdAesneDmEnv2fbc= -google.golang.org/grpc v1.72.2 h1:TdbGzwb82ty4OusHWepvFWGLgIbNo1/SUynEN0ssqv8= -google.golang.org/grpc v1.72.2/go.mod h1:wH5Aktxcg25y1I3w7H69nHfXdOG3UiadoBtjh3izSDM= +google.golang.org/grpc v1.73.0 h1:VIWSmpI2MegBtTuFt5/JWy2oXxtjJ/e89Z70ImfD2ok= +google.golang.org/grpc v1.73.0/go.mod h1:50sbHOUqWoCQGI8V2HQLJM0B+LMlIUjNSZmow7EVBQc= google.golang.org/grpc/examples v0.0.0-20231221225426-4f03f3ff32c9 h1:ATnmU8nL2NfIyTSiBvJVDIDIr3qBmeW+c7z7XU21eWs= google.golang.org/grpc/examples v0.0.0-20231221225426-4f03f3ff32c9/go.mod h1:j5uROIAAgi3YmtiETMt1LW0d/lHqQ7wwrIY4uGRXLQ4= google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= @@ -888,21 +931,27 @@ gopkg.in/yaml.v3 v3.0.0-20200605160147-a5ece683394c/go.mod h1:K4uyk7z7BCEPqu6E+C gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= +gorm.io/driver/postgres v1.5.4 h1:Iyrp9Meh3GmbSuyIAGyjkN+n9K+GHX9b9MqsTL4EJCo= +gorm.io/driver/postgres v1.5.4/go.mod h1:Bgo89+h0CRcdA33Y6frlaHHVuTdOf87pmyzwW9C/BH0= +gorm.io/gorm v1.25.11 h1:/Wfyg1B/je1hnDx3sMkX+gAlxrlZpn6X0BXRlwXlvHg= +gorm.io/gorm v1.25.11/go.mod h1:xh7N7RHfYlNc5EmcI/El95gXusucDrQnHXe0+CgWcLQ= honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= honnef.co/go/tools v0.0.1-2019.2.3/go.mod h1:a3bituU0lyd329TUQxRnasdCoJDkEUEAqEt0JzvZhAg= -k8s.io/api v0.33.1 h1:tA6Cf3bHnLIrUK4IqEgb2v++/GYUtqiu9sRVk3iBXyw= -k8s.io/api v0.33.1/go.mod h1:87esjTn9DRSRTD4fWMXamiXxJhpOIREjWOSjsW1kEHw= -k8s.io/apimachinery v0.33.1 h1:mzqXWV8tW9Rw4VeW9rEkqvnxj59k1ezDUl20tFK/oM4= -k8s.io/apimachinery v0.33.1/go.mod h1:BHW0YOu7n22fFv/JkYOEfkUYNRN0fj0BlvMFWA7b+SM= -k8s.io/client-go v0.33.1 h1:ZZV/Ks2g92cyxWkRRnfUDsnhNn28eFpt26aGc8KbXF4= -k8s.io/client-go v0.33.1/go.mod h1:JAsUrl1ArO7uRVFWfcj6kOomSlCv+JpvIsp6usAGefA= +k8s.io/api v0.33.2 h1:YgwIS5jKfA+BZg//OQhkJNIfie/kmRsO0BmNaVSimvY= +k8s.io/api v0.33.2/go.mod h1:fhrbphQJSM2cXzCWgqU29xLDuks4mu7ti9vveEnpSXs= +k8s.io/apimachinery v0.33.2 h1:IHFVhqg59mb8PJWTLi8m1mAoepkUNYmptHsV+Z1m5jY= +k8s.io/apimachinery v0.33.2/go.mod h1:BHW0YOu7n22fFv/JkYOEfkUYNRN0fj0BlvMFWA7b+SM= +k8s.io/client-go v0.33.2 h1:z8CIcc0P581x/J1ZYf4CNzRKxRvQAwoAolYPbtQes+E= +k8s.io/client-go v0.33.2/go.mod h1:9mCgT4wROvL948w6f6ArJNb7yQd7QsvqavDeZHvNmHo= k8s.io/klog/v2 v2.130.1 h1:n9Xl7H1Xvksem4KFG4PYbdQCQxqc/tTUyrgXaOhHSzk= k8s.io/klog/v2 v2.130.1/go.mod h1:3Jpz1GvMt720eyJH1ckRHK1EDfpxISzJ7I9OYgaDtPE= -k8s.io/kube-openapi v0.0.0-20250318190949-c8a335a9a2ff 
h1:/usPimJzUKKu+m+TE36gUyGcf03XZEP0ZIKgKj35LS4= -k8s.io/kube-openapi v0.0.0-20250318190949-c8a335a9a2ff/go.mod h1:5jIi+8yX4RIb8wk3XwBo5Pq2ccx4FP10ohkbSKCZoK8= -k8s.io/utils v0.0.0-20250502105355-0f33e8f1c979 h1:jgJW5IePPXLGB8e/1wvd0Ich9QE97RvvF3a8J3fP/Lg= -k8s.io/utils v0.0.0-20250502105355-0f33e8f1c979/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= +k8s.io/kube-openapi v0.0.0-20250610211856-8b98d1ed966a h1:ZV3Zr+/7s7aVbjNGICQt+ppKWsF1tehxggNfbM7XnG8= +k8s.io/kube-openapi v0.0.0-20250610211856-8b98d1ed966a/go.mod h1:5jIi+8yX4RIb8wk3XwBo5Pq2ccx4FP10ohkbSKCZoK8= +k8s.io/utils v0.0.0-20250604170112-4c0f3b243397 h1:hwvWFiBzdWw1FhfY1FooPn3kzWuJ8tmbZBHi4zVsl1Y= +k8s.io/utils v0.0.0-20250604170112-4c0f3b243397/go.mod h1:OLgZIPagt7ERELqWJFomSt595RzquPNLL48iOWgYOg0= +mellium.im/sasl v0.3.1 h1:wE0LW6g7U83vhvxjC1IY8DnXM+EU095yeo8XClvCdfo= +mellium.im/sasl v0.3.1/go.mod h1:xm59PUYpZHhgQ9ZqoJ5QaCqzWMi8IeS49dhp6plPCzw= sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8 h1:gBQPwqORJ8d8/YNZWEjoZs7npUVDpVXUUOFfW6CgAqE= sigs.k8s.io/json v0.0.0-20241014173422-cfa47c3a1cc8/go.mod h1:mdzfpAEoE6DHQEN0uh9ZbOCuHbLK5wOm7dK4ctXE9Tg= sigs.k8s.io/randfill v0.0.0-20250304075658-069ef1bbf016/go.mod h1:XeLlZ/jmk4i1HRopwe7/aU3H5n1zNUcX6TM94b3QxOY= diff --git a/flow/internal/config.go b/flow/internal/config.go index 650b94c179..68619e1dd4 100644 --- a/flow/internal/config.go +++ b/flow/internal/config.go @@ -163,6 +163,14 @@ func PeerDBTemporalClientKey(ctx context.Context) ([]byte, error) { return GetKmsDecryptedEnvBase64EncodedBytes(ctx, "TEMPORAL_CLIENT_KEY", nil) } +func PeerDBTemporalClientCertPath() string { + return GetEnvString("TEMPORAL_CLIENT_CERT_PATH", "") +} + +func PeerDBTemporalClientKeyPath() string { + return GetEnvString("TEMPORAL_CLIENT_KEY_PATH", "") +} + func PeerDBGetIncidentIoUrl() string { return GetEnvString("PEERDB_INCIDENTIO_URL", "") } diff --git a/flow/internal/postgres.go b/flow/internal/postgres.go index e7205af281..48c5425674 100644 --- a/flow/internal/postgres.go +++ b/flow/internal/postgres.go @@ -22,11 +22,10 @@ func GetPGConnectionString(pgConfig *protos.PostgresConfig, flowName string) str // for a url like postgres://user:password@host:port/dbname connString := fmt.Sprintf( - "postgres://%s:%s@%s:%d/%s?application_name=%s&client_encoding=UTF8", + "postgres://%s:%s@%s/%s?application_name=%s&client_encoding=UTF8", pgConfig.User, passwordEscaped, - pgConfig.Host, - pgConfig.Port, + shared.JoinHostPort(pgConfig.Host, pgConfig.Port), pgConfig.Database, applicationName, ) diff --git a/flow/model/conversion_avro.go b/flow/model/conversion_avro.go index 3c355a64c1..fa17a48b32 100644 --- a/flow/model/conversion_avro.go +++ b/flow/model/conversion_avro.go @@ -11,6 +11,7 @@ import ( "github.com/PeerDB-io/peerdb/flow/generated/protos" "github.com/PeerDB-io/peerdb/flow/internal" "github.com/PeerDB-io/peerdb/flow/model/qvalue" + "github.com/PeerDB-io/peerdb/flow/shared/types" ) type QRecordAvroConverter struct { @@ -50,8 +51,9 @@ func NewQRecordAvroConverter( func (qac *QRecordAvroConverter) Convert( ctx context.Context, env map[string]string, - qrecord []qvalue.QValue, - typeConversions map[string]qvalue.TypeConversion, + qrecord []types.QValue, + typeConversions map[string]types.TypeConversion, + numericTruncator *SnapshotTableNumericTruncator, ) (map[string]any, error) { m := make(map[string]any, len(qrecord)) for idx, val := range qrecord { @@ -61,6 +63,7 @@ func (qac *QRecordAvroConverter) Convert( avroVal, err := qvalue.QValueToAvro( ctx, env, val, &qac.Schema.Fields[idx], 
qac.TargetDWH, qac.logger, qac.UnboundedNumericAsString, + numericTruncator.Get(idx), ) if err != nil { return nil, fmt.Errorf("failed to convert QValue to Avro-compatible value: %w", err) @@ -85,14 +88,14 @@ type QRecordAvroSchema struct { type QRecordAvroSchemaDefinition struct { Schema *avro.RecordSchema - Fields []qvalue.QField + Fields []types.QField } func GetAvroSchemaDefinition( ctx context.Context, env map[string]string, dstTableName string, - qRecordSchema qvalue.QRecordSchema, + qRecordSchema types.QRecordSchema, targetDWH protos.DBType, avroNameMap map[string]string, ) (*QRecordAvroSchemaDefinition, error) { @@ -134,7 +137,7 @@ func GetAvroSchemaDefinition( }, nil } -func ConstructColumnNameAvroFieldMap(fields []qvalue.QField) map[string]string { +func ConstructColumnNameAvroFieldMap(fields []types.QField) map[string]string { m := make(map[string]string, len(fields)) for i, field := range fields { m[field.Name] = qvalue.ConvertToAvroCompatibleName(field.Name) + "_" + strconv.FormatInt(int64(i), 10) diff --git a/flow/model/model.go b/flow/model/model.go index 94c299a197..19572c047e 100644 --- a/flow/model/model.go +++ b/flow/model/model.go @@ -10,6 +10,7 @@ import ( "github.com/jackc/pglogrepl" "github.com/PeerDB-io/peerdb/flow/generated/protos" + "github.com/PeerDB-io/peerdb/flow/shared" ) type NameAndExclude struct { @@ -35,20 +36,26 @@ type RecordTypeCounts struct { } type RecordsToStreamRequest[T Items] struct { - records <-chan Record[T] - TableMapping map[string]*RecordTypeCounts - BatchID int64 + records <-chan Record[T] + TableMapping map[string]*RecordTypeCounts + BatchID int64 + UnboundedNumericAsString bool + TargetDWH protos.DBType } func NewRecordsToStreamRequest[T Items]( records <-chan Record[T], tableMapping map[string]*RecordTypeCounts, batchID int64, + unboundedNumericAsString bool, + targetDWH protos.DBType, ) *RecordsToStreamRequest[T] { return &RecordsToStreamRequest[T]{ - records: records, - TableMapping: tableMapping, - BatchID: batchID, + records: records, + TableMapping: tableMapping, + BatchID: batchID, + UnboundedNumericAsString: unboundedNumericAsString, + TargetDWH: targetDWH, } } @@ -79,6 +86,8 @@ type PullRecordsRequest[T Items] struct { LastOffset CdcCheckpoint // MaxBatchSize is the max number of records to fetch. MaxBatchSize uint32 + // peerdb versioning to prevent breaking changes + InternalVersion uint32 // IdleTimeout is the timeout to wait for new records. IdleTimeout time.Duration } @@ -146,6 +155,7 @@ type SyncRecordsRequest[T Items] struct { // source:destination mappings TableMappings []*protos.TableMapping SyncBatchID int64 + Version uint32 } type NormalizeRecordsRequest struct { @@ -156,8 +166,10 @@ type NormalizeRecordsRequest struct { SyncedAtColName string TableMappings []*protos.TableMapping SyncBatchID int64 + Version uint32 } +//nolint:govet // no need to save on fieldalignment type SyncResponse struct { // TableNameRowsMapping tells how many records need to be synced to each destination table. TableNameRowsMapping map[string]*RecordTypeCounts @@ -168,6 +180,7 @@ type SyncResponse struct { // NumRecordsSynced is the number of records that were synced. 
NumRecordsSynced int64 CurrentSyncBatchID int64 + Warnings shared.QRepWarnings } type NormalizeResponse struct { diff --git a/flow/model/numeric_truncator.go b/flow/model/numeric_truncator.go new file mode 100644 index 0000000000..fcc0b4e227 --- /dev/null +++ b/flow/model/numeric_truncator.go @@ -0,0 +1,120 @@ +package model + +import ( + "github.com/PeerDB-io/peerdb/flow/generated/protos" + "github.com/PeerDB-io/peerdb/flow/model/qvalue" + "github.com/PeerDB-io/peerdb/flow/shared" + "github.com/PeerDB-io/peerdb/flow/shared/types" +) + +type StreamNumericTruncator map[string]*CdcTableNumericTruncator + +func NewStreamNumericTruncator(tableMappings []*protos.TableMapping, typesToSkip map[string]struct{}) StreamNumericTruncator { + statsByTable := make(map[string]*CdcTableNumericTruncator, len(tableMappings)) + for _, tableMapping := range tableMappings { + statsByTable[tableMapping.DestinationTableIdentifier] = NewCdcTableNumericTruncator( + tableMapping.DestinationTableIdentifier, tableMapping.Columns, typesToSkip) + } + return statsByTable +} + +func (ss StreamNumericTruncator) Get(destinationTable string) *CdcTableNumericTruncator { + if ss == nil { + return nil + } + truncator, ok := ss[destinationTable] + if !ok { + truncator = NewCdcTableNumericTruncator(destinationTable, nil, nil) + ss[destinationTable] = truncator + } + return truncator +} + +func (ss StreamNumericTruncator) Warnings() shared.QRepWarnings { + var warnings shared.QRepWarnings + for _, tableStats := range ss { + tableStats.CollectWarnings(&warnings) + } + return warnings +} + +type CdcTableNumericTruncator struct { + TruncatorsByColumn map[string]*CdcColumnNumericTruncator + DestinationTable string +} + +func NewCdcTableNumericTruncator( + destinationTable string, columnSettings []*protos.ColumnSetting, typesToSkip map[string]struct{}, +) *CdcTableNumericTruncator { + truncatorsByColumn := map[string]*CdcColumnNumericTruncator{} + for _, columnSetting := range columnSettings { + if _, ok := typesToSkip[columnSetting.DestinationType]; ok { + destinationName := columnSetting.DestinationName + if destinationName == "" { + destinationName = columnSetting.SourceName + } + truncatorsByColumn[destinationName] = &CdcColumnNumericTruncator{Skip: true} + } + } + return &CdcTableNumericTruncator{ + TruncatorsByColumn: truncatorsByColumn, + DestinationTable: destinationTable, + } +} + +func (ts *CdcTableNumericTruncator) Get(destinationColumn string) *CdcColumnNumericTruncator { + if ts == nil { + return &CdcColumnNumericTruncator{Skip: true} + } + stat, ok := ts.TruncatorsByColumn[destinationColumn] + if !ok { + stat = &CdcColumnNumericTruncator{ + Stat: qvalue.NewNumericStat(ts.DestinationTable, destinationColumn), + } + ts.TruncatorsByColumn[destinationColumn] = stat + } + return stat +} + +func (ts *CdcTableNumericTruncator) CollectWarnings(warnings *shared.QRepWarnings) { + for _, truncator := range ts.TruncatorsByColumn { + if !truncator.Skip { + truncator.Stat.CollectWarnings(warnings) + } + } +} + +//nolint:govet // semantically ordered +type CdcColumnNumericTruncator struct { + Skip bool + Stat *qvalue.NumericStat +} + +type SnapshotTableNumericTruncator struct { + stats []*qvalue.NumericStat +} + +func NewSnapshotTableNumericTruncator(destinationTable string, fields []types.QField) *SnapshotTableNumericTruncator { + stats := make([]*qvalue.NumericStat, 0, len(fields)) + for _, field := range fields { + stats = append(stats, qvalue.NewNumericStat(destinationTable, field.Name)) + } + return &SnapshotTableNumericTruncator{ 
+ stats: stats, + } +} + +func (ts *SnapshotTableNumericTruncator) Get(idx int) *qvalue.NumericStat { + if ts == nil { + return nil + } + return ts.stats[idx] +} + +func (ts *SnapshotTableNumericTruncator) Warnings() shared.QRepWarnings { + var warnings shared.QRepWarnings + for _, stat := range ts.stats { + stat.CollectWarnings(&warnings) + } + return warnings +} diff --git a/flow/model/qrecord_batch.go b/flow/model/qrecord_batch.go index ad8ff25607..d01585f919 100644 --- a/flow/model/qrecord_batch.go +++ b/flow/model/qrecord_batch.go @@ -1,13 +1,13 @@ package model import ( - "github.com/PeerDB-io/peerdb/flow/model/qvalue" + "github.com/PeerDB-io/peerdb/flow/shared/types" ) // QRecordBatch holds a batch of []QValue slices type QRecordBatch struct { - Schema qvalue.QRecordSchema - Records [][]qvalue.QValue + Schema types.QRecordSchema + Records [][]types.QValue } func (q *QRecordBatch) ToQRecordStream(buffer int) *QRecordStream { diff --git a/flow/model/qrecord_copy_from_source.go b/flow/model/qrecord_copy_from_source.go index 48255e8d58..238ba91d72 100644 --- a/flow/model/qrecord_copy_from_source.go +++ b/flow/model/qrecord_copy_from_source.go @@ -4,11 +4,12 @@ import ( "encoding/json" "fmt" "strings" + "time" "github.com/jackc/pgx/v5/pgtype" - geo "github.com/PeerDB-io/peerdb/flow/datatypes" - "github.com/PeerDB-io/peerdb/flow/model/qvalue" + geo "github.com/PeerDB-io/peerdb/flow/shared/datatypes" + "github.com/PeerDB-io/peerdb/flow/shared/types" ) func constructArray[T any](v []T) *pgtype.Array[T] { @@ -21,7 +22,7 @@ func constructArray[T any](v []T) *pgtype.Array[T] { type QRecordCopyFromSource struct { stream *QRecordStream - currentRecord []qvalue.QValue + currentRecord []types.QValue } func NewQRecordCopyFromSource( @@ -63,101 +64,107 @@ func (src *QRecordCopyFromSource) Values() ([]any, error) { } switch v := qValue.(type) { - case qvalue.QValueFloat32: + case types.QValueFloat32: values[i] = v.Val - case qvalue.QValueFloat64: + case types.QValueFloat64: values[i] = v.Val - case qvalue.QValueInt8: + case types.QValueInt8: values[i] = v.Val - case qvalue.QValueInt16: + case types.QValueInt16: values[i] = v.Val - case qvalue.QValueInt32: + case types.QValueInt32: values[i] = v.Val - case qvalue.QValueInt64: + case types.QValueInt64: values[i] = v.Val - case qvalue.QValueUInt8: + case types.QValueUInt8: values[i] = v.Val - case qvalue.QValueUInt16: + case types.QValueUInt16: values[i] = v.Val - case qvalue.QValueUInt32: + case types.QValueUInt32: values[i] = v.Val - case qvalue.QValueUInt64: + case types.QValueUInt64: values[i] = v.Val - case qvalue.QValueBoolean: + case types.QValueBoolean: values[i] = v.Val - case qvalue.QValueQChar: + case types.QValueQChar: values[i] = rune(v.Val) - case qvalue.QValueString: + case types.QValueString: values[i] = v.Val - case qvalue.QValueEnum: + case types.QValueEnum: values[i] = v.Val - case qvalue.QValueCIDR: + case types.QValueCIDR: values[i] = v.Val - case qvalue.QValueINET: + case types.QValueINET: values[i] = v.Val - case qvalue.QValueMacaddr: + case types.QValueMacaddr: values[i] = v.Val - case qvalue.QValueTime: - values[i] = pgtype.Time{Microseconds: v.Val.UnixMicro(), Valid: true} - case qvalue.QValueTSTZRange: + case types.QValueTime: + values[i] = pgtype.Time{Microseconds: int64(v.Val / time.Microsecond), Valid: true} + case types.QValueTimeTZ: + values[i] = pgtype.Time{Microseconds: int64(v.Val / time.Microsecond), Valid: true} + case types.QValueInterval: values[i] = v.Val - case qvalue.QValueTimestamp: + case 
types.QValueTimestamp: values[i] = pgtype.Timestamp{Time: v.Val, Valid: true} - case qvalue.QValueTimestampTZ: + case types.QValueTimestampTZ: values[i] = pgtype.Timestamptz{Time: v.Val, Valid: true} - case qvalue.QValueUUID: + case types.QValueUUID: values[i] = v.Val - case qvalue.QValueNumeric: + case types.QValueNumeric: values[i] = v.Val - case qvalue.QValueBytes: + case types.QValueBytes: values[i] = v.Val - case qvalue.QValueDate: + case types.QValueDate: values[i] = pgtype.Date{Time: v.Val, Valid: true} - case qvalue.QValueHStore: + case types.QValueHStore: values[i] = v.Val - case qvalue.QValueGeography: + case types.QValueGeography: wkb, err := geoWktToWkb(v.Val) if err != nil { return nil, err } values[i] = wkb - case qvalue.QValueGeometry: + case types.QValueGeometry: wkb, err := geoWktToWkb(v.Val) if err != nil { return nil, err } values[i] = wkb - case qvalue.QValuePoint: + case types.QValuePoint: wkb, err := geoWktToWkb(v.Val) if err != nil { return nil, err } values[i] = wkb - case qvalue.QValueArrayString: + case types.QValueArrayString: values[i] = constructArray(v.Val) - case qvalue.QValueArrayEnum: + case types.QValueArrayEnum: values[i] = constructArray(v.Val) - case qvalue.QValueArrayDate: + case types.QValueArrayDate: values[i] = constructArray(v.Val) - case qvalue.QValueArrayTimestamp: + case types.QValueArrayInterval: values[i] = constructArray(v.Val) - case qvalue.QValueArrayTimestampTZ: + case types.QValueArrayTimestamp: values[i] = constructArray(v.Val) - case qvalue.QValueArrayInt16: + case types.QValueArrayTimestampTZ: values[i] = constructArray(v.Val) - case qvalue.QValueArrayInt32: + case types.QValueArrayInt16: values[i] = constructArray(v.Val) - case qvalue.QValueArrayInt64: + case types.QValueArrayInt32: values[i] = constructArray(v.Val) - case qvalue.QValueArrayFloat32: + case types.QValueArrayInt64: values[i] = constructArray(v.Val) - case qvalue.QValueArrayFloat64: + case types.QValueArrayFloat32: values[i] = constructArray(v.Val) - case qvalue.QValueArrayBoolean: + case types.QValueArrayFloat64: values[i] = constructArray(v.Val) - case qvalue.QValueArrayUUID: + case types.QValueArrayBoolean: values[i] = constructArray(v.Val) - case qvalue.QValueJSON: + case types.QValueArrayUUID: + values[i] = constructArray(v.Val) + case types.QValueArrayNumeric: + values[i] = constructArray(v.Val) + case types.QValueJSON: if v.IsArray { var arrayJ []any if err := json.Unmarshal([]byte(v.Val), &arrayJ); err != nil { diff --git a/flow/model/qrecord_stream.go b/flow/model/qrecord_stream.go index 27215c62dc..b47862ec0e 100644 --- a/flow/model/qrecord_stream.go +++ b/flow/model/qrecord_stream.go @@ -1,33 +1,33 @@ package model import ( - "github.com/PeerDB-io/peerdb/flow/model/qvalue" + "github.com/PeerDB-io/peerdb/flow/shared/types" ) type QRecordStream struct { schemaLatch chan struct{} - Records chan []qvalue.QValue + Records chan []types.QValue err error - schema qvalue.QRecordSchema + schema types.QRecordSchema schemaSet bool } func NewQRecordStream(buffer int) *QRecordStream { return &QRecordStream{ schemaLatch: make(chan struct{}), - Records: make(chan []qvalue.QValue, buffer), - schema: qvalue.QRecordSchema{}, + Records: make(chan []types.QValue, buffer), + schema: types.QRecordSchema{}, err: nil, schemaSet: false, } } -func (s *QRecordStream) Schema() (qvalue.QRecordSchema, error) { +func (s *QRecordStream) Schema() (types.QRecordSchema, error) { <-s.schemaLatch return s.schema, s.Err() } -func (s *QRecordStream) SetSchema(schema qvalue.QRecordSchema) { +func (s 
*QRecordStream) SetSchema(schema types.QRecordSchema) { if !s.schemaSet { s.schema = schema s.schemaSet = true @@ -56,6 +56,6 @@ func (s *QRecordStream) Close(err error) { close(s.Records) } if !s.schemaSet { - s.SetSchema(qvalue.QRecordSchema{}) + s.SetSchema(types.QRecordSchema{}) } } diff --git a/flow/model/qrecord_test.go b/flow/model/qrecord_test.go index 3af119ef32..e2c0d4154a 100644 --- a/flow/model/qrecord_test.go +++ b/flow/model/qrecord_test.go @@ -8,7 +8,7 @@ import ( "github.com/stretchr/testify/assert" "github.com/PeerDB-io/peerdb/flow/e2eshared" - "github.com/PeerDB-io/peerdb/flow/model/qvalue" + "github.com/PeerDB-io/peerdb/flow/shared/types" ) func TestEquals(t *testing.T) { @@ -17,32 +17,32 @@ func TestEquals(t *testing.T) { tests := []struct { name string - q1 []qvalue.QValue - q2 []qvalue.QValue + q1 []types.QValue + q2 []types.QValue want bool }{ { name: "Equal - Same UUID", - q1: []qvalue.QValue{qvalue.QValueUUID{Val: uuidVal1}}, - q2: []qvalue.QValue{qvalue.QValueString{Val: uuidVal1.String()}}, + q1: []types.QValue{types.QValueUUID{Val: uuidVal1}}, + q2: []types.QValue{types.QValueString{Val: uuidVal1.String()}}, want: true, }, { name: "Not Equal - Different UUID", - q1: []qvalue.QValue{qvalue.QValueUUID{Val: uuidVal1}}, - q2: []qvalue.QValue{qvalue.QValueUUID{Val: uuidVal2}}, + q1: []types.QValue{types.QValueUUID{Val: uuidVal1}}, + q2: []types.QValue{types.QValueUUID{Val: uuidVal2}}, want: false, }, { name: "Equal - Same numeric", - q1: []qvalue.QValue{qvalue.QValueNumeric{Val: decimal.NewFromInt(5)}}, - q2: []qvalue.QValue{qvalue.QValueString{Val: "5"}}, + q1: []types.QValue{types.QValueNumeric{Val: decimal.NewFromInt(5)}}, + q2: []types.QValue{types.QValueString{Val: "5"}}, want: true, }, { name: "Not Equal - Different numeric", - q1: []qvalue.QValue{qvalue.QValueNumeric{Val: decimal.NewFromInt(5)}}, - q2: []qvalue.QValue{qvalue.QValueString{Val: "4.99"}}, + q1: []types.QValue{types.QValueNumeric{Val: decimal.NewFromInt(5)}}, + q2: []types.QValue{types.QValueString{Val: "4.99"}}, want: false, }, } diff --git a/flow/model/qvalue/avro_converter.go b/flow/model/qvalue/avro_converter.go index f0918c4438..2a110c52ed 100644 --- a/flow/model/qvalue/avro_converter.go +++ b/flow/model/qvalue/avro_converter.go @@ -4,9 +4,9 @@ import ( "context" "encoding/base64" "encoding/hex" - "errors" "fmt" "log/slog" + "math/big" "regexp" "strings" "time" @@ -16,28 +16,16 @@ import ( "github.com/shopspring/decimal" "go.temporal.io/sdk/log" - "github.com/PeerDB-io/peerdb/flow/datatypes" "github.com/PeerDB-io/peerdb/flow/generated/protos" "github.com/PeerDB-io/peerdb/flow/internal" + "github.com/PeerDB-io/peerdb/flow/shared" + "github.com/PeerDB-io/peerdb/flow/shared/datatypes" + "github.com/PeerDB-io/peerdb/flow/shared/exceptions" + "github.com/PeerDB-io/peerdb/flow/shared/types" ) var re = regexp.MustCompile(`[^A-Za-z0-9_]`) -func TruncateOrLogNumeric(num decimal.Decimal, precision int16, scale int16, targetDB protos.DBType) (decimal.Decimal, error) { - if targetDB == protos.DBType_SNOWFLAKE || targetDB == protos.DBType_BIGQUERY { - bidigi := datatypes.CountDigits(num.BigInt()) - avroPrecision, avroScale := DetermineNumericSettingForDWH(precision, scale, targetDB) - if bidigi+int(avroScale) > int(avroPrecision) { - slog.Warn("Clearing NUMERIC value with too many digits", slog.Any("number", num)) - return num, errors.New("invalid numeric") - } else if num.Exponent() < -int32(avroScale) { - num = num.Truncate(int32(avroScale)) - slog.Warn("Truncated NUMERIC value", slog.Any("number", 
num)) - } - } - return num, nil -} - // ConvertToAvroCompatibleName converts a column name to a field name that is compatible with Avro. func ConvertToAvroCompatibleName(columnName string) string { // Avro field names must: @@ -65,35 +53,38 @@ func ConvertToAvroCompatibleName(columnName string) string { func GetAvroSchemaFromQValueKind( ctx context.Context, env map[string]string, - kind QValueKind, + kind types.QValueKind, targetDWH protos.DBType, precision int16, scale int16, ) (avro.Schema, error) { switch kind { - case QValueKindString, QValueKindEnum, QValueKindQChar, QValueKindCIDR, QValueKindINET, QValueKindMacaddr: + case types.QValueKindString, types.QValueKindEnum, types.QValueKindQChar, types.QValueKindCIDR, + types.QValueKindINET, types.QValueKindMacaddr: return avro.NewPrimitiveSchema(avro.String, nil), nil - case QValueKindInterval: + case types.QValueKindInterval: return avro.NewPrimitiveSchema(avro.String, nil), nil - case QValueKindUUID: + case types.QValueKindArrayInterval: + return avro.NewArraySchema(avro.NewPrimitiveSchema(avro.String, nil)), nil + case types.QValueKindUUID: return avro.NewPrimitiveSchema(avro.String, avro.NewPrimitiveLogicalSchema(avro.UUID)), nil - case QValueKindArrayUUID: + case types.QValueKindArrayUUID: return avro.NewArraySchema(avro.NewPrimitiveSchema(avro.String, avro.NewPrimitiveLogicalSchema(avro.UUID))), nil - case QValueKindGeometry, QValueKindGeography, QValueKindPoint: + case types.QValueKindGeometry, types.QValueKindGeography, types.QValueKindPoint: return avro.NewPrimitiveSchema(avro.String, nil), nil - case QValueKindInt8, QValueKindInt16, QValueKindInt32, QValueKindInt64, - QValueKindUInt8, QValueKindUInt16, QValueKindUInt32, QValueKindUInt64: + case types.QValueKindInt8, types.QValueKindInt16, types.QValueKindInt32, types.QValueKindInt64, + types.QValueKindUInt8, types.QValueKindUInt16, types.QValueKindUInt32, types.QValueKindUInt64: return avro.NewPrimitiveSchema(avro.Long, nil), nil - case QValueKindFloat32: + case types.QValueKindFloat32: if targetDWH == protos.DBType_BIGQUERY { return avro.NewPrimitiveSchema(avro.Double, nil), nil } return avro.NewPrimitiveSchema(avro.Float, nil), nil - case QValueKindFloat64: + case types.QValueKindFloat64: return avro.NewPrimitiveSchema(avro.Double, nil), nil - case QValueKindBoolean: + case types.QValueKindBoolean: return avro.NewPrimitiveSchema(avro.Boolean, nil), nil - case QValueKindBytes: + case types.QValueKindBytes: format, err := internal.PeerDBBinaryFormat(ctx, env) if err != nil { return nil, err @@ -102,85 +93,94 @@ func GetAvroSchemaFromQValueKind( return avro.NewPrimitiveSchema(avro.String, nil), nil } return avro.NewPrimitiveSchema(avro.Bytes, nil), nil - case QValueKindNumeric: - if targetDWH == protos.DBType_CLICKHOUSE { - if precision == 0 && scale == 0 { - asString, err := internal.PeerDBEnableClickHouseNumericAsString(ctx, env) - if err != nil { - return nil, err - } - if asString { - return avro.NewPrimitiveSchema(avro.String, nil), nil - } - } - if precision > datatypes.PeerDBClickHouseMaxPrecision { - return avro.NewPrimitiveSchema(avro.String, nil), nil - } - } - avroNumericPrecision, avroNumericScale := DetermineNumericSettingForDWH(precision, scale, targetDWH) - return avro.NewPrimitiveSchema(avro.Bytes, avro.NewDecimalLogicalSchema(int(avroNumericPrecision), int(avroNumericScale))), nil - case QValueKindDate: + case types.QValueKindNumeric: + return getAvroNumericSchema(ctx, env, targetDWH, precision, scale) + case types.QValueKindDate: if targetDWH == 
protos.DBType_SNOWFLAKE { return avro.NewPrimitiveSchema(avro.String, nil), nil } return avro.NewPrimitiveSchema(avro.Int, avro.NewPrimitiveLogicalSchema(avro.Date)), nil - case QValueKindTime, QValueKindTimeTZ: + case types.QValueKindTime, types.QValueKindTimeTZ: if targetDWH == protos.DBType_SNOWFLAKE { return avro.NewPrimitiveSchema(avro.String, nil), nil } return avro.NewPrimitiveSchema(avro.Long, avro.NewPrimitiveLogicalSchema(avro.TimeMicros)), nil - case QValueKindTimestamp, QValueKindTimestampTZ: + case types.QValueKindTimestamp, types.QValueKindTimestampTZ: if targetDWH == protos.DBType_SNOWFLAKE { return avro.NewPrimitiveSchema(avro.String, nil), nil } return avro.NewPrimitiveSchema(avro.Long, avro.NewPrimitiveLogicalSchema(avro.TimestampMicros)), nil - case QValueKindTSTZRange: + case types.QValueKindHStore, types.QValueKindJSON, types.QValueKindJSONB: return avro.NewPrimitiveSchema(avro.String, nil), nil - case QValueKindHStore, QValueKindJSON, QValueKindJSONB: - return avro.NewPrimitiveSchema(avro.String, nil), nil - case QValueKindArrayFloat32: + case types.QValueKindArrayFloat32: return avro.NewArraySchema(avro.NewPrimitiveSchema(avro.Float, nil)), nil - case QValueKindArrayFloat64: + case types.QValueKindArrayFloat64: return avro.NewArraySchema(avro.NewPrimitiveSchema(avro.Double, nil)), nil - case QValueKindArrayInt32, QValueKindArrayInt16: + case types.QValueKindArrayInt32, types.QValueKindArrayInt16: return avro.NewArraySchema(avro.NewPrimitiveSchema(avro.Int, nil)), nil - case QValueKindArrayInt64: + case types.QValueKindArrayInt64: return avro.NewArraySchema(avro.NewPrimitiveSchema(avro.Long, nil)), nil - case QValueKindArrayBoolean: + case types.QValueKindArrayBoolean: return avro.NewArraySchema(avro.NewPrimitiveSchema(avro.Boolean, nil)), nil - case QValueKindArrayDate: + case types.QValueKindArrayDate: if targetDWH == protos.DBType_SNOWFLAKE { return avro.NewArraySchema(avro.NewPrimitiveSchema(avro.String, nil)), nil } return avro.NewArraySchema(avro.NewPrimitiveSchema(avro.Int, avro.NewPrimitiveLogicalSchema(avro.Date))), nil - case QValueKindArrayTimestamp, QValueKindArrayTimestampTZ: + case types.QValueKindArrayTimestamp, types.QValueKindArrayTimestampTZ: if targetDWH == protos.DBType_SNOWFLAKE { return avro.NewArraySchema(avro.NewPrimitiveSchema(avro.String, nil)), nil } return avro.NewArraySchema(avro.NewPrimitiveSchema(avro.Long, avro.NewPrimitiveLogicalSchema(avro.TimestampMicros))), nil - case QValueKindArrayJSON, QValueKindArrayJSONB: + case types.QValueKindArrayJSON, types.QValueKindArrayJSONB: return avro.NewPrimitiveSchema(avro.String, nil), nil - case QValueKindArrayString, QValueKindArrayEnum: + case types.QValueKindArrayString, types.QValueKindArrayEnum: return avro.NewArraySchema(avro.NewPrimitiveSchema(avro.String, nil)), nil - case QValueKindInvalid: + case types.QValueKindArrayNumeric: + numericSchema, err := getAvroNumericSchema(ctx, env, targetDWH, precision, scale) + if err != nil { + return nil, err + } + return avro.NewArraySchema(numericSchema), nil + case types.QValueKindInvalid: // lets attempt to do invalid as a string return avro.NewPrimitiveSchema(avro.String, nil), nil default: - return nil, fmt.Errorf("unsupported QValueKind type: %s", kind) + return nil, fmt.Errorf("unsupported types.QValueKind type: %s", kind) + } +} + +func getAvroNumericSchema( + ctx context.Context, + env map[string]string, + targetDWH protos.DBType, + precision int16, + scale int16, +) (avro.Schema, error) { + asString, err := 
internal.PeerDBEnableClickHouseNumericAsString(ctx, env) + if err != nil { + return nil, err + } + destinationType := GetNumericDestinationType(precision, scale, targetDWH, asString) + if destinationType.IsString { + return avro.NewPrimitiveSchema(avro.String, nil), nil } + return avro.NewPrimitiveSchema(avro.Bytes, + avro.NewDecimalLogicalSchema(int(destinationType.Precision), int(destinationType.Scale))), nil } type QValueAvroConverter struct { - *QField + *types.QField logger log.Logger + Stat *NumericStat TargetDWH protos.DBType UnboundedNumericAsString bool } func QValueToAvro( ctx context.Context, env map[string]string, - value QValue, field *QField, targetDWH protos.DBType, logger log.Logger, - unboundedNumericAsString bool, + value types.QValue, field *types.QField, targetDWH protos.DBType, logger log.Logger, + unboundedNumericAsString bool, stat *NumericStat, ) (any, error) { if value.Value() == nil { return nil, nil @@ -188,31 +188,32 @@ func QValueToAvro( c := QValueAvroConverter{ QField: field, - TargetDWH: targetDWH, logger: logger, + Stat: stat, + TargetDWH: targetDWH, UnboundedNumericAsString: unboundedNumericAsString, } switch v := value.(type) { - case QValueInvalid: + case types.QValueInvalid: // we will attempt to convert invalid to a string return c.processNullableUnion(v.Val) - case QValueTime: + case types.QValueTime: return c.processNullableUnion(c.processGoTime(v.Val)) - case QValueTimeTZ: - return c.processNullableUnion(c.processGoTimeTZ(v.Val)) - case QValueTimestamp: + case types.QValueTimeTZ: + return c.processNullableUnion(c.processGoTime(v.Val)) + case types.QValueTimestamp: return c.processNullableUnion(c.processGoTimestamp(v.Val)) - case QValueTimestampTZ: + case types.QValueTimestampTZ: return c.processNullableUnion(c.processGoTimestampTZ(v.Val)) - case QValueDate: + case types.QValueDate: return c.processNullableUnion(c.processGoDate(v.Val)) - case QValueQChar: + case types.QValueQChar: return c.processNullableUnion(string(v.Val)) - case QValueString, - QValueCIDR, QValueINET, QValueMacaddr, - QValueInterval, QValueTSTZRange, QValueEnum, - QValueGeography, QValueGeometry, QValuePoint: + case types.QValueString, + types.QValueCIDR, types.QValueINET, types.QValueMacaddr, + types.QValueInterval, types.QValueEnum, + types.QValueGeography, types.QValueGeometry, types.QValuePoint: if c.TargetDWH == protos.DBType_SNOWFLAKE && v.Value() != nil && (len(v.Value().(string)) > 15*1024*1024) { slog.Warn("Clearing TEXT value > 15MB for Snowflake!") @@ -220,90 +221,86 @@ func QValueToAvro( return nil, nil } return c.processNullableUnion(v.Value()) - case QValueFloat32: + case types.QValueFloat32: if c.TargetDWH == protos.DBType_BIGQUERY { return c.processNullableUnion(float64(v.Val)) } return c.processNullableUnion(v.Val) - case QValueFloat64: + case types.QValueFloat64: return c.processNullableUnion(v.Val) - case QValueInt8: + case types.QValueInt8: return c.processNullableUnion(int64(v.Val)) - case QValueInt16: + case types.QValueInt16: return c.processNullableUnion(int64(v.Val)) - case QValueInt32: + case types.QValueInt32: return c.processNullableUnion(int64(v.Val)) - case QValueInt64: + case types.QValueInt64: return c.processNullableUnion(v.Val) - case QValueUInt8: + case types.QValueUInt8: return c.processNullableUnion(int64(v.Val)) - case QValueUInt16: + case types.QValueUInt16: return c.processNullableUnion(int64(v.Val)) - case QValueUInt32: + case types.QValueUInt32: return c.processNullableUnion(int64(v.Val)) - case QValueUInt64: + case types.QValueUInt64: 
return c.processNullableUnion(int64(v.Val)) - case QValueBoolean: + case types.QValueBoolean: return c.processNullableUnion(v.Val) - case QValueNumeric: + case types.QValueNumeric: return c.processNumeric(v.Val), nil - case QValueBytes: + case types.QValueBytes: format, err := internal.PeerDBBinaryFormat(ctx, env) if err != nil { return nil, err } return c.processBytes(v.Val, format), nil - case QValueJSON: + case types.QValueJSON: return c.processJSON(v.Val), nil - case QValueHStore: + case types.QValueHStore: return c.processHStore(v.Val) - case QValueArrayFloat32: + case types.QValueArrayFloat32: return c.processArrayFloat32(v.Val), nil - case QValueArrayFloat64: + case types.QValueArrayFloat64: return c.processArrayFloat64(v.Val), nil - case QValueArrayInt16: + case types.QValueArrayInt16: return c.processArrayInt16(v.Val), nil - case QValueArrayInt32: + case types.QValueArrayInt32: return c.processArrayInt32(v.Val), nil - case QValueArrayInt64: + case types.QValueArrayInt64: return c.processArrayInt64(v.Val), nil - case QValueArrayString: + case types.QValueArrayString: + return c.processArrayString(v.Val), nil + case types.QValueArrayEnum: return c.processArrayString(v.Val), nil - case QValueArrayEnum: + case types.QValueArrayInterval: return c.processArrayString(v.Val), nil - case QValueArrayBoolean: + case types.QValueArrayBoolean: return c.processArrayBoolean(v.Val), nil - case QValueArrayTimestamp: + case types.QValueArrayTimestamp: return c.processArrayTime(v.Val), nil - case QValueArrayTimestampTZ: + case types.QValueArrayTimestampTZ: return c.processArrayTime(v.Val), nil - case QValueArrayDate: + case types.QValueArrayDate: return c.processArrayDate(v.Val), nil - case QValueUUID: + case types.QValueUUID: return c.processUUID(v.Val), nil - case QValueArrayUUID: + case types.QValueArrayUUID: return c.processArrayUUID(v.Val), nil + case types.QValueArrayNumeric: + return c.processArrayNumeric(v.Val), nil default: return nil, fmt.Errorf("[toavro] unsupported %T", value) } } -func (c *QValueAvroConverter) processGoTimeTZ(t time.Time) any { - // Snowflake has issues with avro timestamp types, returning as string form - // See: https://stackoverflow.com/questions/66104762/snowflake-date-column-have-incorrect-date-from-avro-file - if c.TargetDWH == protos.DBType_SNOWFLAKE { - return t.Format("15:04:05.999999-0700") - } - return time.Duration(t.UnixMicro()) * time.Microsecond -} - -func (c *QValueAvroConverter) processGoTime(t time.Time) any { +func (c *QValueAvroConverter) processGoTime(t time.Duration) any { // Snowflake has issues with avro timestamp types, returning as string form // See: https://stackoverflow.com/questions/66104762/snowflake-date-column-have-incorrect-date-from-avro-file if c.TargetDWH == protos.DBType_SNOWFLAKE { - return t.Format("15:04:05.999999") + t = max(min(t, 86399999999*time.Microsecond), 0) + return time.Time{}.Add(t).Format("15:04:05.999999") } - return time.Duration(t.UnixMicro()) * time.Microsecond + return t } func (c *QValueAvroConverter) processGoTimestampTZ(t time.Time) any { @@ -366,15 +363,18 @@ func (c *QValueAvroConverter) processNullableUnion( } func (c *QValueAvroConverter) processNumeric(num decimal.Decimal) any { - if (c.UnboundedNumericAsString && c.Precision == 0 && c.Scale == 0) || - (c.TargetDWH == protos.DBType_CLICKHOUSE && c.Precision > datatypes.PeerDBClickHouseMaxPrecision) { + destType := GetNumericDestinationType(c.Precision, c.Scale, c.TargetDWH, c.UnboundedNumericAsString) + if destType.IsString { numStr, _ := 
c.processNullableUnion(num.String()) return numStr } - num, err := TruncateOrLogNumeric(num, c.Precision, c.Scale, c.TargetDWH) - if err != nil { - return nil + num, ok := TruncateNumeric(num, destType.Precision, destType.Scale, c.TargetDWH, c.Stat) + if !ok { + if c.Nullable { + return nil + } + return big.Rat{} } rat := num.Rat() @@ -384,6 +384,28 @@ func (c *QValueAvroConverter) processNumeric(num decimal.Decimal) any { return rat } +func (c *QValueAvroConverter) processArrayNumeric(arrayNum []decimal.Decimal) any { + destType := GetNumericDestinationType(c.Precision, c.Scale, c.TargetDWH, c.UnboundedNumericAsString) + if destType.IsString { + transformedNumArr := make([]string, 0, len(arrayNum)) + for _, num := range arrayNum { + transformedNumArr = append(transformedNumArr, num.String()) + } + return transformedNumArr + } + + transformedNumArr := make([]*big.Rat, 0, len(arrayNum)) + for _, num := range arrayNum { + num, ok := TruncateNumeric(num, destType.Precision, destType.Scale, c.TargetDWH, c.Stat) + if !ok { + transformedNumArr = append(transformedNumArr, &big.Rat{}) + continue + } + transformedNumArr = append(transformedNumArr, num.Rat()) + } + return transformedNumArr +} + func (c *QValueAvroConverter) processBytes(byteData []byte, format internal.BinaryFormat) any { if c.TargetDWH == protos.DBType_CLICKHOUSE && format != internal.BinaryFormatRaw { var encoded string @@ -537,3 +559,72 @@ func (c *QValueAvroConverter) processArrayFloat64(arrayData []float64) any { func (c *QValueAvroConverter) processArrayString(arrayData []string) any { return arrayData } + +func TruncateNumeric( + num decimal.Decimal, targetPrecision, targetScale int16, targetDWH protos.DBType, stat *NumericStat, +) (decimal.Decimal, bool) { + switch targetDWH { + case protos.DBType_CLICKHOUSE, protos.DBType_SNOWFLAKE, protos.DBType_BIGQUERY: + bi := num.BigInt() + bidigi := datatypes.CountDigits(bi) + if bi.Sign() == 0 { + bidigi = 0 + } + if bidigi+int(targetScale) > int(targetPrecision) { + if stat != nil { + stat.LongIntegersClearedCount++ + stat.MaxIntegerDigits = max(int32(bidigi), stat.MaxIntegerDigits) + } + return decimal.Zero, false + } else if num.Exponent() < -int32(targetScale) { + if stat != nil { + stat.TruncatedCount++ + stat.MaxExponent = max(-num.Exponent(), stat.MaxExponent) + } + return num.Truncate(int32(targetScale)), true + } + } + return num, true +} + +//nolint:govet // logically grouped, fieldalignment confuses things +type NumericStat struct { + DestinationTable string + DestinationColumn string + TruncatedCount uint64 + MaxExponent int32 + LongIntegersClearedCount uint64 + MaxIntegerDigits int32 +} + +func NewNumericStat(destinationTable, destinationColumn string) *NumericStat { + return &NumericStat{ + DestinationTable: destinationTable, + DestinationColumn: destinationColumn, + } +} + +func (ns *NumericStat) CollectWarnings(warnings *shared.QRepWarnings) { + if ns.LongIntegersClearedCount > 0 { + plural := "" + if ns.LongIntegersClearedCount > 1 { + plural = "s" + } + err := fmt.Errorf( + "column %s.%s: cleared %d NUMERIC value%s too big to fit into the destination column (got %d integer digits)", + ns.DestinationTable, ns.DestinationColumn, ns.LongIntegersClearedCount, plural, ns.MaxIntegerDigits) + warning := exceptions.NewNumericOutOfRangeError(err, ns.DestinationTable, ns.DestinationColumn) + *warnings = append(*warnings, warning) + } + if ns.TruncatedCount > 0 { + plural := "" + if ns.TruncatedCount > 1 { + plural = "s" + } + err := fmt.Errorf( + "column %s.%s: truncated %d 
NUMERIC value%s too precise to fit into the destination column (got %d digits of exponent)", + ns.DestinationTable, ns.DestinationColumn, ns.TruncatedCount, plural, ns.MaxExponent) + warning := exceptions.NewNumericTruncatedError(err, ns.DestinationTable, ns.DestinationColumn) + *warnings = append(*warnings, warning) + } +} diff --git a/flow/model/qvalue/avro_converter_test.go b/flow/model/qvalue/avro_converter_test.go index a0c1fb6bd7..3e9066c842 100644 --- a/flow/model/qvalue/avro_converter_test.go +++ b/flow/model/qvalue/avro_converter_test.go @@ -1,11 +1,9 @@ -package qvalue_test +package qvalue import ( "testing" "github.com/stretchr/testify/assert" - - "github.com/PeerDB-io/peerdb/flow/model/qvalue" ) func TestColumnNameAvroFieldConvert(t *testing.T) { @@ -61,7 +59,7 @@ func TestColumnNameAvroFieldConvert(t *testing.T) { for i, columnName := range testColumnNames { t.Run(columnName, func(t *testing.T) { - assert.Equal(t, expectedColumnNames[i], qvalue.ConvertToAvroCompatibleName(columnName)) + assert.Equal(t, expectedColumnNames[i], ConvertToAvroCompatibleName(columnName)) }) } } diff --git a/flow/model/qvalue/dwh.go b/flow/model/qvalue/dwh.go index 97ad0e9158..7c84f3fab9 100644 --- a/flow/model/qvalue/dwh.go +++ b/flow/model/qvalue/dwh.go @@ -5,8 +5,8 @@ import ( "go.temporal.io/sdk/log" - "github.com/PeerDB-io/peerdb/flow/datatypes" "github.com/PeerDB-io/peerdb/flow/generated/protos" + "github.com/PeerDB-io/peerdb/flow/shared/datatypes" ) func DetermineNumericSettingForDWH(precision int16, scale int16, dwh protos.DBType) (int16, int16) { diff --git a/flow/model/qvalue/equals.go b/flow/model/qvalue/equals.go index 42a28c74a0..d990ce7fa7 100644 --- a/flow/model/qvalue/equals.go +++ b/flow/model/qvalue/equals.go @@ -14,7 +14,8 @@ import ( "github.com/shopspring/decimal" geom "github.com/twpayne/go-geos" - "github.com/PeerDB-io/peerdb/flow/datatypes" + "github.com/PeerDB-io/peerdb/flow/shared/datatypes" + "github.com/PeerDB-io/peerdb/flow/shared/types" ) func valueEmpty(value any) bool { @@ -22,7 +23,7 @@ func valueEmpty(value any) bool { (reflect.TypeOf(value).Kind() == reflect.Slice && reflect.ValueOf(value).Len() == 0) } -func Equals(qv QValue, other QValue) bool { +func Equals(qv types.QValue, other types.QValue) bool { qvValue := qv.Value() otherValue := other.Value() if valueEmpty(qvValue) && valueEmpty(otherValue) { @@ -30,72 +31,72 @@ func Equals(qv QValue, other QValue) bool { } switch q := qv.(type) { - case QValueInvalid: + case types.QValueInvalid: return true - case QValueFloat32: + case types.QValueFloat32: float2, ok2 := getFloat32(other.Value()) return ok2 && q.Val == float2 - case QValueFloat64: + case types.QValueFloat64: float2, ok2 := getFloat64(other.Value()) return ok2 && q.Val == float2 - case QValueInt8: + case types.QValueInt8: int2, ok2 := getInt64(other.Value()) return ok2 && int64(q.Val) == int2 - case QValueInt16: + case types.QValueInt16: int2, ok2 := getInt64(other.Value()) return ok2 && int64(q.Val) == int2 - case QValueInt32: + case types.QValueInt32: int2, ok2 := getInt64(other.Value()) return ok2 && int64(q.Val) == int2 - case QValueInt64: + case types.QValueInt64: int2, ok2 := getInt64(other.Value()) return ok2 && q.Val == int2 - case QValueUInt8: + case types.QValueUInt8: int2, ok2 := getUInt64(other.Value()) return ok2 && uint64(q.Val) == int2 - case QValueUInt16: + case types.QValueUInt16: int2, ok2 := getUInt64(other.Value()) return ok2 && uint64(q.Val) == int2 - case QValueUInt32: + case types.QValueUInt32: int2, ok2 := getUInt64(other.Value()) 
return ok2 && uint64(q.Val) == int2 - case QValueUInt64: + case types.QValueUInt64: int2, ok2 := getUInt64(other.Value()) return ok2 && q.Val == int2 - case QValueBoolean: - if otherVal, ok := other.(QValueBoolean); ok { + case types.QValueBoolean: + if otherVal, ok := other.(types.QValueBoolean); ok { return q.Val == otherVal.Val } return false - case QValueQChar: - if otherVal, ok := other.(QValueQChar); ok { + case types.QValueQChar: + if otherVal, ok := other.(types.QValueQChar); ok { return q.Val == otherVal.Val } return false - case QValueString: + case types.QValueString: return compareString(q.Val, otherValue) - case QValueEnum: + case types.QValueEnum: return compareString(q.Val, otherValue) - case QValueINET: + case types.QValueINET: return compareString(q.Val, otherValue) - case QValueCIDR: + case types.QValueCIDR: return compareString(q.Val, otherValue) - case QValueMacaddr: + case types.QValueMacaddr: return compareString(q.Val, otherValue) // all internally represented as a Golang time.Time - case QValueTimestamp, QValueTimestampTZ: + case types.QValueTimestamp, types.QValueTimestampTZ: return compareGoTimestamp(qvValue, otherValue) - case QValueTime, QValueTimeTZ: + case types.QValueTime, types.QValueTimeTZ: return compareGoTime(qvValue, otherValue) - case QValueDate: + case types.QValueDate: return compareGoDate(qvValue, otherValue) - case QValueNumeric: + case types.QValueNumeric: return compareNumeric(q.Val, otherValue) - case QValueBytes: + case types.QValueBytes: return compareBytes(qvValue, otherValue) - case QValueUUID: + case types.QValueUUID: return compareUUID(qvValue, otherValue) - case QValueJSON: + case types.QValueJSON: if otherValue == nil || otherValue == "" { // TODO make this more strict return true @@ -109,26 +110,30 @@ func Equals(qv QValue, other QValue) bool { return false } return reflect.DeepEqual(a, b) - case QValueGeometry: + case types.QValueGeometry: return compareGeometry(q.Val, otherValue) - case QValueGeography: + case types.QValueGeography: return compareGeometry(q.Val, otherValue) - case QValueHStore: + case types.QValueHStore: return compareHStore(q.Val, otherValue) - case QValueArrayInt32, QValueArrayInt16, QValueArrayInt64, QValueArrayFloat32, QValueArrayFloat64: - return compareNumericArrays(qvValue, otherValue) - case QValueArrayDate: + case types.QValueArrayInt32, types.QValueArrayInt16, types.QValueArrayInt64, types.QValueArrayFloat32, types.QValueArrayFloat64: + return compareNativeNumericArrays(qvValue, otherValue) + case types.QValueArrayNumeric: + return compareNumericArrays(q.Val, otherValue) + case types.QValueArrayDate: return compareDateArrays(q.Val, otherValue) - case QValueArrayTimestamp: + case types.QValueArrayInterval: + return compareArrays(q.Val, otherValue) + case types.QValueArrayTimestamp: return compareTimeArrays(q.Val, otherValue) - case QValueArrayTimestampTZ: + case types.QValueArrayTimestampTZ: return compareTimeArrays(q.Val, otherValue) - case QValueArrayBoolean: + case types.QValueArrayBoolean: return compareArrays(q.Val, otherValue) - case QValueArrayUUID: + case types.QValueArrayUUID: return compareArrays(q.Val, otherValue) - case QValueArrayString: - if qjson, ok := other.(QValueJSON); ok { + case types.QValueArrayString: + if qjson, ok := other.(types.QValueJSON); ok { var val []string if err := json.Unmarshal([]byte(qjson.Val), &val); err != nil { return false @@ -137,8 +142,8 @@ func Equals(qv QValue, other QValue) bool { } return compareArrays(q.Val, otherValue) - case QValueArrayEnum: - if qjson, ok := 
other.(QValueJSON); ok { + case types.QValueArrayEnum: + if qjson, ok := other.(types.QValueJSON); ok { var val []string if err := json.Unmarshal([]byte(qjson.Val), &val); err != nil { return false @@ -169,16 +174,21 @@ func compareGoTimestamp(value1, value2 any) bool { } func compareGoTime(value1, value2 any) bool { - t1, ok1 := value1.(time.Time) - t2, ok2 := value2.(time.Time) + t1, ok1 := value1.(time.Duration) + t2, ok2 := value2.(time.Duration) - if !ok1 || !ok2 { - return false + if !ok1 { + var tm time.Time + tm, ok1 = value1.(time.Time) + t1 = tm.Sub(time.Unix(0, 0).UTC()) + } + if !ok2 { + var tm time.Time + tm, ok2 = value2.(time.Time) + t2 = tm.Sub(time.Unix(0, 0).UTC()) } - h1, m1, s1 := t1.Clock() - h2, m2, s2 := t2.Clock() - return h1 == h2 && m1 == m2 && s1 == s2 + return ok1 && ok2 && t1 == t2 } func compareGoDate(value1, value2 any) bool { @@ -251,7 +261,7 @@ func compareGeometry(geoWkt string, value2 any) bool { return geo1.Equals(geo2) } -func convertNumericArrayToFloat64Array(val any) []float64 { +func convertNativeNumericArrayToFloat64Array(val any) []float64 { switch v := val.(type) { case []int16: result := make([]float64, len(v)) @@ -290,9 +300,9 @@ func convertNumericArrayToFloat64Array(val any) []float64 { } } -func compareNumericArrays(value1, value2 any) bool { - array1 := convertNumericArrayToFloat64Array(value1) - array2 := convertNumericArrayToFloat64Array(value2) +func compareNativeNumericArrays(value1, value2 any) bool { + array1 := convertNativeNumericArrayToFloat64Array(value1) + array2 := convertNativeNumericArrayToFloat64Array(value2) if array1 == nil || array2 == nil { return false } @@ -302,6 +312,13 @@ func compareNumericArrays(value1, value2 any) bool { }) } +func compareNumericArrays(array1 []decimal.Decimal, value2 any) bool { + array2, ok2 := value2.([]decimal.Decimal) + return ok2 && slices.EqualFunc(array1, array2, func(x, y decimal.Decimal) bool { + return x.Equal(y) + }) +} + func compareDateArrays(array1 []time.Time, value2 any) bool { array2, ok2 := value2.([]time.Time) return ok2 && slices.EqualFunc(array1, array2, func(x time.Time, y time.Time) bool { diff --git a/flow/model/qvalue/kind.go b/flow/model/qvalue/kind.go index d6cdeb562e..67c09a8d94 100644 --- a/flow/model/qvalue/kind.go +++ b/flow/model/qvalue/kind.go @@ -3,191 +3,65 @@ package qvalue import ( "context" "fmt" - "strings" - "github.com/PeerDB-io/peerdb/flow/datatypes" "github.com/PeerDB-io/peerdb/flow/generated/protos" "github.com/PeerDB-io/peerdb/flow/internal" + "github.com/PeerDB-io/peerdb/flow/shared/datatypes" + "github.com/PeerDB-io/peerdb/flow/shared/types" ) -type QValueKind string - -const ( - QValueKindInvalid QValueKind = "invalid" - QValueKindFloat32 QValueKind = "float32" - QValueKindFloat64 QValueKind = "float64" - QValueKindInt8 QValueKind = "int8" - QValueKindInt16 QValueKind = "int16" - QValueKindInt32 QValueKind = "int32" - QValueKindInt64 QValueKind = "int64" - QValueKindUInt8 QValueKind = "uint8" - QValueKindUInt16 QValueKind = "uint16" - QValueKindUInt32 QValueKind = "uint32" - QValueKindUInt64 QValueKind = "uint64" - QValueKindBoolean QValueKind = "bool" - QValueKindQChar QValueKind = "qchar" - QValueKindString QValueKind = "string" - QValueKindEnum QValueKind = "enum" - QValueKindTimestamp QValueKind = "timestamp" - QValueKindTimestampTZ QValueKind = "timestamptz" - QValueKindDate QValueKind = "date" - QValueKindTime QValueKind = "time" - QValueKindTimeTZ QValueKind = "timetz" - QValueKindInterval QValueKind = "interval" - QValueKindTSTZRange 
QValueKind = "tstzrange" - QValueKindNumeric QValueKind = "numeric" - QValueKindBytes QValueKind = "bytes" - QValueKindUUID QValueKind = "uuid" - QValueKindJSON QValueKind = "json" - QValueKindJSONB QValueKind = "jsonb" - QValueKindHStore QValueKind = "hstore" - QValueKindGeography QValueKind = "geography" - QValueKindGeometry QValueKind = "geometry" - QValueKindPoint QValueKind = "point" - - // network types - QValueKindCIDR QValueKind = "cidr" - QValueKindINET QValueKind = "inet" - QValueKindMacaddr QValueKind = "macaddr" - - // array types - QValueKindArrayFloat32 QValueKind = "array_float32" - QValueKindArrayFloat64 QValueKind = "array_float64" - QValueKindArrayInt16 QValueKind = "array_int16" - QValueKindArrayInt32 QValueKind = "array_int32" - QValueKindArrayInt64 QValueKind = "array_int64" - QValueKindArrayString QValueKind = "array_string" - QValueKindArrayEnum QValueKind = "array_enum" - QValueKindArrayDate QValueKind = "array_date" - QValueKindArrayTimestamp QValueKind = "array_timestamp" - QValueKindArrayTimestampTZ QValueKind = "array_timestamptz" - QValueKindArrayBoolean QValueKind = "array_bool" - QValueKindArrayJSON QValueKind = "array_json" - QValueKindArrayJSONB QValueKind = "array_jsonb" - QValueKindArrayUUID QValueKind = "array_uuid" -) - -func (kind QValueKind) IsArray() bool { - return strings.HasPrefix(string(kind), "array_") -} - -var QValueKindToSnowflakeTypeMap = map[QValueKind]string{ - QValueKindBoolean: "BOOLEAN", - QValueKindInt8: "INTEGER", - QValueKindInt16: "INTEGER", - QValueKindInt32: "INTEGER", - QValueKindInt64: "INTEGER", - QValueKindUInt8: "INTEGER", - QValueKindUInt16: "INTEGER", - QValueKindUInt32: "INTEGER", - QValueKindUInt64: "INTEGER", - QValueKindFloat32: "FLOAT", - QValueKindFloat64: "FLOAT", - QValueKindQChar: "CHAR", - QValueKindString: "STRING", - QValueKindEnum: "STRING", - QValueKindJSON: "VARIANT", - QValueKindJSONB: "VARIANT", - QValueKindTimestamp: "TIMESTAMP_NTZ", - QValueKindTimestampTZ: "TIMESTAMP_TZ", - QValueKindInterval: "VARIANT", - QValueKindTime: "TIME", - QValueKindTimeTZ: "TIME", - QValueKindDate: "DATE", - QValueKindBytes: "BINARY", - QValueKindUUID: "STRING", - QValueKindInvalid: "STRING", - QValueKindHStore: "VARIANT", - QValueKindGeography: "GEOGRAPHY", - QValueKindGeometry: "GEOMETRY", - QValueKindPoint: "GEOMETRY", - - // array types will be mapped to VARIANT - QValueKindArrayFloat32: "VARIANT", - QValueKindArrayFloat64: "VARIANT", - QValueKindArrayInt32: "VARIANT", - QValueKindArrayInt64: "VARIANT", - QValueKindArrayInt16: "VARIANT", - QValueKindArrayString: "VARIANT", - QValueKindArrayEnum: "VARIANT", - QValueKindArrayDate: "VARIANT", - QValueKindArrayTimestamp: "VARIANT", - QValueKindArrayTimestampTZ: "VARIANT", - QValueKindArrayBoolean: "VARIANT", - QValueKindArrayJSON: "VARIANT", - QValueKindArrayJSONB: "VARIANT", - QValueKindArrayUUID: "VARIANT", -} - -var QValueKindToClickHouseTypeMap = map[QValueKind]string{ - QValueKindBoolean: "Bool", - QValueKindInt8: "Int8", - QValueKindInt16: "Int16", - QValueKindInt32: "Int32", - QValueKindInt64: "Int64", - QValueKindUInt8: "UInt8", - QValueKindUInt16: "UInt16", - QValueKindUInt32: "UInt32", - QValueKindUInt64: "UInt64", - QValueKindFloat32: "Float32", - QValueKindFloat64: "Float64", - QValueKindQChar: "FixedString(1)", - QValueKindString: "String", - QValueKindEnum: "LowCardinality(String)", - QValueKindJSON: "String", - QValueKindTimestamp: "DateTime64(6)", - QValueKindTimestampTZ: "DateTime64(6)", - QValueKindTSTZRange: "String", - QValueKindTime: "DateTime64(6)", - 
QValueKindTimeTZ: "DateTime64(6)", - QValueKindDate: "Date32", - QValueKindBytes: "String", - QValueKindUUID: "UUID", - QValueKindInvalid: "String", - QValueKindHStore: "String", - - QValueKindArrayFloat32: "Array(Float32)", - QValueKindArrayFloat64: "Array(Float64)", - QValueKindArrayInt16: "Array(Int16)", - QValueKindArrayInt32: "Array(Int32)", - QValueKindArrayInt64: "Array(Int64)", - QValueKindArrayString: "Array(String)", - QValueKindArrayEnum: "Array(LowCardinality(String))", - QValueKindArrayBoolean: "Array(Bool)", - QValueKindArrayDate: "Array(Date)", - QValueKindArrayTimestamp: "Array(DateTime64(6))", - QValueKindArrayTimestampTZ: "Array(DateTime64(6))", - QValueKindArrayJSON: "String", - QValueKindArrayJSONB: "String", - QValueKindArrayUUID: "Array(UUID)", +type NumericDestinationType struct { + IsString bool + Precision, Scale int16 } -func getClickHouseTypeForNumericColumn(ctx context.Context, env map[string]string, column *protos.FieldDescription) (string, error) { - if column.TypeModifier == -1 { - numericAsStringEnabled, err := internal.PeerDBEnableClickHouseNumericAsString(ctx, env) - if err != nil { - return "", err +func GetNumericDestinationType( + precision, scale int16, targetDWH protos.DBType, unboundedNumericAsString bool, +) NumericDestinationType { + if targetDWH == protos.DBType_CLICKHOUSE { + if precision == 0 && scale == 0 && unboundedNumericAsString { + return NumericDestinationType{IsString: true} } - if numericAsStringEnabled { - return "String", nil + if precision > datatypes.PeerDBClickHouseMaxPrecision { + return NumericDestinationType{IsString: true} } - } else if rawPrecision, _ := datatypes.ParseNumericTypmod(column.TypeModifier); rawPrecision > datatypes.PeerDBClickHouseMaxPrecision { + } + destPrecision, destScale := DetermineNumericSettingForDWH(precision, scale, targetDWH) + return NumericDestinationType{ + IsString: false, + Precision: destPrecision, + Scale: destScale, + } +} + +func getClickHouseTypeForNumericColumn(ctx context.Context, env map[string]string, typeModifier int32) (string, error) { + precision, scale := datatypes.ParseNumericTypmod(typeModifier) + asString, err := internal.PeerDBEnableClickHouseNumericAsString(ctx, env) + if err != nil { + return "", err + } + destinationType := GetNumericDestinationType(precision, scale, protos.DBType_CLICKHOUSE, asString) + if destinationType.IsString { return "String", nil } - precision, scale := datatypes.GetNumericTypeForWarehouse(column.TypeModifier, datatypes.ClickHouseNumericCompatibility{}) - return fmt.Sprintf("Decimal(%d, %d)", precision, scale), nil + return fmt.Sprintf("Decimal(%d, %d)", destinationType.Precision, destinationType.Scale), nil } -func (kind QValueKind) ToDWHColumnType( - ctx context.Context, env map[string]string, dwhType protos.DBType, column *protos.FieldDescription, nullableEnabled bool, +func ToDWHColumnType( + ctx context.Context, + kind types.QValueKind, + env map[string]string, + dwhType protos.DBType, + column *protos.FieldDescription, + nullableEnabled bool, ) (string, error) { var colType string switch dwhType { case protos.DBType_SNOWFLAKE: - if kind == QValueKindNumeric { + if kind == types.QValueKindNumeric { precision, scale := datatypes.GetNumericTypeForWarehouse(column.TypeModifier, datatypes.SnowflakeNumericCompatibility{}) colType = fmt.Sprintf("NUMERIC(%d,%d)", precision, scale) - } else if val, ok := QValueKindToSnowflakeTypeMap[kind]; ok { + } else if val, ok := types.QValueKindToSnowflakeTypeMap[kind]; ok { colType = val } else { colType = "STRING" 
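Aside on the numeric mapping above: getClickHouseTypeForNumericColumn now derives the destination column type from the raw Postgres typmod instead of a FieldDescription. The standalone sketch below is illustrative only and not code from this PR; the helper names, the ((precision << 16) | scale) + 4 typmod encoding, the 76-digit ceiling, and the Decimal(76, 38) fallback for unconstrained NUMERIC are all assumptions made for the example.

// Illustrative sketch: map a Postgres numeric typmod to a ClickHouse column type,
// mirroring the String-vs-Decimal decision made in the diff above.
package main

import "fmt"

const assumedMaxPrecision = 76 // assumption: ClickHouse Decimal256 precision ceiling

func parseNumericTypmod(typmod int32) (precision, scale int16) {
	if typmod == -1 {
		return 0, 0 // unconstrained NUMERIC
	}
	t := typmod - 4 // assumed VARHDRSZ offset used in the Postgres typmod encoding
	return int16((t >> 16) & 0xFFFF), int16(t & 0xFFFF)
}

func clickHouseNumericType(typmod int32, unboundedAsString bool) string {
	precision, scale := parseNumericTypmod(typmod)
	if precision == 0 && scale == 0 {
		if unboundedAsString {
			return "String"
		}
		precision, scale = 76, 38 // assumed default for unconstrained NUMERIC
	}
	if precision > assumedMaxPrecision {
		return "String" // too wide for a fixed Decimal, fall back to text
	}
	return fmt.Sprintf("Decimal(%d, %d)", precision, scale)
}

func main() {
	fmt.Println(clickHouseNumericType(-1, true))               // String
	fmt.Println(clickHouseNumericType((10<<16|2)+4, false))    // Decimal(10, 2)
	fmt.Println(clickHouseNumericType((100<<16|30)+4, false))  // String (precision > 76)
}

The array case added in the hunk that follows wraps the same scalar decision in Array(...), which is why both branches share one helper.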
@@ -196,13 +70,20 @@ func (kind QValueKind) ToDWHColumnType( colType += " NOT NULL" } case protos.DBType_CLICKHOUSE: - if kind == QValueKindNumeric { + if kind == types.QValueKindNumeric { + var err error + colType, err = getClickHouseTypeForNumericColumn(ctx, env, column.TypeModifier) + if err != nil { + return "", err + } + } else if kind == types.QValueKindArrayNumeric { var err error - colType, err = getClickHouseTypeForNumericColumn(ctx, env, column) + colType, err = getClickHouseTypeForNumericColumn(ctx, env, column.TypeModifier) if err != nil { return "", err } - } else if val, ok := QValueKindToClickHouseTypeMap[kind]; ok { + colType = fmt.Sprintf("Array(%s)", colType) + } else if val, ok := types.QValueKindToClickHouseTypeMap[kind]; ok { colType = val } else { colType = "String" diff --git a/flow/model/record_items.go b/flow/model/record_items.go index 5af3855fec..43c1ce0371 100644 --- a/flow/model/record_items.go +++ b/flow/model/record_items.go @@ -4,9 +4,10 @@ import ( "encoding/json" "fmt" "math" + "time" - "github.com/PeerDB-io/peerdb/flow/datatypes" - "github.com/PeerDB-io/peerdb/flow/model/qvalue" + "github.com/PeerDB-io/peerdb/flow/shared/datatypes" + "github.com/PeerDB-io/peerdb/flow/shared/types" ) type Items interface { @@ -24,20 +25,20 @@ func ItemsToJSON(items Items) (string, error) { // encoding/gob cannot encode unexported fields type RecordItems struct { - ColToVal map[string]qvalue.QValue + ColToVal map[string]types.QValue } func NewRecordItems(capacity int) RecordItems { return RecordItems{ - ColToVal: make(map[string]qvalue.QValue, capacity), + ColToVal: make(map[string]types.QValue, capacity), } } -func (r RecordItems) AddColumn(col string, val qvalue.QValue) { +func (r RecordItems) AddColumn(col string, val types.QValue) { r.ColToVal[col] = val } -func (r RecordItems) GetColumnValue(col string) qvalue.QValue { +func (r RecordItems) GetColumnValue(col string) types.QValue { return r.ColToVal[col] } @@ -57,7 +58,7 @@ func (r RecordItems) UpdateIfNotExists(input_ Items) []string { return updatedCols } -func (r RecordItems) GetValueByColName(colName string) (qvalue.QValue, error) { +func (r RecordItems) GetValueByColName(colName string) (types.QValue, error) { val, ok := r.ColToVal[colName] if !ok { return nil, fmt.Errorf("column name %s not found", colName) @@ -86,11 +87,11 @@ func (r RecordItems) toMap(opts ToJSONOptions) (map[string]any, error) { } switch v := qv.(type) { - case qvalue.QValueUUID: + case types.QValueUUID: jsonStruct[col] = v.Val - case qvalue.QValueQChar: + case types.QValueQChar: jsonStruct[col] = string(v.Val) - case qvalue.QValueString: + case types.QValueString: strVal := v.Val if len(strVal) > 15*1024*1024 { @@ -98,7 +99,7 @@ func (r RecordItems) toMap(opts ToJSONOptions) (map[string]any, error) { } else { jsonStruct[col] = strVal } - case qvalue.QValueJSON: + case types.QValueJSON: if len(v.Val) > 15*1024*1024 { jsonStruct[col] = "{}" } else if _, ok := opts.UnnestColumns[col]; ok { @@ -113,7 +114,7 @@ func (r RecordItems) toMap(opts ToJSONOptions) (map[string]any, error) { } else { jsonStruct[col] = v.Val } - case qvalue.QValueHStore: + case types.QValueHStore: hstoreVal := v.Val if !opts.HStoreAsJSON { @@ -131,38 +132,45 @@ func (r RecordItems) toMap(opts ToJSONOptions) (map[string]any, error) { } } - case qvalue.QValueTimestamp: + case types.QValueTimestamp: jsonStruct[col] = v.Val.Format("2006-01-02 15:04:05.999999") - case qvalue.QValueTimestampTZ: + case types.QValueTimestampTZ: jsonStruct[col] = v.Val.Format("2006-01-02 
15:04:05.999999-0700") - case qvalue.QValueDate: + case types.QValueDate: jsonStruct[col] = v.Val.Format("2006-01-02") - case qvalue.QValueTime: - jsonStruct[col] = v.Val.Format("15:04:05.999999") - case qvalue.QValueTimeTZ: - jsonStruct[col] = v.Val.Format("15:04:05.999999") - case qvalue.QValueArrayDate: + case types.QValueTime: + jsonStruct[col] = time.Time{}.Add(v.Val).Format("15:04:05.999999") + case types.QValueTimeTZ: + jsonStruct[col] = time.Time{}.Add(v.Val).Format("15:04:05.999999") + case types.QValueArrayDate: dateArr := v.Val formattedDateArr := make([]string, 0, len(dateArr)) for _, val := range dateArr { formattedDateArr = append(formattedDateArr, val.Format("2006-01-02")) } jsonStruct[col] = formattedDateArr - case qvalue.QValueNumeric: + case types.QValueNumeric: jsonStruct[col] = v.Val.String() - case qvalue.QValueFloat64: + case types.QValueArrayNumeric: + numericArr := v.Val + strArr := make([]any, 0, len(numericArr)) + for _, val := range numericArr { + strArr = append(strArr, val.String()) + } + jsonStruct[col] = strArr + case types.QValueFloat64: if math.IsNaN(v.Val) || math.IsInf(v.Val, 0) { jsonStruct[col] = nil } else { jsonStruct[col] = v.Val } - case qvalue.QValueFloat32: + case types.QValueFloat32: if math.IsNaN(float64(v.Val)) || math.IsInf(float64(v.Val), 0) { jsonStruct[col] = nil } else { jsonStruct[col] = v.Val } - case qvalue.QValueArrayFloat64: + case types.QValueArrayFloat64: floatArr := v.Val nullableFloatArr := make([]any, 0, len(floatArr)) for _, val := range floatArr { @@ -173,7 +181,7 @@ func (r RecordItems) toMap(opts ToJSONOptions) (map[string]any, error) { } } jsonStruct[col] = nullableFloatArr - case qvalue.QValueArrayFloat32: + case types.QValueArrayFloat32: floatArr := v.Val nullableFloatArr := make([]any, 0, len(floatArr)) for _, val := range floatArr { diff --git a/flow/pua/peerdb.go b/flow/pua/peerdb.go index 3a9308d829..5d9af6e683 100644 --- a/flow/pua/peerdb.go +++ b/flow/pua/peerdb.go @@ -20,8 +20,8 @@ import ( "github.com/PeerDB-io/gluautf8" "github.com/PeerDB-io/peerdb/flow/internal" "github.com/PeerDB-io/peerdb/flow/model" - "github.com/PeerDB-io/peerdb/flow/model/qvalue" "github.com/PeerDB-io/peerdb/flow/shared" + "github.com/PeerDB-io/peerdb/flow/shared/types" ) var ( @@ -146,7 +146,7 @@ func LoadPeerdbScript(ls *lua.LState) int { return 1 } -func GetRowQ(ls *lua.LState, row model.RecordItems, col string) qvalue.QValue { +func GetRowQ(ls *lua.LState, row model.RecordItems, col string) types.QValue { qv, err := row.GetValueByColName(col) if err != nil { ls.RaiseError("%s", err.Error()) @@ -175,6 +175,20 @@ func LVAsTime(ls *lua.LState, lv lua.LValue) time.Time { return time.Time{} } +func LVAsDuration(ls *lua.LState, lv lua.LValue) time.Duration { + switch v := lv.(type) { + case lua.LNumber: + ipart, fpart := math.Modf(float64(v)) + return time.Duration(ipart)*time.Second + time.Duration(fpart*1e9) + case *lua.LUserData: + if tm, ok := v.Value.(time.Time); ok { + return tm.Sub(time.Unix(0, 0)) + } + } + ls.RaiseError("Cannot convert %T to time.Time", lv) + return 0 +} + func LuaRowNewIndex(ls *lua.LState) int { _, row := LuaRow.Check(ls, 1) key := ls.CheckString(2) @@ -182,187 +196,205 @@ func LuaRowNewIndex(ls *lua.LState) int { qv := row.GetColumnValue(key) kind := qv.Kind() if val == lua.LNil { - row.AddColumn(key, qvalue.QValueNull(kind)) + row.AddColumn(key, types.QValueNull(kind)) } - var newqv qvalue.QValue + var newqv types.QValue switch kind { - case qvalue.QValueKindInvalid: - newqv = qvalue.QValueInvalid{Val: 
lua.LVAsString(val)} - case qvalue.QValueKindFloat32: - newqv = qvalue.QValueFloat32{Val: float32(lua.LVAsNumber(val))} - case qvalue.QValueKindFloat64: - newqv = qvalue.QValueFloat64{Val: float64(lua.LVAsNumber(val))} - case qvalue.QValueKindInt8: - newqv = qvalue.QValueInt8{Val: int8(lua.LVAsNumber(val))} - case qvalue.QValueKindInt16: - newqv = qvalue.QValueInt16{Val: int16(lua.LVAsNumber(val))} - case qvalue.QValueKindInt32: - newqv = qvalue.QValueInt32{Val: int32(lua.LVAsNumber(val))} - case qvalue.QValueKindInt64: + case types.QValueKindInvalid: + newqv = types.QValueInvalid{Val: lua.LVAsString(val)} + case types.QValueKindFloat32: + newqv = types.QValueFloat32{Val: float32(lua.LVAsNumber(val))} + case types.QValueKindFloat64: + newqv = types.QValueFloat64{Val: float64(lua.LVAsNumber(val))} + case types.QValueKindInt8: + newqv = types.QValueInt8{Val: int8(lua.LVAsNumber(val))} + case types.QValueKindInt16: + newqv = types.QValueInt16{Val: int16(lua.LVAsNumber(val))} + case types.QValueKindInt32: + newqv = types.QValueInt32{Val: int32(lua.LVAsNumber(val))} + case types.QValueKindInt64: switch v := val.(type) { case lua.LNumber: - newqv = qvalue.QValueInt64{Val: int64(v)} + newqv = types.QValueInt64{Val: int64(v)} case *lua.LUserData: switch i64 := v.Value.(type) { case int64: - newqv = qvalue.QValueInt64{Val: i64} + newqv = types.QValueInt64{Val: i64} case uint64: - newqv = qvalue.QValueInt64{Val: int64(i64)} + newqv = types.QValueInt64{Val: int64(i64)} } } if newqv == nil { ls.RaiseError("invalid int64") } - case qvalue.QValueKindUInt8: - newqv = qvalue.QValueUInt8{Val: uint8(lua.LVAsNumber(val))} - case qvalue.QValueKindUInt16: - newqv = qvalue.QValueUInt16{Val: uint16(lua.LVAsNumber(val))} - case qvalue.QValueKindUInt32: - newqv = qvalue.QValueUInt32{Val: uint32(lua.LVAsNumber(val))} - case qvalue.QValueKindUInt64: + case types.QValueKindUInt8: + newqv = types.QValueUInt8{Val: uint8(lua.LVAsNumber(val))} + case types.QValueKindUInt16: + newqv = types.QValueUInt16{Val: uint16(lua.LVAsNumber(val))} + case types.QValueKindUInt32: + newqv = types.QValueUInt32{Val: uint32(lua.LVAsNumber(val))} + case types.QValueKindUInt64: switch v := val.(type) { case lua.LNumber: - newqv = qvalue.QValueUInt64{Val: uint64(v)} + newqv = types.QValueUInt64{Val: uint64(v)} case *lua.LUserData: switch i64 := v.Value.(type) { case int64: - newqv = qvalue.QValueUInt64{Val: uint64(i64)} + newqv = types.QValueUInt64{Val: uint64(i64)} case uint64: - newqv = qvalue.QValueUInt64{Val: i64} + newqv = types.QValueUInt64{Val: i64} } } if newqv == nil { ls.RaiseError("invalid uint64") } - case qvalue.QValueKindBoolean: - newqv = qvalue.QValueBoolean{Val: lua.LVAsBool(val)} - case qvalue.QValueKindQChar: + case types.QValueKindBoolean: + newqv = types.QValueBoolean{Val: lua.LVAsBool(val)} + case types.QValueKindQChar: switch v := val.(type) { case lua.LNumber: - newqv = qvalue.QValueQChar{Val: uint8(v)} + newqv = types.QValueQChar{Val: uint8(v)} case lua.LString: if len(v) > 0 { - newqv = qvalue.QValueQChar{Val: v[0]} + newqv = types.QValueQChar{Val: v[0]} } default: ls.RaiseError("invalid \"char\"") } - case qvalue.QValueKindString: - newqv = qvalue.QValueString{Val: lua.LVAsString(val)} - case qvalue.QValueKindEnum: - newqv = qvalue.QValueEnum{Val: lua.LVAsString(val)} - case qvalue.QValueKindTimestamp: - newqv = qvalue.QValueTimestamp{Val: LVAsTime(ls, val)} - case qvalue.QValueKindTimestampTZ: - newqv = qvalue.QValueTimestampTZ{Val: LVAsTime(ls, val)} - case qvalue.QValueKindDate: - newqv = qvalue.QValueDate{Val: 
LVAsTime(ls, val)} - case qvalue.QValueKindTime: - newqv = qvalue.QValueTime{Val: LVAsTime(ls, val)} - case qvalue.QValueKindTimeTZ: - newqv = qvalue.QValueTimeTZ{Val: LVAsTime(ls, val)} - case qvalue.QValueKindNumeric: - newqv = qvalue.QValueNumeric{Val: LVAsDecimal(ls, val)} - case qvalue.QValueKindBytes: - newqv = qvalue.QValueBytes{Val: []byte(lua.LVAsString(val))} - case qvalue.QValueKindUUID: + case types.QValueKindString: + newqv = types.QValueString{Val: lua.LVAsString(val)} + case types.QValueKindEnum: + newqv = types.QValueEnum{Val: lua.LVAsString(val)} + case types.QValueKindTimestamp: + newqv = types.QValueTimestamp{Val: LVAsTime(ls, val)} + case types.QValueKindTimestampTZ: + newqv = types.QValueTimestampTZ{Val: LVAsTime(ls, val)} + case types.QValueKindDate: + newqv = types.QValueDate{Val: LVAsTime(ls, val)} + case types.QValueKindTime: + newqv = types.QValueTime{Val: LVAsDuration(ls, val)} + case types.QValueKindTimeTZ: + newqv = types.QValueTimeTZ{Val: LVAsDuration(ls, val)} + case types.QValueKindInterval: + newqv = types.QValueInterval{Val: lua.LVAsString(val)} + case types.QValueKindNumeric: + newqv = types.QValueNumeric{Val: LVAsDecimal(ls, val)} + case types.QValueKindBytes: + newqv = types.QValueBytes{Val: []byte(lua.LVAsString(val))} + case types.QValueKindUUID: if ud, ok := val.(*lua.LUserData); ok { if id, ok := ud.Value.(uuid.UUID); ok { - newqv = qvalue.QValueUUID{Val: id} + newqv = types.QValueUUID{Val: id} } } - case qvalue.QValueKindArrayUUID: + case types.QValueKindArrayUUID: if tbl, ok := val.(*lua.LTable); ok { - newqv = qvalue.QValueArrayUUID{ + newqv = types.QValueArrayUUID{ Val: shared.LTableToSlice(ls, tbl, func(_ *lua.LState, v lua.LValue) uuid.UUID { return uuid.MustParse(lua.LVAsString(v)) }), } } - case qvalue.QValueKindJSON: - newqv = qvalue.QValueJSON{Val: lua.LVAsString(val)} - case qvalue.QValueKindArrayFloat32: + case types.QValueKindJSON: + newqv = types.QValueJSON{Val: lua.LVAsString(val)} + case types.QValueKindArrayFloat32: if tbl, ok := val.(*lua.LTable); ok { - newqv = qvalue.QValueArrayFloat32{ + newqv = types.QValueArrayFloat32{ Val: shared.LTableToSlice(ls, tbl, func(_ *lua.LState, v lua.LValue) float32 { return float32(lua.LVAsNumber(v)) }), } } - case qvalue.QValueKindArrayFloat64: + case types.QValueKindArrayFloat64: if tbl, ok := val.(*lua.LTable); ok { - newqv = qvalue.QValueArrayFloat64{ + newqv = types.QValueArrayFloat64{ Val: shared.LTableToSlice(ls, tbl, func(_ *lua.LState, v lua.LValue) float64 { return float64(lua.LVAsNumber(v)) }), } } - case qvalue.QValueKindArrayInt16: + case types.QValueKindArrayInt16: if tbl, ok := val.(*lua.LTable); ok { - newqv = qvalue.QValueArrayFloat64{ + newqv = types.QValueArrayFloat64{ Val: shared.LTableToSlice(ls, tbl, func(_ *lua.LState, v lua.LValue) float64 { return float64(lua.LVAsNumber(v)) }), } } - case qvalue.QValueKindArrayInt32: + case types.QValueKindArrayInt32: if tbl, ok := val.(*lua.LTable); ok { - newqv = qvalue.QValueArrayFloat64{ + newqv = types.QValueArrayFloat64{ Val: shared.LTableToSlice(ls, tbl, func(_ *lua.LState, v lua.LValue) float64 { return float64(lua.LVAsNumber(v)) }), } } - case qvalue.QValueKindArrayInt64: + case types.QValueKindArrayInt64: if tbl, ok := val.(*lua.LTable); ok { - newqv = qvalue.QValueArrayFloat64{ + newqv = types.QValueArrayFloat64{ Val: shared.LTableToSlice(ls, tbl, func(_ *lua.LState, v lua.LValue) float64 { return float64(lua.LVAsNumber(v)) }), } } - case qvalue.QValueKindArrayString: + case types.QValueKindArrayString: if tbl, ok := 
val.(*lua.LTable); ok { - newqv = qvalue.QValueArrayString{ + newqv = types.QValueArrayString{ Val: shared.LTableToSlice(ls, tbl, func(_ *lua.LState, v lua.LValue) string { return lua.LVAsString(v) }), } } - case qvalue.QValueKindArrayEnum: + case types.QValueKindArrayEnum: if tbl, ok := val.(*lua.LTable); ok { - newqv = qvalue.QValueArrayEnum{ + newqv = types.QValueArrayEnum{ Val: shared.LTableToSlice(ls, tbl, func(_ *lua.LState, v lua.LValue) string { return lua.LVAsString(v) }), } } - case qvalue.QValueKindArrayDate: + case types.QValueKindArrayDate: if tbl, ok := val.(*lua.LTable); ok { - newqv = qvalue.QValueArrayDate{ + newqv = types.QValueArrayDate{ Val: shared.LTableToSlice(ls, tbl, LVAsTime), } } - case qvalue.QValueKindArrayTimestamp: + case types.QValueKindArrayInterval: if tbl, ok := val.(*lua.LTable); ok { - newqv = qvalue.QValueArrayDate{ + newqv = types.QValueArrayInterval{ + Val: shared.LTableToSlice(ls, tbl, func(_ *lua.LState, v lua.LValue) string { + return lua.LVAsString(v) + }), + } + } + case types.QValueKindArrayTimestamp: + if tbl, ok := val.(*lua.LTable); ok { + newqv = types.QValueArrayDate{ Val: shared.LTableToSlice(ls, tbl, LVAsTime), } } - case qvalue.QValueKindArrayTimestampTZ: + case types.QValueKindArrayTimestampTZ: if tbl, ok := val.(*lua.LTable); ok { - newqv = qvalue.QValueArrayDate{ + newqv = types.QValueArrayDate{ Val: shared.LTableToSlice(ls, tbl, LVAsTime), } } - case qvalue.QValueKindArrayBoolean: + case types.QValueKindArrayBoolean: if tbl, ok := val.(*lua.LTable); ok { - newqv = qvalue.QValueArrayBoolean{ + newqv = types.QValueArrayBoolean{ Val: shared.LTableToSlice(ls, tbl, func(_ *lua.LState, v lua.LValue) bool { return lua.LVAsBool(v) }), } } + case types.QValueKindArrayNumeric: + if tbl, ok := val.(*lua.LTable); ok { + newqv = types.QValueArrayNumeric{ + Val: shared.LTableToSlice(ls, tbl, func(_ *lua.LState, v lua.LValue) decimal.Decimal { + return LVAsDecimal(ls, v) + }), + } + } default: ls.RaiseError("no support for reassigning %s", kind) return 0 diff --git a/flow/pua/peerdb_test.go b/flow/pua/peerdb_test.go index 9fdd73f004..4c3dbbd44f 100644 --- a/flow/pua/peerdb_test.go +++ b/flow/pua/peerdb_test.go @@ -6,7 +6,7 @@ import ( lua "github.com/yuin/gopher-lua" "github.com/PeerDB-io/peerdb/flow/model" - "github.com/PeerDB-io/peerdb/flow/model/qvalue" + "github.com/PeerDB-io/peerdb/flow/shared/types" ) func assert(t *testing.T, ls *lua.LState, source string) { @@ -25,11 +25,11 @@ func Test(t *testing.T) { RegisterTypes(ls) row := model.NewRecordItems(1) - row.AddColumn("a", qvalue.QValueInt64{Val: 5040}) + row.AddColumn("a", types.QValueInt64{Val: 5040}) ls.Env.RawSetString("row", LuaRow.New(ls, row)) row_empty_array := model.NewRecordItems(1) - row_empty_array.AddColumn("a", qvalue.QValueArrayInt32{Val: nil}) + row_empty_array.AddColumn("a", types.QValueArrayInt32{Val: nil}) ls.Env.RawSetString("row_empty_array", LuaRow.New(ls, row_empty_array)) assert(t, ls, ` diff --git a/flow/shared/clickhouse/escape.go b/flow/shared/clickhouse/escape.go index 43c9fd074b..7a3bedec61 100644 --- a/flow/shared/clickhouse/escape.go +++ b/flow/shared/clickhouse/escape.go @@ -2,24 +2,51 @@ package clickhouse import "strings" -const mustEscape = "\t\n`'\\" +func mustEscape(char byte) bool { + return char == '\'' || char == '`' || char == '\\' || char == '\t' || char == '\n' +} -func EscapeStr(value string) string { - var result strings.Builder - for _, c := range value { - if strings.ContainsRune(mustEscape, c) { - result.WriteRune('\\') +// escaped size only 
needs to iterate on bytes, ASCII will never appear within multibyte utf8 characters +func escapeSize(value string) int { + size := len(value) + for idx := range len(value) { + if mustEscape(value[idx]) { + size += 1 + } + } + return size +} + +func escape(result *strings.Builder, value string) { + for idx := range len(value) { + if mustEscape(value[idx]) { + result.WriteByte('\\') } - result.WriteRune(c) + result.WriteByte(value[idx]) } +} +func EscapeStr(value string) string { + var result strings.Builder + result.Grow(escapeSize(value)) + escape(&result, value) return result.String() } func QuoteLiteral(value string) string { - return "'" + EscapeStr(value) + "'" + var result strings.Builder + result.Grow(escapeSize(value) + 2) + result.WriteByte('\'') + escape(&result, value) + result.WriteByte('\'') + return result.String() } func QuoteIdentifier(value string) string { - return "`" + EscapeStr(value) + "`" + var result strings.Builder + result.Grow(escapeSize(value) + 2) + result.WriteByte('`') + escape(&result, value) + result.WriteByte('`') + return result.String() } diff --git a/flow/shared/clickhouse/query_retry.go b/flow/shared/clickhouse/query_retry.go index 8cc3f68aa2..6d7f93d6d3 100644 --- a/flow/shared/clickhouse/query_retry.go +++ b/flow/shared/clickhouse/query_retry.go @@ -53,7 +53,7 @@ func Exec(ctx context.Context, logger log.Logger, if !isRetryableException(err) { break } - logger.Info("[exec] retryable error", slog.Any("error", err), slog.String("query", query), slog.Int64("retry", int64(i))) + logger.Info("[exec] retryable error", slog.Any("error", err), slog.Int64("retry", int64(i))) if i < 4 { time.Sleep(time.Second * time.Duration(i*5+1)) } @@ -71,7 +71,7 @@ func Query(ctx context.Context, logger log.Logger, if !isRetryableException(err) { break } - logger.Info("[query] retryable error", slog.Any("error", err), slog.String("query", query), slog.Int64("retry", int64(i))) + logger.Info("[query] retryable error", slog.Any("error", err), slog.Int64("retry", int64(i))) if i < 4 { time.Sleep(time.Second * time.Duration(i*5+1)) } @@ -89,7 +89,7 @@ func QueryRow(ctx context.Context, logger log.Logger, if !isRetryableException(err) { break } - logger.Info("[queryRow] retryable error", slog.Any("error", err), slog.String("query", query), slog.Int64("retry", int64(i))) + logger.Info("[queryRow] retryable error", slog.Any("error", err), slog.Int64("retry", int64(i))) if i < 4 { time.Sleep(time.Second * time.Duration(i*5+1)) } diff --git a/flow/shared/clickhouse/type_conversion.go b/flow/shared/clickhouse/type_conversion.go new file mode 100644 index 0000000000..eede738a7f --- /dev/null +++ b/flow/shared/clickhouse/type_conversion.go @@ -0,0 +1,46 @@ +package clickhouse + +import ( + "github.com/PeerDB-io/peerdb/flow/shared/types" +) + +/* +This file handles the mapping for ClickHouse destination types and +their corresponding TypeConversion implementations. A TypeConversion +object contains two functions: one for schema conversion (QField) and +one for value conversion (QValue). This allows the avro writer to +stage the schema/data in the converted type format, and therefore +successfully uploaded to the desired destination type in ClickHouse. 
+ +To add a type conversion: + + (1) In flow/model/shared/type_converter.go: + - implement a SchemaConversionFn interface to convert the QField type + - implement a ValueConversionFn interface to convert the QValue data + + (2) Add the new conversion to the `supportedDestinationTypes` map here + (if destination type doesn't exist, create a new map entry for it). +*/ +var SupportedDestinationTypes = map[string][]types.TypeConversion{ + "String": {types.NewTypeConversion( + types.NumericToStringSchemaConversion, + types.NumericToStringValueConversion, + )}, +} + +var NumericDestinationTypes = map[string]struct{}{ + "String": {}, +} + +// returns the full list of supported type conversions. The keys are +// QValueKind to allows the implementation to be source-connector agnostic. +func ListSupportedTypeConversions() map[types.QValueKind][]string { + typeConversions := make(map[types.QValueKind][]string) + + for dstType, l := range SupportedDestinationTypes { + for _, conversion := range l { + typeConversions[conversion.FromKind()] = append(typeConversions[conversion.FromKind()], dstType) + } + } + return typeConversions +} diff --git a/flow/shared/clickhouse/validation.go b/flow/shared/clickhouse/validation.go index c528d961df..c1d60f46a7 100644 --- a/flow/shared/clickhouse/validation.go +++ b/flow/shared/clickhouse/validation.go @@ -33,13 +33,13 @@ func CheckIfClickHouseCloudHasSharedMergeTreeEnabled(ctx context.Context, logger func CheckIfTablesEmptyAndEngine(ctx context.Context, logger log.Logger, conn clickhouse.Conn, tables []string, initialSnapshotEnabled bool, checkForCloudSMT bool, ) error { - queryInput := make([]any, 0, len(tables)) + queryTables := make([]string, 0, len(tables)) for _, table := range tables { - queryInput = append(queryInput, table) + queryTables = append(queryTables, QuoteLiteral(table)) } rows, err := Query(ctx, logger, conn, fmt.Sprintf("SELECT name,engine,total_rows FROM system.tables WHERE database=currentDatabase() AND name IN (%s)", - strings.Join(slices.Repeat([]string{"?"}, len(tables)), ",")), queryInput...) + strings.Join(queryTables, ","))) if err != nil { return fmt.Errorf("failed to get information for destination tables: %w", err) } @@ -78,13 +78,13 @@ func GetTableColumnsMapping(ctx context.Context, logger log.Logger, conn clickho tables []string, ) (map[string][]ClickHouseColumn, error) { tableColumnsMapping := make(map[string][]ClickHouseColumn, len(tables)) - queryInput := make([]any, 0, len(tables)) + queryTables := make([]string, 0, len(tables)) for _, table := range tables { - queryInput = append(queryInput, table) + queryTables = append(queryTables, QuoteLiteral(table)) } rows, err := Query(ctx, logger, conn, fmt.Sprintf("SELECT name,type,table FROM system.columns WHERE database=currentDatabase() AND table IN (%s)", - strings.Join(slices.Repeat([]string{"?"}, len(tables)), ",")), queryInput...) 
+ strings.Join(queryTables, ","))) if err != nil { return nil, fmt.Errorf("failed to get columns for destination tables: %w", err) } diff --git a/flow/shared/constants.go b/flow/shared/constants.go index b9d40a3243..b297c1e019 100644 --- a/flow/shared/constants.go +++ b/flow/shared/constants.go @@ -1,9 +1,28 @@ package shared import ( + "time" + "go.temporal.io/sdk/temporal" ) +var Year0000 = time.Date(0, 1, 1, 0, 0, 0, 0, time.UTC) + +const ( + MoneyOID uint32 = 790 + TxidSnapshotOID uint32 = 2970 + TsvectorOID uint32 = 3614 + TsqueryOID uint32 = 3615 +) + +const ( + InternalVersion_First uint32 = iota + InternalVersion_PgVectorAsFloatArray + + TotalNumberOfInternalVersions + InternalVersion_Latest = TotalNumberOfInternalVersions - 1 +) + type ( ContextKey string TaskQueueID string diff --git a/flow/datatypes/bigint.go b/flow/shared/datatypes/bigint.go similarity index 100% rename from flow/datatypes/bigint.go rename to flow/shared/datatypes/bigint.go diff --git a/flow/datatypes/bigint_test.go b/flow/shared/datatypes/bigint_test.go similarity index 100% rename from flow/datatypes/bigint_test.go rename to flow/shared/datatypes/bigint_test.go diff --git a/flow/datatypes/geo.go b/flow/shared/datatypes/geo.go similarity index 100% rename from flow/datatypes/geo.go rename to flow/shared/datatypes/geo.go diff --git a/flow/datatypes/geo_test.go b/flow/shared/datatypes/geo_test.go similarity index 100% rename from flow/datatypes/geo_test.go rename to flow/shared/datatypes/geo_test.go diff --git a/flow/datatypes/hstore.go b/flow/shared/datatypes/hstore.go similarity index 100% rename from flow/datatypes/hstore.go rename to flow/shared/datatypes/hstore.go diff --git a/flow/datatypes/hstore_test.go b/flow/shared/datatypes/hstore_test.go similarity index 100% rename from flow/datatypes/hstore_test.go rename to flow/shared/datatypes/hstore_test.go diff --git a/flow/datatypes/interval.go b/flow/shared/datatypes/interval.go similarity index 100% rename from flow/datatypes/interval.go rename to flow/shared/datatypes/interval.go diff --git a/flow/datatypes/numeric.go b/flow/shared/datatypes/numeric.go similarity index 99% rename from flow/datatypes/numeric.go rename to flow/shared/datatypes/numeric.go index 8b942e4f67..6a6310abe2 100644 --- a/flow/datatypes/numeric.go +++ b/flow/shared/datatypes/numeric.go @@ -23,7 +23,7 @@ func (ClickHouseNumericCompatibility) MaxPrecision() int16 { } func (ClickHouseNumericCompatibility) MaxScale() int16 { - return 38 + return 76 } func (c ClickHouseNumericCompatibility) DefaultPrecisionAndScale() (int16, int16) { diff --git a/flow/shared/err_types.go b/flow/shared/err_types.go index 2b345ccd42..a3a694e423 100644 --- a/flow/shared/err_types.go +++ b/flow/shared/err_types.go @@ -29,3 +29,5 @@ func SkipSendingToIncidentIo(errTags []string) bool { } return false } + +type QRepWarnings []error diff --git a/flow/shared/exceptions/numeric.go b/flow/shared/exceptions/numeric.go new file mode 100644 index 0000000000..880607b220 --- /dev/null +++ b/flow/shared/exceptions/numeric.go @@ -0,0 +1,37 @@ +package exceptions + +type NumericTruncatedError struct { + error + DestinationTable string + DestinationColumn string +} + +func NewNumericTruncatedError(err error, destinationTable, destinationColumn string) *NumericTruncatedError { + return &NumericTruncatedError{err, destinationTable, destinationColumn} +} + +func (e *NumericTruncatedError) Error() string { + return e.error.Error() +} + +func (e *NumericTruncatedError) Unwrap() error { + return e.error +} + +type 
NumericOutOfRangeError struct { + error + DestinationTable string + DestinationColumn string +} + +func NewNumericOutOfRangeError(err error, destinationTable, destinationColumn string) *NumericOutOfRangeError { + return &NumericOutOfRangeError{err, destinationTable, destinationColumn} +} + +func (e *NumericOutOfRangeError) Error() string { + return e.error.Error() +} + +func (e *NumericOutOfRangeError) Unwrap() error { + return e.error +} diff --git a/flow/shared/mysql/type_conversion.go b/flow/shared/mysql/type_conversion.go new file mode 100644 index 0000000000..968dbc50f7 --- /dev/null +++ b/flow/shared/mysql/type_conversion.go @@ -0,0 +1,67 @@ +package mysql + +import ( + "fmt" + "strings" + + "github.com/PeerDB-io/peerdb/flow/shared/types" +) + +func QkindFromMysqlColumnType(ct string) (types.QValueKind, error) { + ct, isUnsigned := strings.CutSuffix(ct, " unsigned") + ct, param, _ := strings.Cut(ct, "(") + switch strings.ToLower(ct) { + case "json": + return types.QValueKindJSON, nil + case "char", "varchar", "text", "set", "tinytext", "mediumtext", "longtext": + return types.QValueKindString, nil + case "enum": + return types.QValueKindEnum, nil + case "binary", "varbinary", "blob", "tinyblob", "mediumblob", "longblob": + return types.QValueKindBytes, nil + case "date": + return types.QValueKindDate, nil + case "datetime", "timestamp", "time": + return types.QValueKindTimestamp, nil + case "decimal", "numeric": + return types.QValueKindNumeric, nil + case "float": + return types.QValueKindFloat32, nil + case "double": + return types.QValueKindFloat64, nil + case "tinyint": + if strings.HasPrefix(param, "1)") { + return types.QValueKindBoolean, nil + } else if isUnsigned { + return types.QValueKindUInt8, nil + } else { + return types.QValueKindInt8, nil + } + case "smallint", "year": + if isUnsigned { + return types.QValueKindUInt16, nil + } else { + return types.QValueKindInt16, nil + } + case "mediumint", "int": + if isUnsigned { + return types.QValueKindUInt32, nil + } else { + return types.QValueKindInt32, nil + } + case "bit": + return types.QValueKindUInt64, nil + case "bigint": + if isUnsigned { + return types.QValueKindUInt64, nil + } else { + return types.QValueKindInt64, nil + } + case "vector": + return types.QValueKindArrayFloat32, nil + case "geometry", "point", "polygon", "linestring", "multipoint", "multipolygon", "geomcollection": + return types.QValueKindGeometry, nil + default: + return types.QValueKind(""), fmt.Errorf("unknown mysql type %s", ct) + } +} diff --git a/flow/shared/postgres.go b/flow/shared/postgres.go index 45a9bc76f7..1207ce1020 100644 --- a/flow/shared/postgres.go +++ b/flow/shared/postgres.go @@ -12,6 +12,7 @@ import ( "github.com/jackc/pgx/v5/pgconn" "github.com/jackc/pgx/v5/pgtype" "github.com/jackc/pgx/v5/pgxpool" + pgvectorpgx "github.com/pgvector/pgvector-go/pgx" "go.temporal.io/sdk/log" "github.com/PeerDB-io/peerdb/flow/shared/exceptions" @@ -59,17 +60,33 @@ func GetCustomDataTypes(ctx context.Context, conn *pgx.Conn) (map[uint32]CustomD return customTypeMap, nil } -func RegisterHStore(ctx context.Context, conn *pgx.Conn) error { - var hstoreOID uint32 - if err := conn.QueryRow(ctx, `select oid from pg_type where typname = 'hstore'`).Scan(&hstoreOID); err != nil { - // hstore isn't present, just proceed - if errors.Is(err, pgx.ErrNoRows) { - return nil - } +func RegisterExtensions(ctx context.Context, conn *pgx.Conn, version uint32) error { + var hstoreOID *uint32 + var vectorOID *uint32 + var halfvecOID *uint32 + var sparsevecOID *uint32 + 
if err := conn.QueryRow( + ctx, "select to_regtype('hstore')::oid,to_regtype('vector')::oid,to_regtype('halfvec')::oid,to_regtype('sparsevec')::oid", + ).Scan(&hstoreOID, &vectorOID, &halfvecOID, &sparsevecOID); err != nil { return err } - conn.TypeMap().RegisterType(&pgtype.Type{Name: "hstore", OID: hstoreOID, Codec: pgtype.HstoreCodec{}}) + typeMap := conn.TypeMap() + if hstoreOID != nil { + typeMap.RegisterType(&pgtype.Type{Name: "hstore", OID: *hstoreOID, Codec: pgtype.HstoreCodec{}}) + } + + if version >= InternalVersion_PgVectorAsFloatArray { + if vectorOID != nil { + typeMap.RegisterType(&pgtype.Type{Name: "vector", OID: *vectorOID, Codec: pgvectorpgx.VectorCodec{}}) + if halfvecOID != nil { + typeMap.RegisterType(&pgtype.Type{Name: "halfvec", OID: *halfvecOID, Codec: pgvectorpgx.HalfVectorCodec{}}) + } + if sparsevecOID != nil { + typeMap.RegisterType(&pgtype.Type{Name: "sparsevec", OID: *sparsevecOID, Codec: pgvectorpgx.SparseVectorCodec{}}) + } + } + } return nil } @@ -273,6 +290,10 @@ const ( psQuotedEscape psUnquoted psUnquotedEscape + psN + psNU + psNUL + psNULL ) // see array_in from postgres @@ -285,6 +306,7 @@ func ParsePgArrayToStringSlice(data []byte, delim byte) []string { var sb []byte ps := psSearch2 for _, ch := range data { + retry: switch ps { case psSearch: if ch == delim { @@ -295,6 +317,8 @@ func ParsePgArrayToStringSlice(data []byte, delim byte) []string { ps = psQuoted } else if ch == '\\' { ps = psUnquotedEscape + } else if ch == 'N' { + ps = psN } else if ch != '{' && ch != ' ' && ch != '\t' && ch != '\n' && ch != '\v' && ch != '\f' && ch != '\r' { sb = append(sb, ch) ps = psUnquoted @@ -333,6 +357,43 @@ func ParsePgArrayToStringSlice(data []byte, delim byte) []string { case psUnquotedEscape: sb = append(sb, ch) ps = psUnquoted + case psN: + if ch == 'U' { + ps = psNU + } else { + sb = append(sb, 'N') + ps = psUnquoted + goto retry + } + case psNU: + if ch == 'L' { + ps = psNUL + } else { + sb = append(sb, 'N', 'U') + ps = psUnquoted + goto retry + } + case psNUL: + if ch == 'L' { + ps = psNULL + } else { + sb = append(sb, 'N', 'U', 'L') + ps = psUnquoted + goto retry + } + case psNULL: + if ch == delim || ch == '}' { + result = append(result, "") + if ch == '}' { + ps = psSearch2 + } else { + ps = psSearch + } + } else { + sb = append(sb, 'N', 'U', 'L', 'L') + ps = psUnquoted + goto retry + } } } return result diff --git a/flow/shared/postgres/type_conversion.go b/flow/shared/postgres/type_conversion.go new file mode 100644 index 0000000000..f1f8e46919 --- /dev/null +++ b/flow/shared/postgres/type_conversion.go @@ -0,0 +1,146 @@ +package postgres + +import ( + "errors" + + "github.com/jackc/pgx/v5/pgtype" + + "github.com/PeerDB-io/peerdb/flow/shared" + "github.com/PeerDB-io/peerdb/flow/shared/types" +) + +func PostgresOIDToQValueKind( + recvOID uint32, + customTypeMapping map[uint32]shared.CustomDataType, + typeMap *pgtype.Map, + version uint32, +) (types.QValueKind, error) { + switch recvOID { + case pgtype.BoolOID: + return types.QValueKindBoolean, nil + case pgtype.Int2OID: + return types.QValueKindInt16, nil + case pgtype.Int4OID: + return types.QValueKindInt32, nil + case pgtype.Int8OID: + return types.QValueKindInt64, nil + case pgtype.Float4OID: + return types.QValueKindFloat32, nil + case pgtype.Float8OID: + return types.QValueKindFloat64, nil + case pgtype.QCharOID: + return types.QValueKindQChar, nil + case pgtype.TextOID, pgtype.VarcharOID, pgtype.BPCharOID: + return types.QValueKindString, nil + case pgtype.ByteaOID: + return 
types.QValueKindBytes, nil + case pgtype.JSONOID: + return types.QValueKindJSON, nil + case pgtype.JSONBOID: + return types.QValueKindJSONB, nil + case pgtype.UUIDOID: + return types.QValueKindUUID, nil + case pgtype.TimeOID: + return types.QValueKindTime, nil + case pgtype.DateOID: + return types.QValueKindDate, nil + case pgtype.CIDROID: + return types.QValueKindCIDR, nil + case pgtype.MacaddrOID: + return types.QValueKindMacaddr, nil + case pgtype.InetOID: + return types.QValueKindINET, nil + case pgtype.TimestampOID: + return types.QValueKindTimestamp, nil + case pgtype.TimestamptzOID: + return types.QValueKindTimestampTZ, nil + case pgtype.NumericOID: + return types.QValueKindNumeric, nil + case pgtype.Int2ArrayOID: + return types.QValueKindArrayInt16, nil + case pgtype.Int4ArrayOID: + return types.QValueKindArrayInt32, nil + case pgtype.Int8ArrayOID: + return types.QValueKindArrayInt64, nil + case pgtype.PointOID: + return types.QValueKindPoint, nil + case pgtype.Float4ArrayOID: + return types.QValueKindArrayFloat32, nil + case pgtype.Float8ArrayOID: + return types.QValueKindArrayFloat64, nil + case pgtype.BoolArrayOID: + return types.QValueKindArrayBoolean, nil + case pgtype.DateArrayOID: + return types.QValueKindArrayDate, nil + case pgtype.TimestampArrayOID: + return types.QValueKindArrayTimestamp, nil + case pgtype.TimestamptzArrayOID: + return types.QValueKindArrayTimestampTZ, nil + case pgtype.UUIDArrayOID: + return types.QValueKindArrayUUID, nil + case pgtype.TextArrayOID, pgtype.VarcharArrayOID, pgtype.BPCharArrayOID: + return types.QValueKindArrayString, nil + case pgtype.JSONArrayOID: + return types.QValueKindArrayJSON, nil + case pgtype.JSONBArrayOID: + return types.QValueKindArrayJSONB, nil + case pgtype.NumericArrayOID: + return types.QValueKindArrayNumeric, nil + case pgtype.IntervalOID: + return types.QValueKindInterval, nil + case pgtype.IntervalArrayOID: + return types.QValueKindArrayInterval, nil + default: + if typeName, ok := typeMap.TypeForOID(recvOID); ok { + colType := types.QValueKindString + if typeData, ok := customTypeMapping[recvOID]; ok { + colType = CustomTypeToQKind(typeData, version) + } + return colType, errors.New(typeName.Name) + } else { + // workaround for some types not being defined by pgtype + switch recvOID { + case pgtype.TimetzOID: + return types.QValueKindTimeTZ, nil + case pgtype.PointOID: + return types.QValueKindPoint, nil + default: + if typeData, ok := customTypeMapping[recvOID]; ok { + return CustomTypeToQKind(typeData, version), nil + } + return types.QValueKindString, nil + } + } + } +} + +func CustomTypeToQKind(typeData shared.CustomDataType, version uint32) types.QValueKind { + if typeData.Type == 'e' { + if typeData.Delim != 0 { + return types.QValueKindArrayEnum + } else { + return types.QValueKindEnum + } + } + + if typeData.Delim != 0 { + return types.QValueKindArrayString + } + + switch typeData.Name { + case "geometry": + return types.QValueKindGeometry + case "geography": + return types.QValueKindGeography + case "hstore": + return types.QValueKindHStore + case "vector", "halfvec", "sparsevec": + if version >= shared.InternalVersion_PgVectorAsFloatArray { + return types.QValueKindArrayFloat32 + } else { + return types.QValueKindString + } + default: + return types.QValueKindString + } +} diff --git a/flow/shared/shared_test.go b/flow/shared/shared_test.go index 457d8a15cf..38307d6542 100644 --- a/flow/shared/shared_test.go +++ b/flow/shared/shared_test.go @@ -136,6 +136,10 @@ func TestParsePgArray(t *testing.T) { 
{"[1:2]{1,2,\\\"3}", []string{"1", "2", "\"3"}}, {"{ 1, \"a\\\"b\", 3\"2\"\\\\}", []string{"1", "a\"b", "32\\"}}, {"{{1,2},{3},{4,5},{{6,7,8},{9}}}", []string{"1", "2", "3", "4", "5", "6", "7", "8", "9"}}, + { + "{{N,NU,NUL,NULL,NUL\\L,\\NULL,NULLL},{{NO,NUO,NULO,\"NULL\"},{ NULL}}}", + []string{"N", "NU", "NUL", "", "NULL", "NULL", "NULLL", "NO", "NUO", "NULO", "NULL", ""}, + }, {"{}", nil}, {"{,}", []string{""}}, } diff --git a/flow/shared/string.go b/flow/shared/string.go index b6295ab00d..ccd6e60758 100644 --- a/flow/shared/string.go +++ b/flow/shared/string.go @@ -1,8 +1,12 @@ package shared import ( + "net" "regexp" + "strconv" "unsafe" + + "golang.org/x/exp/constraints" ) func UnsafeFastStringToReadOnlyBytes(s string) []byte { @@ -25,3 +29,7 @@ func ReplaceIllegalCharactersWithUnderscores(s string) string { func IsValidReplicationName(s string) bool { return reLegalIdentifierLower.MatchString(s) } + +func JoinHostPort[I constraints.Integer](host string, port I) string { + return net.JoinHostPort(host, strconv.FormatUint(uint64(port), 10)) +} diff --git a/flow/shared/types/kind.go b/flow/shared/types/kind.go new file mode 100644 index 0000000000..342d5d8602 --- /dev/null +++ b/flow/shared/types/kind.go @@ -0,0 +1,160 @@ +package types + +import ( + "strings" +) + +type QValueKind string + +const ( + QValueKindInvalid QValueKind = "invalid" + QValueKindFloat32 QValueKind = "float32" + QValueKindFloat64 QValueKind = "float64" + QValueKindInt8 QValueKind = "int8" + QValueKindInt16 QValueKind = "int16" + QValueKindInt32 QValueKind = "int32" + QValueKindInt64 QValueKind = "int64" + QValueKindUInt8 QValueKind = "uint8" + QValueKindUInt16 QValueKind = "uint16" + QValueKindUInt32 QValueKind = "uint32" + QValueKindUInt64 QValueKind = "uint64" + QValueKindBoolean QValueKind = "bool" + QValueKindQChar QValueKind = "qchar" + QValueKindString QValueKind = "string" + QValueKindEnum QValueKind = "enum" + QValueKindTimestamp QValueKind = "timestamp" + QValueKindTimestampTZ QValueKind = "timestamptz" + QValueKindDate QValueKind = "date" + QValueKindTime QValueKind = "time" + QValueKindTimeTZ QValueKind = "timetz" + QValueKindInterval QValueKind = "interval" + QValueKindNumeric QValueKind = "numeric" + QValueKindBytes QValueKind = "bytes" + QValueKindUUID QValueKind = "uuid" + QValueKindJSON QValueKind = "json" + QValueKindJSONB QValueKind = "jsonb" + QValueKindHStore QValueKind = "hstore" + QValueKindGeography QValueKind = "geography" + QValueKindGeometry QValueKind = "geometry" + QValueKindPoint QValueKind = "point" + + // network types + QValueKindCIDR QValueKind = "cidr" + QValueKindINET QValueKind = "inet" + QValueKindMacaddr QValueKind = "macaddr" + + // array types + QValueKindArrayFloat32 QValueKind = "array_float32" + QValueKindArrayFloat64 QValueKind = "array_float64" + QValueKindArrayInt16 QValueKind = "array_int16" + QValueKindArrayInt32 QValueKind = "array_int32" + QValueKindArrayInt64 QValueKind = "array_int64" + QValueKindArrayString QValueKind = "array_string" + QValueKindArrayEnum QValueKind = "array_enum" + QValueKindArrayDate QValueKind = "array_date" + QValueKindArrayInterval QValueKind = "array_interval" + QValueKindArrayTimestamp QValueKind = "array_timestamp" + QValueKindArrayTimestampTZ QValueKind = "array_timestamptz" + QValueKindArrayBoolean QValueKind = "array_bool" + QValueKindArrayJSON QValueKind = "array_json" + QValueKindArrayJSONB QValueKind = "array_jsonb" + QValueKindArrayUUID QValueKind = "array_uuid" + QValueKindArrayNumeric QValueKind = "array_numeric" +) + +func 
(kind QValueKind) IsArray() bool { + return strings.HasPrefix(string(kind), "array_") +} + +var QValueKindToSnowflakeTypeMap = map[QValueKind]string{ + QValueKindBoolean: "BOOLEAN", + QValueKindInt8: "INTEGER", + QValueKindInt16: "INTEGER", + QValueKindInt32: "INTEGER", + QValueKindInt64: "INTEGER", + QValueKindUInt8: "INTEGER", + QValueKindUInt16: "INTEGER", + QValueKindUInt32: "INTEGER", + QValueKindUInt64: "INTEGER", + QValueKindFloat32: "FLOAT", + QValueKindFloat64: "FLOAT", + QValueKindQChar: "CHAR", + QValueKindString: "STRING", + QValueKindEnum: "STRING", + QValueKindJSON: "VARIANT", + QValueKindJSONB: "VARIANT", + QValueKindTimestamp: "TIMESTAMP_NTZ", + QValueKindTimestampTZ: "TIMESTAMP_TZ", + QValueKindInterval: "VARIANT", + QValueKindTime: "TIME", + QValueKindTimeTZ: "TIME", + QValueKindDate: "DATE", + QValueKindBytes: "BINARY", + QValueKindUUID: "STRING", + QValueKindInvalid: "STRING", + QValueKindHStore: "VARIANT", + QValueKindGeography: "GEOGRAPHY", + QValueKindGeometry: "GEOMETRY", + QValueKindPoint: "GEOMETRY", + + // array types will be mapped to VARIANT + QValueKindArrayFloat32: "VARIANT", + QValueKindArrayFloat64: "VARIANT", + QValueKindArrayInt32: "VARIANT", + QValueKindArrayInt64: "VARIANT", + QValueKindArrayInt16: "VARIANT", + QValueKindArrayString: "VARIANT", + QValueKindArrayEnum: "VARIANT", + QValueKindArrayDate: "VARIANT", + QValueKindArrayInterval: "VARIANT", + QValueKindArrayTimestamp: "VARIANT", + QValueKindArrayTimestampTZ: "VARIANT", + QValueKindArrayBoolean: "VARIANT", + QValueKindArrayJSON: "VARIANT", + QValueKindArrayJSONB: "VARIANT", + QValueKindArrayUUID: "VARIANT", + QValueKindArrayNumeric: "VARIANT", +} + +var QValueKindToClickHouseTypeMap = map[QValueKind]string{ + QValueKindBoolean: "Bool", + QValueKindInt8: "Int8", + QValueKindInt16: "Int16", + QValueKindInt32: "Int32", + QValueKindInt64: "Int64", + QValueKindUInt8: "UInt8", + QValueKindUInt16: "UInt16", + QValueKindUInt32: "UInt32", + QValueKindUInt64: "UInt64", + QValueKindFloat32: "Float32", + QValueKindFloat64: "Float64", + QValueKindQChar: "FixedString(1)", + QValueKindString: "String", + QValueKindEnum: "LowCardinality(String)", + QValueKindJSON: "String", + QValueKindTimestamp: "DateTime64(6)", + QValueKindTimestampTZ: "DateTime64(6)", + QValueKindTime: "DateTime64(6)", + QValueKindTimeTZ: "DateTime64(6)", + QValueKindDate: "Date32", + QValueKindBytes: "String", + QValueKindUUID: "UUID", + QValueKindInvalid: "String", + QValueKindHStore: "String", + + QValueKindArrayFloat32: "Array(Float32)", + QValueKindArrayFloat64: "Array(Float64)", + QValueKindArrayInt16: "Array(Int16)", + QValueKindArrayInt32: "Array(Int32)", + QValueKindArrayInt64: "Array(Int64)", + QValueKindArrayString: "Array(String)", + QValueKindArrayEnum: "Array(LowCardinality(String))", + QValueKindArrayBoolean: "Array(Bool)", + QValueKindArrayDate: "Array(Date)", + QValueKindArrayInterval: "Array(String)", + QValueKindArrayTimestamp: "Array(DateTime64(6))", + QValueKindArrayTimestampTZ: "Array(DateTime64(6))", + QValueKindArrayJSON: "String", + QValueKindArrayJSONB: "String", + QValueKindArrayUUID: "Array(UUID)", +} diff --git a/flow/model/qvalue/qschema.go b/flow/shared/types/qschema.go similarity index 98% rename from flow/model/qvalue/qschema.go rename to flow/shared/types/qschema.go index 3ccaf0dc5f..cd45b3793e 100644 --- a/flow/model/qvalue/qschema.go +++ b/flow/shared/types/qschema.go @@ -1,4 +1,4 @@ -package qvalue +package types import ( "slices" diff --git a/flow/model/qvalue/qvalue.go b/flow/shared/types/qvalue.go 
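// An illustrative sketch, not applied by this diff: exercising the QValueKind
// helpers defined in flow/shared/types/kind.go above. The fallback-to-String
// policy in the helper below is an assumption made for the example, not
// behaviour taken from this change.
package main

import (
	"fmt"

	"github.com/PeerDB-io/peerdb/flow/shared/types"
)

// clickHouseColumnType looks up the destination column type for a QValueKind,
// defaulting to String when the kind has no entry in the map.
func clickHouseColumnType(kind types.QValueKind) string {
	if chType, ok := types.QValueKindToClickHouseTypeMap[kind]; ok {
		return chType
	}
	return "String"
}

func main() {
	kind := types.QValueKindArrayInt32
	fmt.Println(kind.IsArray())             // true: the kind string carries the "array_" prefix
	fmt.Println(clickHouseColumnType(kind)) // "Array(Int32)" per QValueKindToClickHouseTypeMap
}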
similarity index 92% rename from flow/model/qvalue/qvalue.go rename to flow/shared/types/qvalue.go index 0d3e95968a..8e3779f88c 100644 --- a/flow/model/qvalue/qvalue.go +++ b/flow/shared/types/qvalue.go @@ -1,4 +1,4 @@ -package qvalue +package types import ( "time" @@ -237,7 +237,7 @@ func (v QValueQChar) Value() any { } func (v QValueQChar) LValue(ls *lua.LState) lua.LValue { - return lua.LString(v.Val) + return lua.LString(string(v.Val)) } type QValueString struct { @@ -321,7 +321,7 @@ func (v QValueDate) LValue(ls *lua.LState) lua.LValue { } type QValueTime struct { - Val time.Time + Val time.Duration } func (QValueTime) Kind() QValueKind { @@ -333,11 +333,11 @@ func (v QValueTime) Value() any { } func (v QValueTime) LValue(ls *lua.LState) lua.LValue { - return shared.LuaTime.New(ls, v.Val) + return shared.LuaTime.New(ls, time.Unix(0, 0).UTC().Add(v.Val)) } type QValueTimeTZ struct { - Val time.Time + Val time.Duration } func (QValueTimeTZ) Kind() QValueKind { @@ -349,7 +349,7 @@ func (v QValueTimeTZ) Value() any { } func (v QValueTimeTZ) LValue(ls *lua.LState) lua.LValue { - return shared.LuaTime.New(ls, v.Val) + return shared.LuaTime.New(ls, time.Unix(0, 0).UTC().Add(v.Val)) } type QValueInterval struct { @@ -368,24 +368,28 @@ func (v QValueInterval) LValue(ls *lua.LState) lua.LValue { return lua.LString(v.Val) } -type QValueTSTZRange struct { - Val string +type QValueArrayInterval struct { + Val []string } -func (QValueTSTZRange) Kind() QValueKind { - return QValueKindInterval +func (QValueArrayInterval) Kind() QValueKind { + return QValueKindArrayInterval } -func (v QValueTSTZRange) Value() any { +func (v QValueArrayInterval) Value() any { return v.Val } -func (v QValueTSTZRange) LValue(ls *lua.LState) lua.LValue { - return lua.LString(v.Val) +func (v QValueArrayInterval) LValue(ls *lua.LState) lua.LValue { + return shared.SliceToLTable(ls, v.Val, func(x string) lua.LValue { + return lua.LString(x) + }) } type QValueNumeric struct { - Val decimal.Decimal + Val decimal.Decimal + Precision int16 + Scale int16 } func (QValueNumeric) Kind() QValueKind { @@ -620,7 +624,7 @@ type QValueArrayInt16 struct { } func (QValueArrayInt16) Kind() QValueKind { - return QValueKindInt16 + return QValueKindArrayInt16 } func (v QValueArrayInt16) Value() any { @@ -638,7 +642,7 @@ type QValueArrayInt32 struct { } func (QValueArrayInt32) Kind() QValueKind { - return QValueKindInt32 + return QValueKindArrayInt32 } func (v QValueArrayInt32) Value() any { @@ -776,3 +780,23 @@ func (v QValueArrayEnum) LValue(ls *lua.LState) lua.LValue { return lua.LString(x) }) } + +type QValueArrayNumeric struct { + Val []decimal.Decimal + Precision int16 + Scale int16 +} + +func (QValueArrayNumeric) Kind() QValueKind { + return QValueKindArrayNumeric +} + +func (v QValueArrayNumeric) Value() any { + return v.Val +} + +func (v QValueArrayNumeric) LValue(ls *lua.LState) lua.LValue { + return shared.SliceToLTable(ls, v.Val, func(x decimal.Decimal) lua.LValue { + return shared.LuaDecimal.New(ls, x) + }) +} diff --git a/flow/model/qvalue/type_converter.go b/flow/shared/types/type_converter.go similarity index 98% rename from flow/model/qvalue/type_converter.go rename to flow/shared/types/type_converter.go index 331125145b..a93ea65b93 100644 --- a/flow/model/qvalue/type_converter.go +++ b/flow/shared/types/type_converter.go @@ -1,4 +1,4 @@ -package qvalue +package types //nolint:iface type TypeConversion interface { diff --git a/flow/workflows/cdc_flow.go b/flow/workflows/cdc_flow.go index 11856d1212..b411fc3750 100644 --- 
a/flow/workflows/cdc_flow.go +++ b/flow/workflows/cdc_flow.go @@ -224,8 +224,43 @@ func processTableAdditions( additionalTablesCfg.InitialSnapshotOnly = true additionalTablesCfg.TableMappings = flowConfigUpdate.AdditionalTables additionalTablesCfg.Resync = false + + addTablesSelector := workflow.NewNamedSelector(ctx, "AddTables") + addTablesSelector.AddReceive(ctx.Done(), func(_ workflow.ReceiveChannel, _ bool) {}) + flowSignalStateChangeChan := model.FlowSignalStateChange.GetSignalChannel(ctx) + flowSignalStateChangeChan.AddToSelector(addTablesSelector, func(val *protos.FlowStateChangeRequest, _ bool) { + if val.RequestedFlowState == protos.FlowStatus_STATUS_TERMINATING { + logger.Info("terminating CDCFlow during table additions") + state.ActiveSignal = model.TerminateSignal + dropCfg := syncStateToConfigProtoInCatalog(ctx, cfg, state) + state.DropFlowInput = &protos.DropFlowInput{ + FlowJobName: dropCfg.FlowJobName, + FlowConnectionConfigs: dropCfg, + DropFlowStats: val.DropMirrorStats, + SkipDestinationDrop: val.SkipDestinationDrop, + } + } else if val.RequestedFlowState == protos.FlowStatus_STATUS_RESYNC { + logger.Info("resync requested during table additions") + state.ActiveSignal = model.ResyncSignal + // since we are adding to TableMappings, multiple signals can lead to duplicates + // we should ContinueAsNew after the first signal in the selector, but just in case + cfg.Resync = true + cfg.DoInitialSnapshot = true + state.DropFlowInput = &protos.DropFlowInput{ + // to be filled in just before ContinueAsNew + FlowJobName: "", + FlowConnectionConfigs: nil, + DropFlowStats: val.DropMirrorStats, + SkipDestinationDrop: val.SkipDestinationDrop, + Resync: true, + } + } else if val.RequestedFlowState == protos.FlowStatus_STATUS_PAUSED { + logger.Info("pause requested during table additions, ignoring") + } + }) + // execute the sync flow as a child workflow - childAdditionalTablesCDCFlowOpts := workflow.ChildWorkflowOptions{ + childAddTablesCDCFlowOpts := workflow.ChildWorkflowOptions{ WorkflowID: childAdditionalTablesCDCFlowID, ParentClosePolicy: enums.PARENT_CLOSE_POLICY_REQUEST_CANCEL, RetryPolicy: &temporal.RetryPolicy{ @@ -234,16 +269,39 @@ func processTableAdditions( TypedSearchAttributes: mirrorNameSearch, WaitForCancellation: true, } - childAdditionalTablesCDCFlowCtx := workflow.WithChildOptions(ctx, childAdditionalTablesCDCFlowOpts) - childAdditionalTablesCDCFlowFuture := workflow.ExecuteChildWorkflow( - childAdditionalTablesCDCFlowCtx, + childAddTablesCDCFlowCtx := workflow.WithChildOptions(ctx, childAddTablesCDCFlowOpts) + childAddTablesCDCFlowFuture := workflow.ExecuteChildWorkflow( + childAddTablesCDCFlowCtx, CDCFlowWorkflow, additionalTablesCfg, nil, ) var res *CDCFlowWorkflowResult - if err := childAdditionalTablesCDCFlowFuture.Get(childAdditionalTablesCDCFlowCtx, &res); err != nil { - return err + var addTablesFlowErr error + addTablesSelector.AddFuture(childAddTablesCDCFlowFuture, func(f workflow.Future) { + addTablesFlowErr = f.Get(childAddTablesCDCFlowCtx, &res) + }) + + for res == nil { + addTablesSelector.Select(ctx) + if state.ActiveSignal == model.TerminateSignal || state.ActiveSignal == model.ResyncSignal { + if state.ActiveSignal == model.ResyncSignal { + // additional tables should also be resynced, we don't know how much was done so far + state.SyncFlowOptions.TableMappings = append(state.SyncFlowOptions.TableMappings, flowConfigUpdate.AdditionalTables...) 
+ resyncCfg := syncStateToConfigProtoInCatalog(ctx, cfg, state) + state.DropFlowInput.FlowJobName = resyncCfg.FlowJobName + state.DropFlowInput.FlowConnectionConfigs = resyncCfg + } + return workflow.NewContinueAsNewError(ctx, DropFlowWorkflow, state.DropFlowInput) + } + if err := ctx.Err(); err != nil { + logger.Info("CDCFlow canceled during table additions", slog.Any("error", err)) + return err + } + if addTablesFlowErr != nil { + logger.Error("failed to execute child CDCFlow for additional tables", slog.Any("error", addTablesFlowErr)) + return fmt.Errorf("failed to execute child CDCFlow for additional tables: %w", addTablesFlowErr) + } } maps.Copy(state.SyncFlowOptions.SrcTableIdNameMapping, res.SyncFlowOptions.SrcTableIdNameMapping) @@ -459,9 +517,9 @@ func CDCFlowWorkflow( // a suffix to the table names. if cfg.Resync { for _, mapping := range state.SyncFlowOptions.TableMappings { - oldName := mapping.DestinationTableIdentifier - newName := oldName + "_resync" - mapping.DestinationTableIdentifier = newName + if mapping.Engine != protos.TableEngine_CH_ENGINE_NULL { + mapping.DestinationTableIdentifier += "_resync" + } } // because we have renamed the tables. cfg.TableMappings = state.SyncFlowOptions.TableMappings @@ -471,9 +529,9 @@ func CDCFlowWorkflow( // it should return the table schema for the source peer setupFlowID := GetChildWorkflowID("setup-flow", cfg.FlowJobName, originalRunID) - selector := workflow.NewNamedSelector(ctx, "Setup/Snapshot") - selector.AddReceive(ctx.Done(), func(_ workflow.ReceiveChannel, _ bool) {}) - flowSignalStateChangeChan.AddToSelector(selector, func(val *protos.FlowStateChangeRequest, _ bool) { + setupSnapshotSelector := workflow.NewNamedSelector(ctx, "Setup/Snapshot") + setupSnapshotSelector.AddReceive(ctx.Done(), func(_ workflow.ReceiveChannel, _ bool) {}) + flowSignalStateChangeChan.AddToSelector(setupSnapshotSelector, func(val *protos.FlowStateChangeRequest, _ bool) { if val.RequestedFlowState == protos.FlowStatus_STATUS_PAUSED { logger.Warn("pause requested during setup, ignoring") } else if val.RequestedFlowState == protos.FlowStatus_STATUS_TERMINATING { @@ -490,6 +548,8 @@ func CDCFlowWorkflow( cfg.Resync = true cfg.DoInitialSnapshot = true cfg.TableMappings = originalTableMappings + // this is the only place where we can have a resync during a resync + // so we need to NOT sync the tableMappings to catalog to preserve original names uploadConfigToCatalog(ctx, cfg) state.DropFlowInput = &protos.DropFlowInput{ FlowJobName: cfg.FlowJobName, @@ -515,12 +575,12 @@ func CDCFlowWorkflow( var setupFlowOutput *protos.SetupFlowOutput var setupFlowError error - selector.AddFuture(setupFlowFuture, func(f workflow.Future) { + setupSnapshotSelector.AddFuture(setupFlowFuture, func(f workflow.Future) { setupFlowError = f.Get(setupFlowCtx, &setupFlowOutput) }) for setupFlowOutput == nil { - selector.Select(ctx) + setupSnapshotSelector.Select(ctx) if state.ActiveSignal == model.TerminateSignal || state.ActiveSignal == model.ResyncSignal { return state, workflow.NewContinueAsNewError(ctx, DropFlowWorkflow, state.DropFlowInput) } @@ -554,13 +614,13 @@ func CDCFlowWorkflow( snapshotFlowFuture := workflow.ExecuteChildWorkflow(snapshotFlowCtx, SnapshotFlowWorkflow, cfg) var snapshotDone bool var snapshotError error - selector.AddFuture(snapshotFlowFuture, func(f workflow.Future) { + setupSnapshotSelector.AddFuture(snapshotFlowFuture, func(f workflow.Future) { snapshotError = f.Get(snapshotFlowCtx, nil) snapshotDone = true }) for !snapshotDone { - selector.Select(ctx) 
+ setupSnapshotSelector.Select(ctx) if state.ActiveSignal == model.TerminateSignal || state.ActiveSignal == model.ResyncSignal { return state, workflow.NewContinueAsNewError(ctx, DropFlowWorkflow, state.DropFlowInput) } @@ -581,13 +641,20 @@ func CDCFlowWorkflow( } for _, mapping := range state.SyncFlowOptions.TableMappings { - oldName := mapping.DestinationTableIdentifier - newName := strings.TrimSuffix(oldName, "_resync") - renameOpts.RenameTableOptions = append(renameOpts.RenameTableOptions, &protos.RenameTableOption{ - CurrentName: oldName, - NewName: newName, - }) - mapping.DestinationTableIdentifier = newName + if mapping.Engine != protos.TableEngine_CH_ENGINE_NULL { + oldName := mapping.DestinationTableIdentifier + newName := strings.TrimSuffix(oldName, "_resync") + renameOpts.RenameTableOptions = append(renameOpts.RenameTableOptions, &protos.RenameTableOption{ + CurrentName: oldName, + NewName: newName, + }) + mapping.DestinationTableIdentifier = newName + } else { + renameOpts.RenameTableOptions = append(renameOpts.RenameTableOptions, &protos.RenameTableOption{ + CurrentName: mapping.DestinationTableIdentifier, + NewName: mapping.DestinationTableIdentifier, + }) + } } renameTablesCtx := workflow.WithActivityOptions(ctx, workflow.ActivityOptions{ @@ -598,8 +665,28 @@ func CDCFlowWorkflow( }, }) renameTablesFuture := workflow.ExecuteActivity(renameTablesCtx, flowable.RenameTables, renameOpts) - if err := renameTablesFuture.Get(renameTablesCtx, nil); err != nil { - return state, fmt.Errorf("failed to execute rename tables activity: %w", err) + var renameTablesDone bool + var renameTablesError error + setupSnapshotSelector.AddFuture(renameTablesFuture, func(f workflow.Future) { + renameTablesDone = true + if err := f.Get(renameTablesCtx, nil); err != nil { + renameTablesError = fmt.Errorf("failed to execute rename tables activity: %w", err) + logger.Error("failed to execute rename tables activity", slog.Any("error", err)) + } else { + logger.Info("rename tables activity completed successfully") + } + }) + for !renameTablesDone { + setupSnapshotSelector.Select(ctx) + if state.ActiveSignal == model.TerminateSignal || state.ActiveSignal == model.ResyncSignal { + return state, workflow.NewContinueAsNewError(ctx, DropFlowWorkflow, state.DropFlowInput) + } + if err := ctx.Err(); err != nil { + return nil, err + } + if renameTablesError != nil { + return state, renameTablesError + } } } diff --git a/flow/workflows/qrep_flow.go b/flow/workflows/qrep_flow.go index 42afb4e147..4caa3a7bd5 100644 --- a/flow/workflows/qrep_flow.go +++ b/flow/workflows/qrep_flow.go @@ -116,6 +116,7 @@ func (q *QRepFlowExecution) setupTableSchema(ctx workflow.Context, tableName str FlowName: q.config.FlowJobName, System: q.config.System, Env: q.config.Env, + Version: q.config.Version, } return workflow.ExecuteActivity(ctx, flowable.SetupTableSchema, tableSchemaInput).Get(ctx, nil) diff --git a/flow/workflows/setup_flow.go b/flow/workflows/setup_flow.go index b1326d2a02..0beefbe657 100644 --- a/flow/workflows/setup_flow.go +++ b/flow/workflows/setup_flow.go @@ -196,6 +196,7 @@ func (s *SetupFlowExecution) setupNormalizedTables( FlowName: s.cdcFlowName, System: flowConnectionConfigs.System, Env: flowConnectionConfigs.Env, + Version: flowConnectionConfigs.Version, } if err := workflow.ExecuteActivity(ctx, flowable.SetupTableSchema, tableSchemaInput).Get(ctx, nil); err != nil { diff --git a/flow/workflows/snapshot_flow.go b/flow/workflows/snapshot_flow.go index afedd7b976..71867fd75f 100644 --- 
a/flow/workflows/snapshot_flow.go +++ b/flow/workflows/snapshot_flow.go @@ -226,6 +226,7 @@ func (s *SnapshotFlowExecution) cloneTable( ParentMirrorName: flowName, Exclude: mapping.Exclude, Columns: mapping.Columns, + Version: s.config.Version, } boundSelector.SpawnChild(childCtx, QRepFlowWorkflow, nil, config, nil) diff --git a/nexus/Cargo.lock b/nexus/Cargo.lock index 26658dd2ca..651877ec6c 100644 --- a/nexus/Cargo.lock +++ b/nexus/Cargo.lock @@ -13,9 +13,9 @@ dependencies = [ [[package]] name = "adler2" -version = "2.0.0" +version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "512761e0bb2578dd7380c6baaa0f4ce03e84f95e960231d1dec8bf4d7d6e2627" +checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" [[package]] name = "aead" @@ -95,9 +95,9 @@ dependencies = [ [[package]] name = "anstream" -version = "0.6.18" +version = "0.6.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8acc5369981196006228e28809f761875c0327210a891e941f4c683b3a99529b" +checksum = "301af1932e46185686725e0fad2f8f2aa7da69dd70bf6ecc44d6b703844a3933" dependencies = [ "anstyle", "anstyle-parse", @@ -110,33 +110,33 @@ dependencies = [ [[package]] name = "anstyle" -version = "1.0.10" +version = "1.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "55cc3b69f167a1ef2e161439aa98aed94e6028e5f9a59be9a6ffb47aef1651f9" +checksum = "862ed96ca487e809f1c8e5a8447f6ee2cf102f846893800b20cebdf541fc6bbd" [[package]] name = "anstyle-parse" -version = "0.2.6" +version = "0.2.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b2d16507662817a6a20a9ea92df6652ee4f94f914589377d69f3b21bc5798a9" +checksum = "4e7644824f0aa2c7b9384579234ef10eb7efb6a0deb83f9630a49594dd9c15c2" dependencies = [ "utf8parse", ] [[package]] name = "anstyle-query" -version = "1.1.2" +version = "1.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "79947af37f4177cfead1110013d678905c37501914fba0efea834c3fe9a8d60c" +checksum = "6c8bdeb6047d8983be085bab0ba1472e6dc604e7041dbf6fcd5e71523014fae9" dependencies = [ "windows-sys 0.59.0", ] [[package]] name = "anstyle-wincon" -version = "3.0.8" +version = "3.0.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6680de5231bd6ee4c6191b8a1325daa282b415391ec9d3a37bd34f2060dc73fa" +checksum = "403f75924867bb1033c59fbf0797484329750cfbe3c4325cd33127941fabc882" dependencies = [ "anstyle", "once_cell_polyfill", @@ -169,9 +169,9 @@ checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" [[package]] name = "async-compression" -version = "0.4.23" +version = "0.4.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b37fc50485c4f3f736a4fb14199f6d5f5ba008d7f28fe710306c92780f004c07" +checksum = "40f6024f3f856663b45fd0c9b6f2024034a702f453549449e0d84a305900dad4" dependencies = [ "flate2", "futures-core", @@ -199,7 +199,7 @@ checksum = "c7c24de15d275a1ecfd47a380fb4d5ec9bfe0933f309ed5e705b775596a3574d" dependencies = [ "proc-macro2", "quote", - "syn 2.0.101", + "syn 2.0.104", ] [[package]] @@ -210,7 +210,7 @@ checksum = "e539d3fca749fcee5236ab05e93a52867dd549cc157c8cb7f99595f3cedffdb5" dependencies = [ "proc-macro2", "quote", - "syn 2.0.101", + "syn 2.0.104", ] [[package]] @@ -221,15 +221,15 @@ checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0" [[package]] name = "autocfg" -version = "1.4.0" +version = "1.5.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26" +checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" [[package]] name = "aws-config" -version = "1.6.3" +version = "1.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "02a18fd934af6ae7ca52410d4548b98eb895aab0f1ea417d168d85db1434a141" +checksum = "455e9fb7743c6f6267eb2830ccc08686fbb3d13c9a689369562fd4d4ef9ea462" dependencies = [ "aws-credential-types", "aws-runtime", @@ -293,9 +293,9 @@ dependencies = [ [[package]] name = "aws-runtime" -version = "1.5.7" +version = "1.5.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c4063282c69991e57faab9e5cb21ae557e59f5b0fb285c196335243df8dc25c" +checksum = "4f6c68419d8ba16d9a7463671593c54f81ba58cab466e9b759418da606dcc2e2" dependencies = [ "aws-credential-types", "aws-sigv4", @@ -317,9 +317,9 @@ dependencies = [ [[package]] name = "aws-sdk-kms" -version = "1.72.0" +version = "1.76.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0eef6a94141a43ee28404bf135828ad9bdd4936bfa2a84ad8dea355c94646a35" +checksum = "8565497721d9f18fa29a68bc5d8225b39e1cc7399d7fc6f1ad803ca934341804" dependencies = [ "aws-credential-types", "aws-runtime", @@ -339,9 +339,9 @@ dependencies = [ [[package]] name = "aws-sdk-sso" -version = "1.71.0" +version = "1.73.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "95a4fd09d6e863655d99cd2260f271c6d1030dc6bfad68e19e126d2e4c8ceb18" +checksum = "b2ac1674cba7872061a29baaf02209fefe499ff034dfd91bd4cc59e4d7741489" dependencies = [ "aws-credential-types", "aws-runtime", @@ -361,9 +361,9 @@ dependencies = [ [[package]] name = "aws-sdk-ssooidc" -version = "1.72.0" +version = "1.74.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3224ab02ebb3074467a33d57caf6fcb487ca36f3697fdd381b0428dc72380696" +checksum = "3a6a22f077f5fd3e3c0270d4e1a110346cddf6769e9433eb9e6daceb4ca3b149" dependencies = [ "aws-credential-types", "aws-runtime", @@ -383,9 +383,9 @@ dependencies = [ [[package]] name = "aws-sdk-sts" -version = "1.72.0" +version = "1.75.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f6933f189ed1255e78175fbd73fb200c0aae7240d220ed3346f567b0ddca3083" +checksum = "e3258fa707f2f585ee3049d9550954b959002abd59176975150a01d5cf38ae3f" dependencies = [ "aws-credential-types", "aws-runtime", @@ -406,9 +406,9 @@ dependencies = [ [[package]] name = "aws-sigv4" -version = "1.3.2" +version = "1.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3734aecf9ff79aa401a6ca099d076535ab465ff76b46440cf567c8e70b65dc13" +checksum = "ddfb9021f581b71870a17eac25b52335b82211cdc092e02b6876b2bcefa61666" dependencies = [ "aws-credential-types", "aws-smithy-http", @@ -459,9 +459,9 @@ dependencies = [ [[package]] name = "aws-smithy-http-client" -version = "1.0.3" +version = "1.0.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "073d330f94bdf1f47bb3e0f5d45dda1e372a54a553c39ab6e9646902c8c81594" +checksum = "7f491388e741b7ca73b24130ff464c1478acc34d5b331b7dd0a2ee4643595a15" dependencies = [ "aws-smithy-async", "aws-smithy-runtime-api", @@ -474,11 +474,11 @@ dependencies = [ "hyper 0.14.32", "hyper 1.6.0", "hyper-rustls 0.24.2", - "hyper-rustls 0.27.6", + "hyper-rustls 0.27.7", "hyper-util", "pin-project-lite", "rustls 0.21.12", - "rustls 0.23.27", + "rustls 0.23.28", 
"rustls-native-certs 0.8.1", "rustls-pki-types", "tokio", @@ -488,9 +488,9 @@ dependencies = [ [[package]] name = "aws-smithy-json" -version = "0.61.3" +version = "0.61.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "92144e45819cae7dc62af23eac5a038a58aa544432d2102609654376a900bd07" +checksum = "a16e040799d29c17412943bdbf488fd75db04112d0c0d4b9290bacf5ae0014b9" dependencies = [ "aws-smithy-types", ] @@ -540,9 +540,9 @@ dependencies = [ [[package]] name = "aws-smithy-runtime-api" -version = "1.8.0" +version = "1.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1e5d9e3a80a18afa109391fb5ad09c3daf887b516c6fd805a157c6ea7994a57" +checksum = "bd8531b6d8882fd8f48f82a9754e682e29dd44cff27154af51fa3eb730f59efb" dependencies = [ "aws-smithy-async", "aws-smithy-types", @@ -557,9 +557,9 @@ dependencies = [ [[package]] name = "aws-smithy-types" -version = "1.3.1" +version = "1.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "40076bd09fadbc12d5e026ae080d0930defa606856186e31d83ccc6a255eeaf3" +checksum = "d498595448e43de7f4296b7b7a18a8a02c61ec9349128c80a368f7c3b4ab11a8" dependencies = [ "base64-simd", "bytes", @@ -583,9 +583,9 @@ dependencies = [ [[package]] name = "aws-smithy-xml" -version = "0.60.9" +version = "0.60.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ab0b0166827aa700d3dc519f72f8b3a91c35d0b8d042dc5d643a91e6f80648fc" +checksum = "3db87b96cb1b16c024980f133968d52882ca0daaee3a086c6decc500f6c99728" dependencies = [ "xmlparser", ] @@ -690,9 +690,9 @@ dependencies = [ [[package]] name = "base64ct" -version = "1.7.3" +version = "1.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89e25b6adfb930f02d1981565a6e5d9c547ac15a96606256d3b59040e5cd4ca3" +checksum = "55248b47b0caf0546f7988906588779981c43bb1bc9d0c44087278f80cdb44ba" [[package]] name = "bcder" @@ -723,7 +723,7 @@ dependencies = [ "regex", "rustc-hash 1.1.0", "shlex", - "syn 2.0.101", + "syn 2.0.104", "which", ] @@ -783,7 +783,7 @@ dependencies = [ "proc-macro-crate", "proc-macro2", "quote", - "syn 2.0.101", + "syn 2.0.104", ] [[package]] @@ -797,9 +797,9 @@ dependencies = [ [[package]] name = "bumpalo" -version = "3.17.0" +version = "3.18.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1628fb46dfa0b37568d12e5edd512553eccf6a22a78e8bde00bb4aed84d5bdbf" +checksum = "793db76d6187cd04dff33004d8e6c9cc4e05cd330500379d2394209271b4aeee" [[package]] name = "bytecheck" @@ -917,9 +917,9 @@ dependencies = [ [[package]] name = "cc" -version = "1.2.25" +version = "1.2.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d0fc897dc1e865cc67c0e05a836d9d3f1df3cbe442aa4a9473b18e12624a4951" +checksum = "d487aa071b5f64da6f19a3e848e3578944b726ee5a4854b82172f02aa876bfdc" dependencies = [ "jobserver", "libc", @@ -937,9 +937,9 @@ dependencies = [ [[package]] name = "cfg-if" -version = "1.0.0" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +checksum = "9555578bc9e57714c812a1f84e4fc5b4d21fcb063490c624de019f7464c91268" [[package]] name = "cfg_aliases" @@ -1010,9 +1010,9 @@ dependencies = [ [[package]] name = "clap" -version = "4.5.39" +version = "4.5.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fd60e63e9be68e5fb56422e397cf9baddded06dae1d2e523401542383bc72a9f" +checksum = 
"40b6887a1d8685cebccf115538db5c0efe625ccac9696ad45c409d96566e910f" dependencies = [ "clap_builder", "clap_derive", @@ -1020,9 +1020,9 @@ dependencies = [ [[package]] name = "clap_builder" -version = "4.5.39" +version = "4.5.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89cc6392a1f72bbeb820d71f32108f61fdaf18bc526e1d23954168a67759ef51" +checksum = "e0c66c08ce9f0c698cbce5c0279d0bb6ac936d8674174fe48f736533b964f59e" dependencies = [ "anstream", "anstyle", @@ -1032,21 +1032,21 @@ dependencies = [ [[package]] name = "clap_derive" -version = "4.5.32" +version = "4.5.40" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09176aae279615badda0765c0c0b3f6ed53f4709118af73cf4655d85d1530cd7" +checksum = "d2c7947ae4cc3d851207c1adb5b5e260ff0cca11446b1d6d1423788e442257ce" dependencies = [ "heck", "proc-macro2", "quote", - "syn 2.0.101", + "syn 2.0.104", ] [[package]] name = "clap_lex" -version = "0.7.4" +version = "0.7.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f46ad14479a25103f283c0f10005961cf086d8dc42205bb44c46ac563475dca6" +checksum = "b94f61472cee1439c0b966b47e3aca9ae07e45d070759512cd390ea2bebc6675" [[package]] name = "cmake" @@ -1059,9 +1059,9 @@ dependencies = [ [[package]] name = "colorchoice" -version = "1.0.3" +version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b63caa9aa9397e2d9480a9b13673856c78d8ac123288526c37d7839f2a86990" +checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" [[package]] name = "const-oid" @@ -1235,7 +1235,7 @@ checksum = "8034092389675178f570469e6c3b0465d3d30b4505c294a6550db47f3c17ad18" dependencies = [ "proc-macro2", "quote", - "syn 2.0.101", + "syn 2.0.104", ] [[package]] @@ -1256,7 +1256,7 @@ checksum = "2cdc8d50f426189eef89dac62fabfa0abb27d5cc008f25bf4156a0203325becc" dependencies = [ "proc-macro2", "quote", - "syn 2.0.101", + "syn 2.0.104", ] [[package]] @@ -1279,7 +1279,7 @@ checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" dependencies = [ "proc-macro2", "quote", - "syn 2.0.101", + "syn 2.0.104", ] [[package]] @@ -1342,12 +1342,12 @@ checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" [[package]] name = "errno" -version = "0.3.12" +version = "0.3.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cea14ef9355e3beab063703aa9dab15afd25f0667c341310c1e5274bb1d0da18" +checksum = "778e2ac28f6c47af28e4907f13ffd1e1ddbd400980a9abd7c8df189bf578a5ad" dependencies = [ "libc", - "windows-sys 0.59.0", + "windows-sys 0.60.2", ] [[package]] @@ -1388,9 +1388,9 @@ checksum = "b7ac824320a75a52197e8f2d787f6a38b6718bb6897a35142d749af3c0e8f4fe" [[package]] name = "flate2" -version = "1.1.1" +version = "1.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7ced92e76e966ca2fd84c8f7aa01a4aea65b0eb6648d72f7c8f3e2764a67fece" +checksum = "4a3d7db9596fecd151c5f638c0ee5d5bd487b6e0ea232e5dc96d5250f6f94b1d" dependencies = [ "crc32fast", "miniz_oxide", @@ -1497,7 +1497,7 @@ checksum = "162ee34ebcb7c64a8abebc059ce0fee27c2262618d7b60ed8faf72fef13c3650" dependencies = [ "proc-macro2", "quote", - "syn 2.0.101", + "syn 2.0.104", ] [[package]] @@ -1577,7 +1577,7 @@ dependencies = [ "cfg-if", "js-sys", "libc", - "wasi 0.11.0+wasi-snapshot-preview1", + "wasi 0.11.1+wasi-snapshot-preview1", "wasm-bindgen", ] @@ -1662,9 +1662,9 @@ checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" [[package]] name = 
"hashbrown" -version = "0.15.3" +version = "0.15.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "84b26c544d002229e640969970a2e74021aadf6e2f96372b9c58eff97de08eb3" +checksum = "5971ac85611da7067dbfcabef3c70ebb5606018acd9e2a3903a0da507521e0d5" dependencies = [ "allocator-api2", "equivalent", @@ -1679,9 +1679,9 @@ checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" [[package]] name = "hermit-abi" -version = "0.5.1" +version = "0.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f154ce46856750ed433c8649605bf7ed2de3bc35fd9d2a9f30cddd873c80cb08" +checksum = "fc0fef456e4baa96da950455cd02c081ca953b141298e41db3fc7e36b1da849c" [[package]] name = "hex" @@ -1838,20 +1838,20 @@ dependencies = [ [[package]] name = "hyper-rustls" -version = "0.27.6" +version = "0.27.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "03a01595e11bdcec50946522c32dde3fc6914743000a68b93000965f2f02406d" +checksum = "e3c93eb611681b207e1fe55d5a71ecf91572ec8a6705cdb6857f7d8d5242cf58" dependencies = [ "http 1.3.1", "hyper 1.6.0", "hyper-util", - "rustls 0.23.27", + "rustls 0.23.28", "rustls-native-certs 0.8.1", "rustls-pki-types", "tokio", "tokio-rustls 0.26.2", "tower-service", - "webpki-roots 1.0.0", + "webpki-roots 1.0.1", ] [[package]] @@ -1869,9 +1869,9 @@ dependencies = [ [[package]] name = "hyper-util" -version = "0.1.13" +version = "0.1.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1c293b6b3d21eca78250dc7dbebd6b9210ec5530e038cbfe0661b5c47ab06e8" +checksum = "dc2fdfdbff08affe55bb779f33b053aa1fe5dd5b54c257343c17edfa55711bdb" dependencies = [ "base64 0.22.1", "bytes", @@ -2039,7 +2039,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cea70ddb795996207ad57735b50c5982d8844f38ba9ee5f1aedcfb708a2aa11e" dependencies = [ "equivalent", - "hashbrown 0.15.3", + "hashbrown 0.15.4", ] [[package]] @@ -2171,7 +2171,7 @@ dependencies = [ "proc-macro2", "quote", "regex", - "syn 2.0.101", + "syn 2.0.104", ] [[package]] @@ -2191,9 +2191,9 @@ checksum = "830d08ce1d1d941e6b30645f1a0eb5643013d835ce3779a5fc208261dbe10f55" [[package]] name = "libc" -version = "0.2.172" +version = "0.2.174" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d750af042f7ef4f724306de029d18836c26c1765a54a6a3f094cbd23a7267ffa" +checksum = "1171693293099992e19cddea4e8b849964e9846f4acee11b3948bcc337be8776" [[package]] name = "libloading" @@ -2202,7 +2202,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "07033963ba89ebaf1584d767badaa2e8fcec21aedea6b8c0346d487d49c28667" dependencies = [ "cfg-if", - "windows-targets 0.53.0", + "windows-targets 0.53.2", ] [[package]] @@ -2288,7 +2288,7 @@ version = "0.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9f8cc7106155f10bdf99a6f379688f543ad6596a415375b36a59a054ceda1198" dependencies = [ - "hashbrown 0.15.3", + "hashbrown 0.15.4", ] [[package]] @@ -2341,9 +2341,9 @@ checksum = "490cc448043f947bae3cbee9c203358d62dbee0db12107a74be5c30ccfd09771" [[package]] name = "memchr" -version = "2.7.4" +version = "2.7.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" +checksum = "32a282da65faaf38286cf3be983213fcf1d2e2a58700e808f83f4ea9a4804bc0" [[package]] name = "mime" @@ -2359,9 +2359,9 @@ checksum = "68354c5c6bd36d73ff3feceb05efa59b6acb7626617f4962be322a825e61f79a" 
[[package]] name = "miniz_oxide" -version = "0.8.8" +version = "0.8.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3be647b768db090acb35d5ec5db2b0e1f1de11133ca123b9eacf5137868f892a" +checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316" dependencies = [ "adler2", ] @@ -2373,7 +2373,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "78bed444cc8a2160f01cbcf811ef18cac863ad68ae8ca62092e8db51d51c761c" dependencies = [ "libc", - "wasi 0.11.0+wasi-snapshot-preview1", + "wasi 0.11.1+wasi-snapshot-preview1", "windows-sys 0.59.0", ] @@ -2401,7 +2401,7 @@ dependencies = [ "pem", "percent-encoding", "rand 0.9.1", - "rustls 0.23.27", + "rustls 0.23.28", "rustls-pemfile 2.2.0", "serde", "serde_json", @@ -2967,7 +2967,7 @@ checksum = "6e918e4ff8c4549eb882f14b3a4bc8c8bc93de829416eacf579f1207a8fbf861" dependencies = [ "proc-macro2", "quote", - "syn 2.0.101", + "syn 2.0.104", ] [[package]] @@ -3058,7 +3058,7 @@ dependencies = [ "anyhow", "futures-util", "pt", - "rustls 0.23.27", + "rustls 0.23.28", "ssh2", "tokio", "tokio-postgres", @@ -3139,12 +3139,12 @@ dependencies = [ [[package]] name = "prettyplease" -version = "0.2.33" +version = "0.2.35" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9dee91521343f4c5c6a63edd65e54f31f5c92fe8978c40a4282f8372194c6a7d" +checksum = "061c1221631e079b26479d25bbf2275bfe5917ae8419cd7e34f13bfc2aa7539a" dependencies = [ "proc-macro2", - "syn 2.0.101", + "syn 2.0.104", ] [[package]] @@ -3193,7 +3193,7 @@ dependencies = [ "pulldown-cmark", "pulldown-cmark-to-cmark", "regex", - "syn 2.0.101", + "syn 2.0.104", "tempfile", ] @@ -3207,7 +3207,7 @@ dependencies = [ "itertools 0.14.0", "proc-macro2", "quote", - "syn 2.0.101", + "syn 2.0.104", ] [[package]] @@ -3293,7 +3293,7 @@ dependencies = [ "quinn-proto", "quinn-udp", "rustc-hash 2.1.1", - "rustls 0.23.27", + "rustls 0.23.28", "socket2", "thiserror 2.0.12", "tokio", @@ -3313,7 +3313,7 @@ dependencies = [ "rand 0.9.1", "ring", "rustc-hash 2.1.1", - "rustls 0.23.27", + "rustls 0.23.28", "rustls-pki-types", "slab", "thiserror 2.0.12", @@ -3324,9 +3324,9 @@ dependencies = [ [[package]] name = "quinn-udp" -version = "0.5.12" +version = "0.5.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ee4e529991f949c5e25755532370b8af5d114acae52326361d68d47af64aa842" +checksum = "fcebb1209ee276352ef14ff8732e24cc2b02bbac986cd74a4c81bcb2f9881970" dependencies = [ "cfg_aliases", "libc", @@ -3347,9 +3347,9 @@ dependencies = [ [[package]] name = "r-efi" -version = "5.2.0" +version = "5.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "74765f6d916ee2faa39bc8e68e4f3ed8949b48cccdac59983d287a7cb71ce9c5" +checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f" [[package]] name = "radium" @@ -3438,9 +3438,9 @@ dependencies = [ [[package]] name = "redox_syscall" -version = "0.5.12" +version = "0.5.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "928fca9cf2aa042393a8325b9ead81d2f0df4cb12e1e24cef072922ccd99c5af" +checksum = "0d04b7d0ee6b4a0207a0a7adb104d23ecb0b47d6beae7152d0fa34b692b29fd6" dependencies = [ "bitflags", ] @@ -3485,7 +3485,7 @@ dependencies = [ "quote", "refinery-core", "regex", - "syn 2.0.101", + "syn 2.0.104", ] [[package]] @@ -3549,9 +3549,9 @@ dependencies = [ [[package]] name = "reqwest" -version = "0.12.19" +version = "0.12.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"a2f8e5513d63f2e5b386eb5106dc67eaf3f84e95258e210489136b8b92ad6119" +checksum = "eabf4c97d9130e2bf606614eb937e86edac8292eaa6f422f995d7e8de1eb1813" dependencies = [ "async-compression", "base64 0.22.1", @@ -3562,17 +3562,14 @@ dependencies = [ "http-body 1.0.1", "http-body-util", "hyper 1.6.0", - "hyper-rustls 0.27.6", + "hyper-rustls 0.27.7", "hyper-util", - "ipnet", "js-sys", "log", - "mime", - "once_cell", "percent-encoding", "pin-project-lite", "quinn", - "rustls 0.23.27", + "rustls 0.23.28", "rustls-pki-types", "serde", "serde_json", @@ -3588,7 +3585,7 @@ dependencies = [ "wasm-bindgen", "wasm-bindgen-futures", "web-sys", - "webpki-roots 1.0.0", + "webpki-roots 1.0.1", ] [[package]] @@ -3656,9 +3653,9 @@ dependencies = [ [[package]] name = "rust_decimal" -version = "1.37.1" +version = "1.37.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "faa7de2ba56ac291bd90c6b9bece784a52ae1411f9506544b3eae36dd2356d50" +checksum = "b203a6425500a03e0919c42d3c47caca51e79f1132046626d2c8871c5092035d" dependencies = [ "arrayvec", "borsh", @@ -3673,9 +3670,9 @@ dependencies = [ [[package]] name = "rustc-demangle" -version = "0.1.24" +version = "0.1.25" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f" +checksum = "989e6739f80c4ad5b13e0fd7fe89531180375b18520cc8c82080e4dc4035b84f" [[package]] name = "rustc-hash" @@ -3738,9 +3735,9 @@ dependencies = [ [[package]] name = "rustls" -version = "0.23.27" +version = "0.23.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "730944ca083c1c233a75c09f199e973ca499344a2b7ba9e755c457e86fb4a321" +checksum = "7160e3e10bf4535308537f3c4e1641468cd0e485175d6163087c0393c7d46643" dependencies = [ "aws-lc-rs", "log", @@ -3972,7 +3969,7 @@ checksum = "5b0276cf7f2c73365f7157c8123c21cd9a50fbbd844757af28ca1f5925fc2a00" dependencies = [ "proc-macro2", "quote", - "syn 2.0.101", + "syn 2.0.104", ] [[package]] @@ -3989,9 +3986,9 @@ dependencies = [ [[package]] name = "serde_spanned" -version = "0.6.8" +version = "0.6.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87607cb1398ed59d48732e575a4c28a7a8ebf2454b964fe3f224f2afc07909e1" +checksum = "bf41e0cfaf7226dca15e8197172c295a782857fcb97fad1808a166870dee75a3" dependencies = [ "serde", ] @@ -4102,18 +4099,15 @@ checksum = "56199f7ddabf13fe5074ce809e7d3f42b42ae711800501b5b16ea82ad029c39d" [[package]] name = "slab" -version = "0.4.9" +version = "0.4.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8f92a496fb766b417c996b9c5e57daf2f7ad3b0bebe1ccfca4856390e3d3bb67" -dependencies = [ - "autocfg", -] +checksum = "04dc19736151f35336d325007ac991178d504a119863a2fcb3758cdb5e52c50d" [[package]] name = "smallvec" -version = "1.15.0" +version = "1.15.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8917285742e9f3e1683f0a9c4e6b57960b7314d0b08d30d1ecd426713ee2eee9" +checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" [[package]] name = "socket2" @@ -4157,7 +4151,7 @@ source = "git+https://github.com/peerdb-io/sqlparser-rs.git?branch=main#8c341b80 dependencies = [ "proc-macro2", "quote", - "syn 2.0.101", + "syn 2.0.104", ] [[package]] @@ -4214,9 +4208,9 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.101" +version = "2.0.104" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8ce2b7fc941b3a24138a0a7cf8e858bfc6a992e7978a068a5c760deb0ed43caf" 
+checksum = "17b6f705963418cdb9927482fa304bc562ece2fdd4f616084c50b7023b435a40" dependencies = [ "proc-macro2", "quote", @@ -4240,7 +4234,7 @@ checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2" dependencies = [ "proc-macro2", "quote", - "syn 2.0.101", + "syn 2.0.104", ] [[package]] @@ -4298,7 +4292,7 @@ checksum = "4fee6c4efc90059e10f81e6d42c60a18f76588c3d74cb83a0b242a2b6c7504c1" dependencies = [ "proc-macro2", "quote", - "syn 2.0.101", + "syn 2.0.104", ] [[package]] @@ -4309,17 +4303,16 @@ checksum = "7f7cf42b4507d8ea322120659672cf1b9dbb93f8f2d4ecfd6e51350ff5b17a1d" dependencies = [ "proc-macro2", "quote", - "syn 2.0.101", + "syn 2.0.104", ] [[package]] name = "thread_local" -version = "1.1.8" +version = "1.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b9ef9bad013ada3808854ceac7b46812a6465ba368859a37e2100283d2d719c" +checksum = "f60246a4944f24f6e018aa17cdeffb7818b76356965d03b07d6a9886e8962185" dependencies = [ "cfg-if", - "once_cell", ] [[package]] @@ -4398,7 +4391,7 @@ checksum = "2d2e76690929402faae40aebdda620a2c0e25dd6d3b9afe48867dfd95991f4bd" dependencies = [ "proc-macro2", "quote", - "syn 2.0.101", + "syn 2.0.104", ] [[package]] @@ -4427,7 +4420,7 @@ checksum = "6e06d43f1345a3bcd39f6a56dbb7dcab2ba47e68e8ac134855e7e2bdbaf8cab8" dependencies = [ "proc-macro2", "quote", - "syn 2.0.101", + "syn 2.0.104", ] [[package]] @@ -4464,7 +4457,7 @@ checksum = "27d684bad428a0f2481f42241f821db42c54e2dc81d8c00db8536c506b0a0144" dependencies = [ "const-oid", "ring", - "rustls 0.23.27", + "rustls 0.23.28", "tokio", "tokio-postgres", "tokio-rustls 0.26.2", @@ -4487,7 +4480,7 @@ version = "0.26.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8e727b36a1a0e8b74c376ac2211e40c2c8af09fb4013c60d910495810f008e9b" dependencies = [ - "rustls 0.23.27", + "rustls 0.23.28", "tokio", ] @@ -4518,9 +4511,9 @@ dependencies = [ [[package]] name = "toml" -version = "0.8.22" +version = "0.8.23" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05ae329d1f08c4d17a59bed7ff5b5a769d062e64a62d34a3261b219e62cd5aae" +checksum = "dc1beb996b9d83529a9e75c17a1686767d148d70663143c7854d8b4a09ced362" dependencies = [ "serde", "serde_spanned", @@ -4530,18 +4523,18 @@ dependencies = [ [[package]] name = "toml_datetime" -version = "0.6.9" +version = "0.6.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3da5db5a963e24bc68be8b17b6fa82814bb22ee8660f192bb182771d498f09a3" +checksum = "22cddaf88f4fbc13c51aebbf5f8eceb5c7c5a9da2ac40a13519eb5b0a0e8f11c" dependencies = [ "serde", ] [[package]] name = "toml_edit" -version = "0.22.26" +version = "0.22.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "310068873db2c5b3e7659d2cc35d21855dbafa50d1ce336397c666e3cb08137e" +checksum = "41fe8c660ae4257887cf66394862d21dbca4a6ddd26f04a3560410406a2f819a" dependencies = [ "indexmap 2.9.0", "serde", @@ -4553,9 +4546,9 @@ dependencies = [ [[package]] name = "toml_write" -version = "0.1.1" +version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfb942dfe1d8e29a7ee7fcbde5bd2b9a25fb89aa70caea2eba3bee836ff41076" +checksum = "5d99f8c9a7727884afe522e9bd5edbfc91a3312b36a77b5fb8926e4c31a41801" [[package]] name = "tonic" @@ -4601,7 +4594,7 @@ dependencies = [ "prost-build", "prost-types", "quote", - "syn 2.0.101", + "syn 2.0.104", ] [[package]] @@ -4720,20 +4713,20 @@ dependencies = [ [[package]] name = "tracing-attributes" -version = "0.1.28" 
+version = "0.1.30" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "395ae124c09f9e6918a2310af6038fba074bcf474ac352496d5910dd59a2226d" +checksum = "81383ab64e72a7a8b8e13130c49e3dab29def6d0c7d76a03087b3cf71c5c6903" dependencies = [ "proc-macro2", "quote", - "syn 2.0.101", + "syn 2.0.104", ] [[package]] name = "tracing-core" -version = "0.1.33" +version = "0.1.34" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e672c95779cf947c5311f83787af4fa8fffd12fb27e4993211a84bdfd9610f9c" +checksum = "b9d12581f227e93f094d3af2ae690a574abb8a2b9b7a96e7cfe9647b2b617678" dependencies = [ "once_cell", "valuable", @@ -4776,9 +4769,9 @@ checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" [[package]] name = "twox-hash" -version = "2.1.0" +version = "2.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e7b17f197b3050ba473acf9181f7b1d3b66d1cf7356c6cc57886662276e65908" +checksum = "8b907da542cbced5261bd3256de1b3a1bf340a3d37f93425a07362a1d687de56" [[package]] name = "typenum" @@ -4944,9 +4937,9 @@ dependencies = [ [[package]] name = "wasi" -version = "0.11.0+wasi-snapshot-preview1" +version = "0.11.1+wasi-snapshot-preview1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423" +checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b" [[package]] name = "wasi" @@ -4985,7 +4978,7 @@ dependencies = [ "log", "proc-macro2", "quote", - "syn 2.0.101", + "syn 2.0.104", "wasm-bindgen-shared", ] @@ -5020,7 +5013,7 @@ checksum = "8ae87ea40c9f689fc23f209965b6fb8a99ad69aeeb0231408be24920604395de" dependencies = [ "proc-macro2", "quote", - "syn 2.0.101", + "syn 2.0.104", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -5060,14 +5053,14 @@ version = "0.26.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "521bc38abb08001b01866da9f51eb7c5d647a19260e00054a8c7fd5f9e57f7a9" dependencies = [ - "webpki-roots 1.0.0", + "webpki-roots 1.0.1", ] [[package]] name = "webpki-roots" -version = "1.0.0" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2853738d1cc4f2da3a225c18ec6c3721abb31961096e9dbf5ab35fa88b19cfdb" +checksum = "8782dd5a41a24eed3a4f40b606249b3e236ca61adf1f25ea4d45c73de122b502" dependencies = [ "rustls-pki-types", ] @@ -5147,7 +5140,7 @@ checksum = "a47fddd13af08290e67f4acabf4b459f647552718f683a7b415d290ac744a836" dependencies = [ "proc-macro2", "quote", - "syn 2.0.101", + "syn 2.0.104", ] [[package]] @@ -5158,14 +5151,14 @@ checksum = "bd9211b69f8dcdfa817bfd14bf1c97c9188afa36f4750130fcdf3f400eca9fa8" dependencies = [ "proc-macro2", "quote", - "syn 2.0.101", + "syn 2.0.104", ] [[package]] name = "windows-link" -version = "0.1.1" +version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "76840935b766e1b0a05c0066835fb9ec80071d4c09a16f6bd5f7e655e3c14c38" +checksum = "5e6ad25900d524eaabdbbb96d20b4311e1e7ae1699af4fb28c17ae66c80d798a" [[package]] name = "windows-result" @@ -5203,6 +5196,15 @@ dependencies = [ "windows-targets 0.52.6", ] +[[package]] +name = "windows-sys" +version = "0.60.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb" +dependencies = [ + "windows-targets 0.53.2", +] + [[package]] name = "windows-targets" version = "0.52.6" @@ -5221,9 +5223,9 @@ dependencies = [ [[package]] name = 
"windows-targets" -version = "0.53.0" +version = "0.53.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1e4c7e8ceaaf9cb7d7507c974735728ab453b67ef8f18febdd7c11fe59dca8b" +checksum = "c66f69fcc9ce11da9966ddb31a40968cad001c5bedeb5c2b82ede4253ab48aef" dependencies = [ "windows_aarch64_gnullvm 0.53.0", "windows_aarch64_msvc 0.53.0", @@ -5333,9 +5335,9 @@ checksum = "271414315aff87387382ec3d271b52d7ae78726f5d44ac98b4f4030c91880486" [[package]] name = "winnow" -version = "0.7.10" +version = "0.7.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c06928c8748d81b05c9be96aad92e1b6ff01833332f281e8cfca3be4b35fc9ec" +checksum = "74c7b26e3480b707944fc872477815d29a8e429d2f93a1ce000f5fa84a15cbcd" dependencies = [ "memchr", ] @@ -5430,7 +5432,7 @@ checksum = "38da3c9736e16c5d3c8c597a9aaa5d1fa565d0532ae05e27c24aa62fb32c0ab6" dependencies = [ "proc-macro2", "quote", - "syn 2.0.101", + "syn 2.0.104", "synstructure", ] @@ -5447,11 +5449,11 @@ dependencies = [ "http 1.3.1", "http-body-util", "hyper 1.6.0", - "hyper-rustls 0.27.6", + "hyper-rustls 0.27.7", "hyper-util", "log", "percent-encoding", - "rustls 0.23.27", + "rustls 0.23.28", "rustls-pemfile 2.2.0", "seahash", "serde", @@ -5463,22 +5465,22 @@ dependencies = [ [[package]] name = "zerocopy" -version = "0.8.25" +version = "0.8.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a1702d9583232ddb9174e01bb7c15a2ab8fb1bc6f227aa1233858c351a3ba0cb" +checksum = "1039dd0d3c310cf05de012d8a39ff557cb0d23087fd44cad61df08fc31907a2f" dependencies = [ "zerocopy-derive", ] [[package]] name = "zerocopy-derive" -version = "0.8.25" +version = "0.8.26" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "28a6e20d751156648aa063f3800b706ee209a32c0b4d9f24be3d980b01be55ef" +checksum = "9ecf5b4cc5364572d7f4c329661bcc82724222973f2cab6f050a4e5c22f75181" dependencies = [ "proc-macro2", "quote", - "syn 2.0.101", + "syn 2.0.104", ] [[package]] @@ -5498,7 +5500,7 @@ checksum = "d71e5d6e06ab090c67b5e44993ec16b72dcbaabc526db883a360057678b48502" dependencies = [ "proc-macro2", "quote", - "syn 2.0.101", + "syn 2.0.104", "synstructure", ] @@ -5519,7 +5521,7 @@ checksum = "ce36e65b0d2999d2aafac989fb249189a141aee1f53c612c1f37d72631959f69" dependencies = [ "proc-macro2", "quote", - "syn 2.0.101", + "syn 2.0.104", ] [[package]] @@ -5552,7 +5554,7 @@ checksum = "5b96237efa0c878c64bd89c436f661be4e46b2f3eff1ebb976f7ef2321d2f58f" dependencies = [ "proc-macro2", "quote", - "syn 2.0.101", + "syn 2.0.104", ] [[package]] diff --git a/nexus/analyzer/src/lib.rs b/nexus/analyzer/src/lib.rs index 19601d3f5a..d62f68948f 100644 --- a/nexus/analyzer/src/lib.rs +++ b/nexus/analyzer/src/lib.rs @@ -63,7 +63,7 @@ impl StatementAnalyzer for PeerExistanceAnalyzer<'_> { }; // Necessary as visit_relations fails to deeply visit some structures. - visit_statements(statement, |stmt| { + let _ = visit_statements(statement, |stmt| { match stmt { Statement::Drop { names, .. 
} => { for name in names { @@ -73,7 +73,7 @@ impl StatementAnalyzer for PeerExistanceAnalyzer<'_> { Statement::Declare { stmts } => { for stmt in stmts { if let Some(ref query) = stmt.for_query { - visit_relations(query, |relation| { + let _ = visit_relations(query, |relation| { analyze_name(&relation.0[0].value); ControlFlow::<()>::Continue(()) }); @@ -85,7 +85,7 @@ impl StatementAnalyzer for PeerExistanceAnalyzer<'_> { ControlFlow::<()>::Continue(()) }); - visit_relations(statement, |relation| { + let _ = visit_relations(statement, |relation| { analyze_name(&relation.0[0].value); ControlFlow::<()>::Continue(()) }); @@ -278,7 +278,7 @@ impl StatementAnalyzer for PeerDDLAnalyzer { let cdc_staging_path = match raw_options.remove("cdc_staging_path") { Some(Expr::Value(ast::Value::SingleQuotedString(s))) => Some(s.clone()), - _ => Some("".to_string()), + _ => None, }; let max_batch_size: Option<u32> = match raw_options.remove("max_batch_size") @@ -603,11 +603,6 @@ fn parse_db_options(db_type: DbType, with_options: &[SqlOption]) -> anyhow::Resu Config::BigqueryConfig(bq_config) } DbType::Snowflake => { - let s3_int = opts - .get("s3_integration") - .map(|s| s.to_string()) - .unwrap_or_default(); - let snowflake_config = SnowflakeConfig { account_id: opts .get("account_id") @@ -637,33 +632,19 @@ fn parse_db_options(db_type: DbType, with_options: &[SqlOption]) -> anyhow::Resu .context("unable to parse query_timeout")?, password: opts.get("password").map(|s| s.to_string()), metadata_schema: opts.get("metadata_schema").map(|s| s.to_string()), - s3_integration: s3_int, + s3_integration: opts + .get("s3_integration") + .map(|s| s.to_string()) + .unwrap_or_default(), }; Config::SnowflakeConfig(snowflake_config) } DbType::Mongo => { let mongo_config = MongoConfig { - username: opts - .get("username") - .context("no username specified")? - .to_string(), - password: opts - .get("password") - .context("no password specified")? + uri: opts + .get("uri") + .context("no uri specified")? .to_string(), - clusterurl: opts - .get("clusterurl") - .context("no clusterurl specified")? - .to_string(), - database: opts - .get("database") - .context("no default database specified")? - .to_string(), - clusterport: opts - .get("clusterport") - .context("no cluster port specified")?
- .parse::<i32>() - .context("unable to parse port as valid int")?, }; Config::MongoConfig(mongo_config) } @@ -728,6 +709,16 @@ fn parse_db_options(db_type: DbType, with_options: &[SqlOption]) -> anyhow::Resu region: opts.get("region").map(|s| s.to_string()), role_arn: opts.get("role_arn").map(|s| s.to_string()), endpoint: opts.get("endpoint").map(|s| s.to_string()), + root_ca: opts.get("root_ca").map(|s| s.to_string()), + tls_host: opts + .get("tls_host") + .map(|s| s.to_string()) + .unwrap_or_default(), + codec: opts + .get("codec") + .and_then(|s| pt::peerdb_peers::AvroCodec::from_str_name(s)) + .map(|codec| codec.into()) + .unwrap_or_default(), }; Config::S3Config(s3_config) } @@ -800,6 +791,7 @@ fn parse_db_options(db_type: DbType, with_options: &[SqlOption]) -> anyhow::Resu .get("tls_host") .map(|s| s.to_string()) .unwrap_or_default(), + s3: None, }; Config::ClickhouseConfig(clickhouse_config) } diff --git a/nexus/catalog/migrations/V46__metadata_raw_table_column.sql b/nexus/catalog/migrations/V46__metadata_raw_table_column.sql new file mode 100644 index 0000000000..4051980812 --- /dev/null +++ b/nexus/catalog/migrations/V46__metadata_raw_table_column.sql @@ -0,0 +1,3 @@ +ALTER TABLE metadata_last_sync_state + ADD COLUMN IF NOT EXISTS latest_batch_id_in_raw_table BIGINT, + ADD COLUMN IF NOT EXISTS table_batch_id_data JSONB default '{}'; diff --git a/nexus/flow-rs/src/grpc.rs b/nexus/flow-rs/src/grpc.rs index 025dead14c..92b5ccf477 100644 --- a/nexus/flow-rs/src/grpc.rs +++ b/nexus/flow-rs/src/grpc.rs @@ -145,6 +145,7 @@ impl FlowGrpcClient { system: system as i32, idle_timeout_seconds: job.sync_interval.unwrap_or_default(), env: Default::default(), + version: 0, // filled in by server }; if job.disable_peerdb_columns { diff --git a/protos/flow.proto b/protos/flow.proto index d90d69e706..2dfecb8f7a 100644 --- a/protos/flow.proto +++ b/protos/flow.proto @@ -76,6 +76,7 @@ message FlowConnectionConfigs { string destination_name = 23; map<string, string> env = 24; + uint32 version = 25; } message RenameTableOption { @@ -188,6 +189,7 @@ message SetupTableSchemaBatchInput { TypeSystem system = 4; string peer_name = 5; repeated TableMapping table_mappings = 6; + uint32 version = 7; } message SetupNormalizedTableBatchInput { @@ -237,6 +239,11 @@ message UIntPartitionRange { uint64 end = 2; } +message ObjectIdPartitionRange { + string start = 1; + string end = 2; +} + message PartitionRange { // can be a timestamp range or an integer range oneof range { @@ -244,6 +251,7 @@ TimestampPartitionRange timestamp_range = 2; TIDPartitionRange tid_range = 3; UIntPartitionRange uint_range = 4; + ObjectIdPartitionRange object_id_range = 5; } } @@ -274,6 +282,7 @@ enum TypeSystem { } message QRepConfig { + reserved 2, 3; string flow_job_name = 1; string destination_table_identifier = 4; @@ -328,6 +337,7 @@ message QRepConfig { repeated string exclude = 26; repeated ColumnSetting columns = 27; + uint32 version = 28; } message QRepPartition { @@ -364,6 +374,7 @@ message TableSchemaDelta { } message QRepFlowState { + reserved 4; QRepPartition last_partition = 1; uint64 num_partitions_processed = 2; bool needs_resync = 3; diff --git a/protos/peers.proto b/protos/peers.proto index c54ce35ff6..26d8e1c5ee 100644 --- a/protos/peers.proto +++ b/protos/peers.proto @@ -63,11 +63,9 @@ message PubSubConfig { } message MongoConfig { - string username = 1; - string password = 2 [(peerdb_redacted) = true]; - string clusterurl = 3; - int32 clusterport = 4; - string database = 5; + // can be a mongodb:// URI mapping to
discrete hosts or a mongodb+srv:// URI + // mapping to a DNS SRV record. + string uri = 1 [(peerdb_redacted) = true]; } message AwsAuthStaticCredentialsConfig { @@ -134,6 +132,13 @@ message EventHubGroupConfig { repeated string unnest_columns = 3; } +enum AvroCodec { + Null = 0; + Deflate = 1; + Snappy = 2; + ZStandard = 3; +} + message S3Config { string url = 1; optional string access_key_id = 2 [(peerdb_redacted) = true]; @@ -141,6 +146,9 @@ message S3Config { optional string role_arn = 4; optional string region = 5; optional string endpoint = 6; + optional string root_ca = 7 [(peerdb_redacted) = true]; + string tls_host = 8; + AvroCodec codec = 9; } message ClickhouseConfig{ @@ -159,6 +167,7 @@ message ClickhouseConfig{ optional string private_key = 13 [(peerdb_redacted) = true]; optional string root_ca = 14 [(peerdb_redacted) = true]; string tls_host = 15; + optional S3Config s3 = 16; } message SqlServerConfig { diff --git a/stacks/flow.Dockerfile b/stacks/flow.Dockerfile index 779dc93372..ff658b6887 100644 --- a/stacks/flow.Dockerfile +++ b/stacks/flow.Dockerfile @@ -1,6 +1,6 @@ -# syntax=docker/dockerfile:1.16@sha256:e2dd261f92e4b763d789984f6eab84be66ab4f5f08052316d8eb8f173593acf7 +# syntax=docker/dockerfile:1.17@sha256:38387523653efa0039f8e1c89bb74a30504e76ee9f565e25c9a09841f9427b05 -FROM golang:1.24-alpine@sha256:b4f875e650466fa0fe62c6fd3f02517a392123eea85f1d7e69d85f780e4db1c1 AS builder +FROM golang:1.24-alpine@sha256:68932fa6d4d4059845c8f40ad7e654e626f3ebd3706eef7846f319293ab5cb7a AS builder RUN apk add --no-cache gcc geos-dev musl-dev WORKDIR /root/flow @@ -20,6 +20,7 @@ ENV CGO_ENABLED=1 RUN go build -o /root/peer-flow FROM alpine:3.22@sha256:8a1f59ffb675680d47db6337b49d22281a139e9d709335b492be023728e11715 AS flow-base +ENV TZ=UTC ADD --checksum=sha256:5fa49cac7e6e9202ef85331c6f83377a71339d692d5644c9417a2d81406f0c03 https://truststore.pki.rds.amazonaws.com/global/global-bundle.pem /usr/local/share/ca-certificates/global-aws-rds-bundle.pem RUN apk add --no-cache ca-certificates geos && \ update-ca-certificates && \ diff --git a/stacks/mongo.compose b/stacks/mongo.compose new file mode 100644 index 0000000000..2550ca83d8 --- /dev/null +++ b/stacks/mongo.compose @@ -0,0 +1,18 @@ +services: + mongodb: + image: mongo:8.0.10 + container_name: mongodb-rs + restart: always + ports: + - "27017:27017" + environment: + MONGO_INITDB_ROOT_USERNAME: admin + MONGO_INITDB_ROOT_PASSWORD: banana + volumes: + - mongo_data:/data/db + - ./mongo/keyfile:/etc/mongodb-keyfile:ro + - ./mongo/init-mongo.sh:/scripts/init-mongo.sh:ro + entrypoint: ["/scripts/init-mongo.sh"] + +volumes: + mongo_data: diff --git a/stacks/mongo/init-mongo.sh b/stacks/mongo/init-mongo.sh new file mode 100755 index 0000000000..a513619eb1 --- /dev/null +++ b/stacks/mongo/init-mongo.sh @@ -0,0 +1,77 @@ +#!/bin/bash +set -euo pipefail +IFS=$'\n\t' + +# Always enable debug mode +set -x + +# Trap to ensure MongoDB shuts down cleanly on script exit +trap 'echo "Caught signal, shutting down..."; kill -TERM $MONGOD_PID 2>/dev/null || true; exit 1' SIGINT SIGTERM + +# Validate required files +if [[ ! -f /etc/mongodb-keyfile ]]; then + echo "ERROR: MongoDB keyfile not found at /etc/mongodb-keyfile" >&2 + exit 1 +fi + +# Start MongoDB without auth first +echo "Starting MongoDB without auth..." +mongod --replSet rs0 --bind_ip_all --fork --ipv6 --logpath /var/log/mongodb.log + +# Wait for MongoDB with timeout +echo "Waiting for MongoDB to start..." 
+COUNTER=0 +until mongosh --eval "print('connected')" &>/dev/null; do + sleep 1 + COUNTER=$((COUNTER + 1)) + if [[ $COUNTER -ge 10 ]]; then + echo "ERROR: MongoDB failed to start within 10 seconds" >&2 + exit 1 + fi +done + +# Check if replica set is already initialized +if mongosh --quiet --eval "rs.status().ok" 2>/dev/null | grep -q 1; then + echo "Replica set already initialized" +else + echo "Initializing replica set..." + mongosh --eval "rs.initiate({_id: 'rs0', members: [{_id: 0, host: 'localhost:27017'}]})" + sleep 2 +fi + +# Check if admin user exists +if mongosh admin --quiet --eval "db.getUser('${MONGO_INITDB_ROOT_USERNAME}')" 2>/dev/null | grep -q "${MONGO_INITDB_ROOT_USERNAME}"; then + echo "Admin user already exists" +else + echo "Creating admin user..." + mongosh admin --eval " + db.createUser({ + user: '${MONGO_INITDB_ROOT_USERNAME}', + pwd: '${MONGO_INITDB_ROOT_PASSWORD}', + roles: ['root'] + }) + " +fi + +# Get MongoDB PID before shutdown +MONGOD_PID=$(pgrep mongod) + +# Shutdown MongoDB gracefully +echo "Shutting down MongoDB..." +kill -TERM $MONGOD_PID || true + +# Wait for shutdown +COUNTER=0 +while kill -0 $MONGOD_PID 2>/dev/null; do + sleep 1 + COUNTER=$((COUNTER + 1)) + if [[ $COUNTER -ge 10 ]]; then + echo "WARNING: MongoDB didn't shut down cleanly, forcing..." >&2 + kill -KILL $MONGOD_PID 2>/dev/null || true + break + fi +done + +# Start MongoDB with auth (no fork, logs to stdout) +echo "Starting MongoDB with authentication..." +exec mongod --replSet rs0 --bind_ip_all --keyFile /etc/mongodb-keyfile --auth --ipv6 diff --git a/stacks/mysql.Dockerfile b/stacks/mysql.Dockerfile index 82bd26379f..30bd5a2dc2 100644 --- a/stacks/mysql.Dockerfile +++ b/stacks/mysql.Dockerfile @@ -1,7 +1,7 @@ # How to use: # 1. Build the image: docker build -f mysql.Dockerfile --tag 'bin_mysql' . # 2. 
Run the container: docker run --name alala -e MYSQL_ROOT_PASSWORD= -p 3306:3306 -d bin_mysql -FROM mysql:9.3.0@sha256:04768cb63395f56140b4e92cad7c8d9f48dfa181075316e955da75aadca8a7cd +FROM mysql:9.3.0@sha256:072f96c2f1ebb13f712fd88d0ef98f2ef9a52ad4163ae67b550ed6720b6d642e # Copy the sample configuration file into the container COPY stacks/mysql/my.cnf.sample /etc/mysql/my.cnf diff --git a/stacks/peerdb-server.Dockerfile b/stacks/peerdb-server.Dockerfile index b10172dead..1832c93121 100644 --- a/stacks/peerdb-server.Dockerfile +++ b/stacks/peerdb-server.Dockerfile @@ -24,6 +24,7 @@ WORKDIR /root/nexus RUN cargo build --release --bin peerdb-server FROM alpine:3.22@sha256:8a1f59ffb675680d47db6337b49d22281a139e9d709335b492be023728e11715 +ENV TZ=UTC RUN apk add --no-cache ca-certificates postgresql-client curl iputils && \ adduser -s /bin/sh -D peerdb && \ install -d -m 0755 -o peerdb /var/log/peerdb diff --git a/stacks/peerdb-ui.Dockerfile b/stacks/peerdb-ui.Dockerfile index 1495f6ba31..a5a1209033 100644 --- a/stacks/peerdb-ui.Dockerfile +++ b/stacks/peerdb-ui.Dockerfile @@ -1,7 +1,8 @@ -# syntax=docker/dockerfile:1.16@sha256:e2dd261f92e4b763d789984f6eab84be66ab4f5f08052316d8eb8f173593acf7 +# syntax=docker/dockerfile:1.17@sha256:38387523653efa0039f8e1c89bb74a30504e76ee9f565e25c9a09841f9427b05 # Base stage -FROM node:24-alpine@sha256:91aa1bb6b5f57ec5109155332f4af2aa5d73ff7b4512c8e5dfce5dc88dbbae0e AS base +FROM node:24-alpine@sha256:7aaba6b13a55a1d78411a1162c1994428ed039c6bbef7b1d9859c25ada1d7cc5 AS base +ENV TZ=UTC ENV NPM_CONFIG_UPDATE_NOTIFIER=false RUN apk add --no-cache openssl && \ mkdir /app && \ diff --git a/ui/app/api/peer-types/route.ts b/ui/app/api/peer-types/route.ts index 69c078696a..66fc0956df 100644 --- a/ui/app/api/peer-types/route.ts +++ b/ui/app/api/peer-types/route.ts @@ -30,9 +30,9 @@ export async function GET(request: NextRequest) { 'RDS POSTGRESQL', 'GOOGLE CLOUD POSTGRESQL', 'AZURE FLEXIBLE POSTGRESQL', - 'TEMBO', 'CRUNCHY POSTGRES', 'NEON', + 'MONGO', ]; if (process.env.SUPABASE_ID) { postgresTypes.push({ diff --git a/ui/app/dto/PeersDTO.ts b/ui/app/dto/PeersDTO.ts index 632dfa35d0..6ad70d2742 100644 --- a/ui/app/dto/PeersDTO.ts +++ b/ui/app/dto/PeersDTO.ts @@ -5,6 +5,7 @@ import { EventHubConfig, EventHubGroupConfig, KafkaConfig, + MongoConfig, MySqlConfig, PostgresConfig, PubSubConfig, @@ -15,6 +16,7 @@ import { export type PeerConfig = | PostgresConfig | MySqlConfig + | MongoConfig | SnowflakeConfig | BigqueryConfig | ClickhouseConfig diff --git a/ui/app/mirrors/[mirrorId]/cdcDetails.tsx b/ui/app/mirrors/[mirrorId]/cdcDetails.tsx index bfbc99895c..b0786eba5c 100644 --- a/ui/app/mirrors/[mirrorId]/cdcDetails.tsx +++ b/ui/app/mirrors/[mirrorId]/cdcDetails.tsx @@ -136,7 +136,10 @@ export default function CdcDetails({ - + ); } diff --git a/ui/app/mirrors/[mirrorId]/columnDisplayModal.tsx b/ui/app/mirrors/[mirrorId]/columnDisplayModal.tsx new file mode 100644 index 0000000000..77cb2ac759 --- /dev/null +++ b/ui/app/mirrors/[mirrorId]/columnDisplayModal.tsx @@ -0,0 +1,219 @@ +'use client'; +import { TableMapping } from '@/grpc_generated/flow'; +import { TableColumnsResponse } from '@/grpc_generated/route'; +import { Button } from '@/lib/Button'; +import { Icon } from '@/lib/Icon'; +import { Label } from '@/lib/Label'; +import { Table, TableCell } from '@/lib/Table'; +import * as Dialog from '@radix-ui/react-dialog'; +import { TableRow } from '@tremor/react'; +import { useEffect, useState } from 'react'; + +interface ColumnDisplayModalProps { + isOpen: boolean; + onClose: () 
=> void; + sourceTableIdentifier: string; + destinationTableIdentifier: string; + tableMapping: TableMapping | null; + sourcePeerName: string; +} + +export default function ColumnDisplayModal({ + isOpen, + onClose, + sourceTableIdentifier, + destinationTableIdentifier, + tableMapping, + sourcePeerName, +}: ColumnDisplayModalProps) { + const [columns, setColumns] = useState([]); + const [loading, setLoading] = useState(false); + const [error, setError] = useState(null); + + useEffect(() => { + if (isOpen && sourceTableIdentifier && sourcePeerName) { + const fetchTableColumns = async () => { + try { + setLoading(true); + setError(null); + + // Parse schema and table name from sourceTableIdentifier (e.g., "public.users") + const [schemaName, tableName] = sourceTableIdentifier.split('.'); + + if (!schemaName || !tableName) { + throw new Error('Invalid table identifier format'); + } + + const response: TableColumnsResponse = await fetch( + `/api/v1/peers/columns?peer_name=${encodeURIComponent( + sourcePeerName + )}&schema_name=${encodeURIComponent(schemaName)}&table_name=${encodeURIComponent(tableName)}`, + { + cache: 'no-store', + } + ).then((res) => { + if (!res.ok) { + throw new Error('Failed to fetch columns'); + } + return res.json(); + }); + + setColumns(response.columns || []); + } catch (err) { + console.error('Error fetching columns:', err); + setError( + err instanceof Error ? err.message : 'Failed to fetch columns' + ); + setColumns([]); + } finally { + setLoading(false); + } + }; + + fetchTableColumns(); + } + }, [isOpen, sourceTableIdentifier, sourcePeerName]); + + const excludedColumns = new Set(tableMapping?.exclude || []); + + const sortedColumns = [...columns].sort((a, b) => { + const aExcluded = excludedColumns.has(a.name); + const bExcluded = excludedColumns.has(b.name); + + // Non-excluded columns first, then excluded columns + if (aExcluded !== bExcluded) { + return aExcluded ? 1 : -1; + } + + // Within each group, sort alphabetically + return a.name.localeCompare(b.name); + }); + + return ( + + + + + {/* Header */} +
+
+ + + +
+ + +
+
+ + + +
+ + {/* Content */} +
+ {loading && ( +
+ +
+ )} + + {error && ( +
+ +
+ )} + + {!loading && !error && columns.length === 0 && ( +
+ +
+ )} + + {!loading && !error && columns.length > 0 && ( + + Column Name + Type + Nullable + Primary Key + Status + + } + > + {sortedColumns.map((column) => { + const isExcluded = excludedColumns.has(column.name); + return ( + + + {column.name} + + + {column.type} + + + {column.nullable ? 'Yes' : 'No'} + + + {column.primaryKey ? 'Yes' : 'No'} + + + {isExcluded ? ( + + ) : ( + + )} + + + ); + })} +
+ )} + + {!loading && !error && excludedColumns.size > 0 && ( +
+ +
+ )} +
+ + {/* Footer */} +
+ + + +
+
+
+
+ ); +} diff --git a/ui/app/mirrors/[mirrorId]/tablePairs.tsx b/ui/app/mirrors/[mirrorId]/tablePairs.tsx index 47d00af092..5f8fe88d39 100644 --- a/ui/app/mirrors/[mirrorId]/tablePairs.tsx +++ b/ui/app/mirrors/[mirrorId]/tablePairs.tsx @@ -4,10 +4,20 @@ import { TableMapping } from '@/grpc_generated/flow'; import { SearchField } from '@/lib/SearchField'; import { Table, TableCell } from '@/lib/Table'; import { TableRow } from '@tremor/react'; -import { useMemo, useState } from 'react'; +import React, { useMemo, useState } from 'react'; +import ColumnDisplayModal from './columnDisplayModal'; -export default function TablePairs({ tables }: { tables?: TableMapping[] }) { +export default function TablePairs({ + tables, + sourcePeerName, +}: { + tables?: TableMapping[]; + sourcePeerName: string; +}) { const [searchQuery, setSearchQuery] = useState(''); + const [isModalOpen, setIsModalOpen] = useState(false); + const [selectedTable, setSelectedTable] = useState(null); + const shownTables: TableMapping[] | undefined = useMemo(() => { const shownTables = tables?.filter( (table: TableMapping) => @@ -17,6 +27,19 @@ export default function TablePairs({ tables }: { tables?: TableMapping[] }) { return shownTables?.length ? shownTables : tables; }, [tables, searchQuery]); + const handleTableClick = (table: TableMapping) => { + console.log('Clicked table:', table); + console.log('Table columns:', table.columns); + console.log('Columns length:', table.columns?.length); + setSelectedTable(table); + setIsModalOpen(true); + }; + + const handleCloseModal = () => { + setIsModalOpen(false); + setSelectedTable(null); + }; + return ( tables && (
@@ -40,6 +63,9 @@ export default function TablePairs({ tables }: { tables?: TableMapping[] }) { {shownTables?.map((table) => ( handleTableClick(table)} + style={{ cursor: 'pointer' }} + className='hover:bg-gray-50' > {table.sourceTableIdentifier} @@ -49,6 +75,17 @@ export default function TablePairs({ tables }: { tables?: TableMapping[] }) { ))}
+ + setIsModalOpen(false)} + sourceTableIdentifier={selectedTable?.sourceTableIdentifier ?? ''} + destinationTableIdentifier={ + selectedTable?.destinationTableIdentifier ?? '' + } + tableMapping={selectedTable} + sourcePeerName={sourcePeerName} + /> ) ); diff --git a/ui/app/mirrors/create/handlers.ts b/ui/app/mirrors/create/handlers.ts index a1120b0997..e7696d8608 100644 --- a/ui/app/mirrors/create/handlers.ts +++ b/ui/app/mirrors/create/handlers.ts @@ -258,8 +258,7 @@ export async function handleCreateCDC( } as CreateCDCFlowRequest), }); if (!res.ok) { - // I don't know why but if the order is reversed the error message is not - // shown + // don't know why but if order is reversed the error message is not shown setLoading(false); notifyErr((await res.json()).message || 'Unable to create mirror.'); return; diff --git a/ui/app/mirrors/create/helpers/common.ts b/ui/app/mirrors/create/helpers/common.ts index af299de494..597f34e806 100644 --- a/ui/app/mirrors/create/helpers/common.ts +++ b/ui/app/mirrors/create/helpers/common.ts @@ -44,6 +44,7 @@ export const blankCDCSetting: CDCConfig = { disablePeerDBColumns: false, env: {}, envString: '', + version: 0, }; export const blankQRepSetting: QRepConfig = { @@ -68,6 +69,7 @@ export const blankQRepSetting: QRepConfig = { script: '', system: TypeSystem.Q, env: {}, + version: 0, parentMirrorName: '', exclude: [], columns: [], diff --git a/ui/app/peers/create/[peerType]/handlers.ts b/ui/app/peers/create/[peerType]/handlers.ts index 93384f634a..7bf4946220 100644 --- a/ui/app/peers/create/[peerType]/handlers.ts +++ b/ui/app/peers/create/[peerType]/handlers.ts @@ -6,6 +6,7 @@ import { ElasticsearchConfig, EventHubGroupConfig, KafkaConfig, + MongoConfig, MySqlConfig, Peer, PostgresConfig, @@ -31,6 +32,7 @@ import { ehGroupSchema, esSchema, kaSchema, + mongoSchema, mySchema, peerNameSchema, pgSchema, @@ -105,6 +107,12 @@ function constructPeer( type: DBType.ELASTICSEARCH, elasticsearchConfig: config as ElasticsearchConfig, }; + case 'MONGO': + return { + name, + type: DBType.MONGO, + mongoConfig: config as MongoConfig, + }; default: return; } @@ -183,6 +191,11 @@ async function validateFields( const esConfig = esSchema.safeParse(config); if (!esConfig.success) validationErr = esConfig.error.issues[0].message; break; + case 'MONGO': + const mongoConfig = mongoSchema.safeParse(config); + if (!mongoConfig.success) + validationErr = mongoConfig.error.issues[0].message; + break; default: validationErr = 'Unsupported peer type ' + type; } diff --git a/ui/app/peers/create/[peerType]/helpers/ch.ts b/ui/app/peers/create/[peerType]/helpers/ch.ts index dede1c2d83..15138b3610 100644 --- a/ui/app/peers/create/[peerType]/helpers/ch.ts +++ b/ui/app/peers/create/[peerType]/helpers/ch.ts @@ -1,5 +1,6 @@ import { ClickhouseConfig } from '@/grpc_generated/peers'; import { PeerSetting } from './common'; +import { blankS3Setting } from './s3'; export const clickhouseSetting: PeerSetting[] = [ { @@ -44,96 +45,177 @@ export const clickhouseSetting: PeerSetting[] = [ tips: 'If you are using a non-TLS connection for ClickHouse server, check this box.', optional: true, }, + { + label: 'Certificate', + stateHandler: (value, setter) => { + if (!value) { + // remove key from state if empty + setter((curr) => { + const newCurr = { ...curr } as ClickhouseConfig; + delete newCurr.certificate; + return newCurr; + }); + } else setter((curr) => ({ ...curr, certificate: value as string })); + }, + type: 'file', + optional: true, + tips: 'This is only needed if the user is authenticated via 
certificate.', + }, + { + label: 'Private Key', + stateHandler: (value, setter) => { + if (!value) { + // remove key from state if empty + setter((curr) => { + const newCurr = { ...curr } as ClickhouseConfig; + delete newCurr.privateKey; + return newCurr; + }); + } else setter((curr) => ({ ...curr, privateKey: value as string })); + }, + type: 'file', + optional: true, + tips: 'This is only needed if the user is authenticated via certificate.', + }, + { + label: 'Root Certificate', + stateHandler: (value, setter) => { + if (!value) { + // remove key from state if empty + setter((curr) => { + const newCurr = { ...curr } as ClickhouseConfig; + delete newCurr.rootCa; + return newCurr; + }); + } else setter((curr) => ({ ...curr, rootCa: value as string })); + }, + type: 'file', + optional: true, + tips: 'If not provided, host CA roots will be used.', + }, { label: 'S3 Path', stateHandler: (value, setter) => - setter((curr) => ({ ...curr, s3Path: value as string })), + setter((curr) => ({ + ...curr, + s3: { + ...blankS3Setting, + ...(curr as ClickhouseConfig).s3, + url: value as string, + }, + })), tips: `This is an S3 bucket/object URL field. This bucket will be used as our intermediate stage for CDC`, placeholder: 's3://', + s3: true, }, { label: 'Access Key ID', stateHandler: (value, setter) => - setter((curr) => ({ ...curr, accessKeyId: value as string })), + setter((curr) => ({ + ...curr, + s3: { + ...blankS3Setting, + ...(curr as ClickhouseConfig).s3, + accessKeyId: value as string, + }, + })), tips: 'The AWS access key ID associated with your account.', helpfulLink: 'https://docs.aws.amazon.com/IAM/latest/UserGuide/id_credentials_access-keys.html', + s3: true, }, { label: 'Secret Access Key', stateHandler: (value, setter) => - setter((curr) => ({ ...curr, secretAccessKey: value as string })), + setter((curr) => ({ + ...curr, + s3: { + ...blankS3Setting, + ...(curr as ClickhouseConfig).s3, + secretAccessKey: value as string, + }, + })), tips: 'The AWS secret access key associated with the above bucket.', helpfulLink: 'https://docs.aws.amazon.com/IAM/latest/UserGuide/id_credentials_access-keys.html', + s3: true, }, { label: 'Region', stateHandler: (value, setter) => - setter((curr) => ({ ...curr, region: value as string })), + setter((curr) => ({ + ...curr, + s3: { + ...blankS3Setting, + ...(curr as ClickhouseConfig).s3, + region: value as string, + }, + })), tips: 'The region where your bucket is located. 
For example, us-east-1.', + s3: true, }, { label: 'Endpoint', stateHandler: (value, setter) => - setter((curr) => ({ ...curr, endpoint: value as string })), + setter((curr) => ({ + ...curr, + s3: { + ...blankS3Setting, + ...(curr as ClickhouseConfig).s3, + endpoint: value as string, + }, + })), helpfulLink: 'https://docs.aws.amazon.com/general/latest/gr/s3.html#s3_region', tips: 'An endpoint is the URL of the entry point for an AWS web service.', optional: true, + s3: true, }, { - label: 'Certificate', - stateHandler: (value, setter) => { - if (!value) { - // remove key from state if empty - setter((curr) => { - delete (curr as ClickhouseConfig)['certificate']; - return curr; - }); - } else setter((curr) => ({ ...curr, certificate: value as string })); - }, - type: 'file', - optional: true, - tips: 'This is only needed if the user is authenticated via certificate.', - }, - { - label: 'Private Key', + label: 'S3 Root Certificate', stateHandler: (value, setter) => { if (!value) { // remove key from state if empty setter((curr) => { - delete (curr as ClickhouseConfig)['privateKey']; + const s3 = (curr as ClickhouseConfig).s3; + if (s3) { + const new3 = { ...s3 }; + delete new3.rootCa; + return { ...curr, s3: new3 }; + } return curr; }); - } else setter((curr) => ({ ...curr, privateKey: value as string })); - }, - type: 'file', - optional: true, - tips: 'This is only needed if the user is authenticated via certificate.', - }, - { - label: 'Root Certificate', - stateHandler: (value, setter) => { - if (!value) { - // remove key from state if empty - setter((curr) => { - delete (curr as ClickhouseConfig)['rootCa']; - return curr; - }); - } else setter((curr) => ({ ...curr, rootCa: value as string })); + } else { + setter((curr) => ({ + ...curr, + s3: { + ...blankS3Setting, + ...(curr as ClickhouseConfig).s3, + rootCa: value as string, + }, + })); + } }, type: 'file', optional: true, tips: 'If not provided, host CA roots will be used.', + s3: true, }, { - label: 'TLS Hostname', + label: 'S3 TLS Hostname', field: 'tlsHost', stateHandler: (value, setter) => - setter((curr) => ({ ...curr, tlsHost: value as string })), + setter((curr) => ({ + ...curr, + s3: { + ...blankS3Setting, + ...(curr as ClickhouseConfig).s3, + tlsHost: value as string, + }, + })), tips: 'Overrides expected hostname during tls cert verification.', optional: true, + s3: true, }, ]; diff --git a/ui/app/peers/create/[peerType]/helpers/common.ts b/ui/app/peers/create/[peerType]/helpers/common.ts index 2a5b4ff2b7..337d23e65a 100644 --- a/ui/app/peers/create/[peerType]/helpers/common.ts +++ b/ui/app/peers/create/[peerType]/helpers/common.ts @@ -4,6 +4,7 @@ import { blankClickHouseSetting } from './ch'; import { blankEventHubGroupSetting } from './eh'; import { blankElasticsearchSetting } from './es'; import { blankKafkaSetting } from './ka'; +import { blankMongoSetting } from './mo'; import { blankMySqlSetting } from './my'; import { blankPostgresSetting } from './pg'; import { blankPubSubSetting } from './ps'; @@ -21,6 +22,7 @@ export interface PeerSetting { default?: string | number; placeholder?: string; options?: { value: string; label: string }[]; + s3?: true | undefined; } export function getBlankSetting(dbType: string): PeerConfig { @@ -45,6 +47,8 @@ export function getBlankSetting(dbType: string): PeerConfig { return blankEventHubGroupSetting; case 'ELASTICSEARCH': return blankElasticsearchSetting; + case 'MONGO': + return blankMongoSetting; default: return blankPostgresSetting; } diff --git 
a/ui/app/peers/create/[peerType]/helpers/mo.ts b/ui/app/peers/create/[peerType]/helpers/mo.ts new file mode 100644 index 0000000000..a2d2499222 --- /dev/null +++ b/ui/app/peers/create/[peerType]/helpers/mo.ts @@ -0,0 +1,16 @@ +import { MongoConfig } from '@/grpc_generated/peers'; +import { PeerSetting } from './common'; + +export const mongoSetting: PeerSetting[] = [ + { + label: 'Uri', + field: 'uri', + stateHandler: (value, setter) => + setter((curr) => ({ ...curr, uri: value as string })), + tips: 'MongoDB connection string', + }, +]; + +export const blankMongoSetting: MongoConfig = { + uri: '', +}; diff --git a/ui/app/peers/create/[peerType]/helpers/my.ts b/ui/app/peers/create/[peerType]/helpers/my.ts index 145ec90d13..e5483bba0f 100644 --- a/ui/app/peers/create/[peerType]/helpers/my.ts +++ b/ui/app/peers/create/[peerType]/helpers/my.ts @@ -104,8 +104,9 @@ export const mysqlSetting: PeerSetting[] = [ if (!value) { // remove key from state if empty setter((curr) => { - delete (curr as MySqlConfig)['rootCa']; - return curr; + const newCurr = { ...curr } as MySqlConfig; + delete newCurr.rootCa; + return newCurr; }); } else setter((curr) => ({ ...curr, rootCa: value as string })); }, diff --git a/ui/app/peers/create/[peerType]/helpers/pg.ts b/ui/app/peers/create/[peerType]/helpers/pg.ts index ad1da01c7b..dfe4c75be2 100644 --- a/ui/app/peers/create/[peerType]/helpers/pg.ts +++ b/ui/app/peers/create/[peerType]/helpers/pg.ts @@ -66,8 +66,9 @@ export const postgresSetting: PeerSetting[] = [ if (!value) { // remove key from state if empty setter((curr) => { - delete (curr as PostgresConfig)['rootCa']; - return curr; + const newCurr = { ...curr } as PostgresConfig; + delete newCurr.rootCa; + return newCurr; }); } else setter((curr) => ({ ...curr, rootCa: value as string })); }, diff --git a/ui/app/peers/create/[peerType]/helpers/s3.ts b/ui/app/peers/create/[peerType]/helpers/s3.ts index 8e98b91649..597398a694 100644 --- a/ui/app/peers/create/[peerType]/helpers/s3.ts +++ b/ui/app/peers/create/[peerType]/helpers/s3.ts @@ -1,4 +1,4 @@ -import { S3Config } from '@/grpc_generated/peers'; +import { AvroCodec, S3Config, avroCodecFromJSON } from '@/grpc_generated/peers'; import { PeerSetting } from './common'; export const s3Setting: PeerSetting[] = [ @@ -50,6 +50,44 @@ export const s3Setting: PeerSetting[] = [ 'https://docs.aws.amazon.com/IAM/latest/UserGuide/reference_identifiers.html#identifiers-arns', optional: true, }, + { + label: 'Root Certificate', + stateHandler: (value, setter) => { + if (!value) { + // remove key from state if empty + setter((curr) => { + const newCurr = { ...curr } as S3Config; + delete newCurr.rootCa; + return newCurr; + }); + } else setter((curr) => ({ ...curr, rootCa: value as string })); + }, + type: 'file', + optional: true, + tips: 'If not provided, host CA roots will be used.', + }, + { + label: 'TLS Host', + field: 'tlsHost', + stateHandler: (value, setter) => + setter((curr) => ({ ...curr, tlsHost: value as string })), + tips: 'Overrides expected hostname during tls cert verification.', + optional: true, + }, + { + label: 'Avro Codec', + field: 'codec', + stateHandler: (value, setter) => + setter((curr) => ({ ...curr, codec: avroCodecFromJSON(value) })), + type: 'select', + placeholder: 'Select avro codec', + options: [ + { value: 'Null', label: 'Null' }, + { value: 'Deflate', label: 'Deflate' }, + { value: 'Snappy', label: 'Snappy' }, + { value: 'ZStandard', label: 'ZStandard' }, + ], + }, ]; export const blankS3Setting: S3Config = { @@ -59,4 +97,7 @@ export const 
blankS3Setting: S3Config = { roleArn: undefined, region: undefined, endpoint: '', + rootCa: undefined, + tlsHost: '', + codec: AvroCodec.Null, }; diff --git a/ui/app/peers/create/[peerType]/helpers/sf.ts b/ui/app/peers/create/[peerType]/helpers/sf.ts index 7ab474d1e8..7866c055bb 100644 --- a/ui/app/peers/create/[peerType]/helpers/sf.ts +++ b/ui/app/peers/create/[peerType]/helpers/sf.ts @@ -54,8 +54,9 @@ export const snowflakeSetting: PeerSetting[] = [ if (!value) { // remove password key from state if empty setter((curr) => { - delete (curr as SnowflakeConfig)['password']; - return curr; + const newCurr = { ...curr } as SnowflakeConfig; + delete newCurr.password; + return newCurr; }); } else setter((curr) => ({ ...curr, password: value as string })); }, diff --git a/ui/app/peers/create/[peerType]/page.tsx b/ui/app/peers/create/[peerType]/page.tsx index a25fb70b6c..74930ca140 100644 --- a/ui/app/peers/create/[peerType]/page.tsx +++ b/ui/app/peers/create/[peerType]/page.tsx @@ -4,6 +4,7 @@ import GuideForDestinationSetup from '@/app/mirrors/create/cdc/guide'; import BigqueryForm from '@/components/PeerForms/BigqueryConfig'; import ClickHouseForm from '@/components/PeerForms/ClickhouseConfig'; import KafkaForm from '@/components/PeerForms/KafkaConfig'; +import MongoForm from '@/components/PeerForms/MongoForm'; import MySqlForm from '@/components/PeerForms/MySqlForm'; import PostgresForm from '@/components/PeerForms/PostgresForm'; import PubSubForm from '@/components/PeerForms/PubSubConfig'; @@ -62,7 +63,6 @@ export default function CreateConfig({ const getDBType = () => { if ( peerType.includes('POSTGRES') || - peerType.includes('TEMBO') || peerType.includes('NEON') || peerType.includes('SUPABASE') ) { @@ -74,7 +74,7 @@ export default function CreateConfig({ return peerType; }; - const configComponentMap = (peerType: string) => { + const configComponentMap = () => { switch (getDBType()) { case 'POSTGRES': return ( @@ -82,7 +82,6 @@ export default function CreateConfig({ settings={postgresSetting} setter={setConfig} config={config as PostgresConfig} - type={peerType} /> ); case 'MYSQL': @@ -121,6 +120,8 @@ export default function CreateConfig({ setter={setConfig} /> ); + case 'MONGO': + return ; default: return <>; } @@ -206,7 +207,7 @@ export default function CreateConfig({ Configuration -
{configComponentMap(peerType)}
+
{configComponentMap()}
- + ); } diff --git a/ui/components/PeerForms/KafkaConfig.tsx b/ui/components/PeerForms/KafkaConfig.tsx index 7fb40d6aee..4cb22af8a9 100644 --- a/ui/components/PeerForms/KafkaConfig.tsx +++ b/ui/components/PeerForms/KafkaConfig.tsx @@ -26,7 +26,7 @@ export default function KafkaForm({ setter }: KafkaProps) { {!setting.optional && (