refactor to add ingest batches for better traceability

This commit is contained in:
2025-10-23 18:11:26 +02:00
parent 1f0eb18a5f
commit fb4d31afba
6 changed files with 104 additions and 23 deletions
+59 -16
View File
@@ -8,8 +8,21 @@ import (
"github.com/jackc/pgx/v5/pgxpool"
)
// InsertMeteoDataParams bundles everything InsertMeteoDataTX needs to persist
// one ingest run.
type InsertMeteoDataParams struct {
// FileChecksum is written to the file_checksum column of the new
// public.ingest_batch row.
FileChecksum string
// Accepted holds the rows destined for public.meteo_data.
Accepted []MeteoData
// Rejected holds the rows destined for public.rejected_data.
Rejected []RejectedMeteoData
}
// InsertMeteoDataResult is what InsertMeteoDataTX reports after a successful
// commit.
type InsertMeteoDataResult struct {
// BatchID is the id returned by the insert into public.ingest_batch.
BatchID int
// AcceptedCount is the count returned by the accepted-rows insert.
AcceptedCount int
// RejectedCount is the count returned by the rejected-rows insert.
RejectedCount int
}
// Repository is the persistence contract for meteo ingestion; NewPGXRepo
// returns the implementation backed by a pgx pool.
type Repository interface {
// NOTE(review): the next line appears to be the pre-refactor signature that
// this diff view shows as removed; the line after it is its replacement.
InsertMeteoDataTX(ctx context.Context, accepted []MeteoData, rejected []RejectedMeteoData) (int, int, error)
// InsertMeteoDataTX persists the checksum, accepted rows and rejected rows
// carried by params and returns the batch ID together with both row counts.
InsertMeteoDataTX(ctx context.Context, params InsertMeteoDataParams) (*InsertMeteoDataResult, error)
// UpdateBatchElapsedTime stores elapsedMS in the elapsed_ms column of the
// batch identified by batchID.
UpdateBatchElapsedTime(ctx context.Context, batchID int, elapsedMS int) error
}
type pgxRepo struct {
@@ -22,33 +35,53 @@ func NewPGXRepo(pool *pgxpool.Pool) Repository {
}
}
const insertAcceptedMeteoData = `insert into public.meteo_data (location_name, max_temp, min_temp, rainfall, cloudiness, created_at) values ($1, $2, $3, $4, $5, $6) returning id`
// InsertMeteoDataTX stores one ingest batch together with its accepted and
// rejected rows inside a single transaction. On success it returns the new
// batch ID and the two row counts; on any failure it returns a nil result and
// the error.
//
// NOTE(review): this span is rendered from a diff, so each superseded line
// (the old signature, every `return 0, 0, ...`, the old helper calls) sits
// directly above the line that replaces it.
func (pgx *pgxRepo) InsertMeteoDataTX(ctx context.Context, accepted []MeteoData, rejected []RejectedMeteoData) (int, int, error) {
func (pgx *pgxRepo) InsertMeteoDataTX(ctx context.Context, params InsertMeteoDataParams) (*InsertMeteoDataResult, error) {
tx, err := pgx.Begin(ctx)
if err != nil {
return 0, 0, fmt.Errorf("error starting transaction: %w", err)
return nil, fmt.Errorf("error starting transaction: %w", err)
}
// Deferred rollback covers every early return below; presumably harmless
// once Commit has succeeded (pgx Tx semantics) — confirm.
defer tx.Rollback(ctx)
acceptedCount, err := pgx.insertAcceptedMeteoData(ctx, tx, accepted)
// The batch row is created first because its ID is passed to both row
// inserts that follow.
batchID, err := pgx.insertBatch(ctx, tx, params.FileChecksum)
if err != nil {
return 0, 0, err
return nil, err
}
rejectedCount, err := pgx.insertRejectedMeteoData(ctx, tx, rejected)
acceptedCount, err := pgx.insertAcceptedMeteoData(ctx, tx, batchID, params.Accepted)
if err != nil {
return 0, 0, err
return nil, err
}
rejectedCount, err := pgx.insertRejectedMeteoData(ctx, tx, batchID, params.Rejected)
if err != nil {
return nil, err
}
if err = tx.Commit(ctx); err != nil {
return 0, 0, fmt.Errorf("error committing transaction: %w", err)
return nil, fmt.Errorf("error committing transaction: %w", err)
}
return acceptedCount, rejectedCount, nil
return &InsertMeteoDataResult{
BatchID: batchID,
AcceptedCount: acceptedCount,
RejectedCount: rejectedCount,
}, nil
}
func (pgx *pgxRepo) insertAcceptedMeteoData(ctx context.Context, tx b.Tx, data []MeteoData) (int, error) {
// insertBatch is the statement that creates an ingest_batch row and hands
// back its generated id.
const insertBatch = `insert into public.ingest_batch (elapsed_ms, file_checksum) values ($1, $2) returning id`

// insertBatch creates a new public.ingest_batch row inside tx for the given
// file checksum and returns the generated id.
//
// elapsed_ms is written as 0 here; UpdateBatchElapsedTime can overwrite it
// afterwards. On failure the returned id is 0 and the error is wrapped.
func (pgx *pgxRepo) insertBatch(ctx context.Context, tx b.Tx, fileChecksum string) (int, error) {
	var id int

	row := tx.QueryRow(ctx, insertBatch, 0, fileChecksum)
	if scanErr := row.Scan(&id); scanErr != nil {
		return 0, fmt.Errorf("error inserting batch: %w", scanErr)
	}

	return id, nil
}
const insertAcceptedMeteoData = `insert into public.meteo_data (batch_id, location_name, date_of_register, max_temp, min_temp, rainfall, cloudiness) values ($1, $2, $3, $4, $5, $6, $7) returning id`
func (pgx *pgxRepo) insertAcceptedMeteoData(ctx context.Context, tx b.Tx, batchID int, data []MeteoData) (int, error) {
if len(data) == 0 {
return 0, nil
}
@@ -56,7 +89,7 @@ func (pgx *pgxRepo) insertAcceptedMeteoData(ctx context.Context, tx b.Tx, data [
batch := &b.Batch{}
for _, d := range data {
batch.Queue(insertAcceptedMeteoData, d.Location, d.MaxTemp, d.MinTemp, d.Rainfall, d.Cloudiness, d.Timestamp)
batch.Queue(insertAcceptedMeteoData, batchID, d.Location, d.Timestamp, d.MaxTemp, d.MinTemp, d.Rainfall, d.Cloudiness)
}
results := tx.SendBatch(ctx, batch)
@@ -76,9 +109,9 @@ func (pgx *pgxRepo) insertAcceptedMeteoData(ctx context.Context, tx b.Tx, data [
return rowsInserted, nil
}
const insertRejectedMeteoData = `insert into public.rejected_data (raw_data, reason) values ($1, $2) returning id`
const insertRejectedMeteoData = `insert into public.rejected_data (batch_id, raw_data, reason) values ($1, $2, $3) returning id`
func (pgx *pgxRepo) insertRejectedMeteoData(ctx context.Context, tx b.Tx, data []RejectedMeteoData) (int, error) {
func (pgx *pgxRepo) insertRejectedMeteoData(ctx context.Context, tx b.Tx, batchID int, data []RejectedMeteoData) (int, error) {
if len(data) == 0 {
return 0, nil
}
@@ -86,7 +119,7 @@ func (pgx *pgxRepo) insertRejectedMeteoData(ctx context.Context, tx b.Tx, data [
batch := &b.Batch{}
for _, d := range data {
batch.Queue(insertRejectedMeteoData, d.RowValue, d.Reason)
batch.Queue(insertRejectedMeteoData, batchID, d.RowValue, d.Reason)
}
results := tx.SendBatch(ctx, batch)
@@ -105,3 +138,13 @@ func (pgx *pgxRepo) insertRejectedMeteoData(ctx context.Context, tx b.Tx, data [
return rowsInserted, nil
}
// updateBatchElapsedTime sets elapsed_ms ($1) on the ingest_batch row with
// the given id ($2).
const updateBatchElapsedTime = `update public.ingest_batch set elapsed_ms = $1 where id = $2`

// UpdateBatchElapsedTime writes elapsedMS into the elapsed_ms column of the
// public.ingest_batch row identified by batchID.
//
// The statement placeholders are ordered (elapsed_ms, id), the reverse of
// this method's parameter order. Any execution error is wrapped and returned.
// NOTE(review): the Exec result is discarded, so an update that matches no
// row would presumably go unnoticed.
func (pgx *pgxRepo) UpdateBatchElapsedTime(ctx context.Context, batchID int, elapsedMS int) error {
	if _, execErr := pgx.Exec(ctx, updateBatchElapsedTime, elapsedMS, batchID); execErr != nil {
		return fmt.Errorf("error updating batch elapsed time: %w", execErr)
	}

	return nil
}