SQL Server DDL Execution – displaying errors below each statement

When working with large SQL Server databases, DBAs often need to execute DDL scripts containing thousands of lines of code. While SQLCMD provides a straightforward way to execute these scripts, its error reporting can fall short when troubleshooting failures in large scripts. I recently ran into this while executing a large DDL file: the standard SQLCMD output only showed generic errors such as “Function ‘fn_xyz’ already exists with the same name” without indicating which specific statement caused the error. The line number displayed usually refers to the line within the failing DDL statement, not the line within the context of the larger DDL file.

For effective troubleshooting, I needed to see the SQL (DDL) statement that was executed followed by the error. Having these elements together would allow me to quickly correlate errors with their corresponding SQL statements, significantly reducing debugging time. To fulfill this need, I wrote a Python solution that provides granular execution control and comprehensive output logging. This approach offers several advantages:

Individual statement execution
Paired output of SQL and results
Clear separation between statements

The Python code, followed by sample successful and unsuccessful results, is below.

Python code

import pyodbc
import os
from datetime import datetime

def execute_sql_file(input_file, output_file, server, user, password):
    # Build the connection string from the supplied parameters
    connection_string = f'DRIVER={{SQL Server}};SERVER={server};DATABASE=master;UID={user};PWD={password}'
    
    try:
        # Establish connection
        conn = pyodbc.connect(connection_string)
        cursor = conn.cursor()
        
        current_batch = []
        
        with open(input_file, 'r') as infile, \
             open(output_file, 'w') as outfile:
            
            # Write header with timestamp
            outfile.write(f"SQL Execution Results - {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")
            outfile.write("-"*80 + "\n\n")
            
            for line in infile:
                line = line.rstrip('\n\r')  # remove the trailing newline/carriage return

                # Blank lines are intentionally kept as part of the current batch.
                # If we hit a GO batch separator, execute the accumulated batch
                if line.strip().lower() == 'go':
                    if current_batch:
                        sql_statement = '\n'.join(current_batch)
                        
                        # Write the input SQL to output file
                        outfile.write("Input SQL:\n")
                        outfile.write("-"*11 + "\n")
                        outfile.write(sql_statement + "\n\n")
                        
                        try:
                            # Execute the SQL batch
                            results = cursor.execute(sql_statement)
                            
                            # Write results header
                            outfile.write("Results:\n")
                            outfile.write("-"*8 + "\n")
                            
                            # Fetch and write column names if available
                            if cursor.description:
                                columns = [column[0] for column in cursor.description]
                                outfile.write(" | ".join(columns) + "\n")
                                outfile.write("-" * (len(" | ".join(columns))) + "\n")
                                
                                # Fetch and write results
                                for row in results:
                                    outfile.write(" | ".join(str(value) for value in row) + "\n")
                            
                            conn.commit()
                            
                        except pyodbc.Error as e:
                            # The driver seems to return 'Protocol error in TDS stream'
                            # when the object already exists, so that case is ignored
                            if e.args[0] == 'HY000' and 'Protocol error in TDS stream' in str(e):
                                outfile.write(f"Ignoring TDS Protocol error: {str(e)}\n")
                                outfile.write("Possibly caused by object already existing\n")
                            else:
                                outfile.write(f"Error executing SQL: {str(e)}\n")
                                conn.rollback()
                            
                        outfile.write("\n" + "="*100 + "\n\n")
                        current_batch = []
                else:
                    current_batch.append(line)
                    
    except pyodbc.Error as e:
        print(f"Database connection error: {str(e)}")
    finally:
        if 'conn' in locals():
            conn.close()

def main():
    # Configuration
    input_file = "input.sql"
    output_file = "sql_results.txt"
    server = "my_server"
    user = "my_user"
    password = "my_password"
    
    execute_sql_file(input_file, output_file, server, user, password)
    print("SQL execution completed. Check output file for results.")

if __name__ == "__main__":
    main()

Successful execution

SQL Execution Results - 2025-01-29 20:02:42
--------------------------------------------------------------------------------

Input SQL:
-----------
USE [dcdb01]

Results:
--------

=====================================================================================

Input SQL:
-----------

use [dcdb01]

Results:
--------

=====================================================================================

Input SQL:
-----------
CREATE TABLE [dbo].[dc_test_01](
	[id_01] [bigint] NOT NULL,
	[data_01] [varchar](10) COLLATE SQL_Latin1_General_CP1_CI_AS NULL
 CONSTRAINT [id_01] PRIMARY KEY CLUSTERED 
(
	[id_01] ASC
) 
WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON, OPTIMIZE_FOR_SEQUENTIAL_KEY = OFF) ON [PRIMARY]
) ON [PRIMARY] TEXTIMAGE_ON [PRIMARY]

Results:
--------

=====================================================================================

Input SQL:
-----------
CREATE TABLE [dbo].[dc_test_02](
	[id_02] [bigint] NOT NULL,
	[data_02] [varchar](10) COLLATE SQL_Latin1_General_CP1_CI_AS NULL
 CONSTRAINT [id_02] PRIMARY KEY CLUSTERED 
(
	[id_02] ASC
) 
WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON, OPTIMIZE_FOR_SEQUENTIAL_KEY = OFF) ON [PRIMARY]
) ON [PRIMARY] TEXTIMAGE_ON [PRIMARY]

Results:
--------

=====================================================================================

Unsuccessful execution

SQL Execution Results - 2025-01-29 20:01:38
--------------------------------------------------------------------------------

Input SQL:
-----------
USE [dcdb01]

Results:
--------

=====================================================================================

Input SQL:
-----------

use [dcdb01]

Results:
--------

=====================================================================================

Input SQL:
-----------
CREATE TABLE [dbo].[dc_test_01](
	[id_01] [bigint] NOT NULL,
	[data_01] [varchar](10) COLLATE SQL_Latin1_General_CP1_CI_AS NULL
 CONSTRAINT [id_01] PRIMARY KEY CLUSTERED 
(
	[id_01] ASC
) 
WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON, OPTIMIZE_FOR_SEQUENTIAL_KEY = OFF) ON [PRIMARY]
) ON [PRIMARY] TEXTIMAGE_ON [PRIMARY]

Ignoring TDS Protocol error: ('HY000', '[HY000] [Microsoft][ODBC SQL Server Driver]Protocol error in TDS stream (0) (SQLExecDirectW)')
Possibly caused by object already existing

=====================================================================================

Input SQL:
-----------
CREATE TABLE [dbo].[dc_test_02](
	[id_02] [bigint] NOT NULL,
	[data_02] [varchar](10) COLLATE SQL_Latin1_General_CP1_CI_AS NULL
 CONSTRAINT [id_02] PRIMARY KEY CLUSTERED 
(
	[id_02] ASC
) 
WITH (PAD_INDEX = OFF, STATISTICS_NORECOMPUTE = OFF, IGNORE_DUP_KEY = OFF, ALLOW_ROW_LOCKS = ON, ALLOW_PAGE_LOCKS = ON, OPTIMIZE_FOR_SEQUENTIAL_KEY = OFF) ON [PRIMARY]
) ON [PRIMARY] TEXTIMAGE_ON [PRIMARY]

Ignoring TDS Protocol error: ('HY000', '[HY000] [Microsoft][ODBC SQL Server Driver]Protocol error in TDS stream (0) (SQLExecDirectW)')
Possibly caused by object already existing

=====================================================================================

Harnessing Amazon Bedrock: A Simple Image Generation Demo with Stability AI

In today’s rapidly evolving AI landscape, Amazon Bedrock is a quick, powerful, and easy-to-use platform for leveraging foundation models. This blog post will guide you through creating a simple web solution that demonstrates Stability AI’s image generation capabilities using Amazon Bedrock.

The goal is to build a simple website that accepts user prompts and generates corresponding images. The demo showcases the potential of AI-driven image creation and provides hands-on experience together with actual code. There is no reliance on PowerPoint decks 🙂 .

Infrastructure Setup
For this demonstration, I will be running the web server on an Amazon EC2 instance with Amazon Linux 2. The instance will be equipped with Python 3, Flask for the web framework, and boto3 for AWS SDK interactions.

A second Windows EC2 instance located in the same VPC and subnet will be used to access the web page. The security group of the Linux server should allow connections from the Windows server on port 5000.

Accessing Stability AI Model
Before executing the code, you must request access to the Stability AI model via the Amazon Bedrock console. This step allows the EC2 instance to execute the Stability AI model.

IAM Policy Configuration
To allow our EC2 instance to access the Stability AI model, we need to attach the below IAM policy:

{
    "Version": "2012-10-17",
    "Statement": [
        {
            "Effect": "Allow",
            "Action": [
                "bedrock:InvokeModel",
                "bedrock:InvokeModelWithResponseStream"
            ],
            "Resource": [
                "arn:aws:bedrock:us-east-1::foundation-model/stability.stable-diffusion-xl-v1"
            ]
        }
    ]
}

This can be added as an inline policy to the IAM role used by your EC2 instance.

Upload the Python code (see the download link at the bottom of the page) to a file named my_ai_web.py and start the web server with the command python ./my_ai_web.py, as shown in the image below.

Sign on to the Windows server and browse to the second IP address displayed in the image above. The following web page should be displayed:

Enter a prompt and click on the “Generate Image” button as shown below:

At this point the code makes a call to Bedrock to process the prompt and generate the image:
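
For reference, the heart of that call is an invoke_model request against the stability.stable-diffusion-xl-v1 model. Below is a minimal sketch of what such a Flask handler might look like; the route name, form field, and the cfg_scale/steps values are illustrative assumptions, and the downloadable code may differ in its details:

import json

import boto3
from flask import Flask, request

app = Flask(__name__)

# Assumed region and model id; adjust to match where you enabled model access
bedrock_runtime = boto3.client("bedrock-runtime", region_name="us-east-1")
MODEL_ID = "stability.stable-diffusion-xl-v1"

@app.route("/generate", methods=["POST"])
def generate_image():
    prompt = request.form.get("prompt", "")

    # Request body format expected by the Stability AI SDXL model on Bedrock
    body = json.dumps({
        "text_prompts": [{"text": prompt}],
        "cfg_scale": 10,   # illustrative value
        "steps": 50,       # illustrative value
    })

    response = bedrock_runtime.invoke_model(
        modelId=MODEL_ID,
        body=body,
        contentType="application/json",
        accept="application/json",
    )

    # The generated image is returned base64-encoded in the first artifact
    payload = json.loads(response["body"].read())
    image_b64 = payload["artifacts"][0]["base64"]

    # Embed the image directly in the returned HTML
    return f'<img src="data:image/png;base64,{image_b64}" alt="generated image">'

if __name__ == "__main__":
    # Listen on all interfaces so the Windows instance can reach port 5000
    app.run(host="0.0.0.0", port=5000)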

After the image is generated, it is displayed as shown below:

Python/Flask code download link

National Museum of World War II Aviation’s Corsair flight

Hearing first-hand the roar of the Pratt & Whitney engine and seeing the legendary Corsair take off was a once-in-a-lifetime experience for me! Many thanks to the National Museum of World War II Aviation for putting on such an awesome demonstration this past weekend. It was amazing seeing history in flight!

Sit back and relax, the videos are on their way! Just give them a sec to load—they’re best enjoyed in full screen! 🎬✨

Engine start

Taxi

Engine shutdown and wings folding

For those interested in the details of this magnificent aircraft, check out this link: Brewster F3A Corsair.

If you are ever in Colorado Springs, Colorado, spend a few hours at the National Museum of World War II Aviation.

Using IAM Authentication with Redshift

Managing user access to your Amazon Redshift database can be a headache, especially when it comes to maintaining individual user accounts and passwords. However, there’s a better way – you can leverage your existing AWS Identity and Access Management (IAM) permissions to generate temporary database credentials for your users.

By configuring your system to authenticate users based on their IAM credentials, you can eliminate the need to create and manage individual database user accounts. Users can simply log in to your Redshift database using their IAM credentials, and your system will generate the necessary database credentials on-the-fly.

This approach offers several benefits. It streamlines user management, as you can leverage your existing IAM permissions structure to control access to the database. It also enhances security, as you don’t have to worry about users sharing or mishandling database passwords.

The official AWS documentation is at Using IAM authentication to generate database user credentials.

In my particular use case, I had to connect to a Redshift database with temporary IAM authentication, and there was one small wrinkle that cost me a couple of hours. I am documenting it together with the code in the hope that it might save someone else some time in the future.

Below is my Python code.

import boto3
from botocore.exceptions import ClientError
import psycopg
psycopg._encodings._py_codecs["UNICODE"] = "utf-8"
psycopg._encodings.py_codecs.update(
    (k.encode(), v) for k, v in psycopg._encodings._py_codecs.items()
)
from psycopg import OperationalError

def connect_to_the_database(db_user, cluster_identifier, db_name, db_host, db_port):

    print('Original DB user     : ', db_user)

    redshift = boto3.client('redshift', 'us-west-1')
    try:
        credentials = redshift.get_cluster_credentials(
            DbUser = db_user,
            DbName = db_name,
            ClusterIdentifier = cluster_identifier,
            DurationSeconds = 900,
            AutoCreate = False,
        )
    except ClientError as e:
        print('Error getting cluster credentials: ', e)
        raise
    
    db_password = credentials.get("DbPassword")
    db_user     = credentials.get("DbUser")

    print('Modified IAM DB user : ', db_user)
    print('IAM DB password      : ', db_password)

    connection_args = 'host={} port={} dbname={} user={} password={}'.format(
        db_host,
        db_port,
        db_name,
        db_user,
        db_password
    )
    try:
        conn = psycopg.connect(connection_args)
    except psycopg.Error as e:
        print('Error connecting to the database: ', e)
        raise

    return conn

#
## main processing
#
if __name__ == '__main__':
    db_user='my_db_user'
    cluster_identifier='my-redshift-cluster'
    db_name='test'
    db_host='my-redshift-cluster.xxx.us-west-1.redshift.amazonaws.com'
    db_port=5439
    conn = connect_to_the_database(db_user, cluster_identifier, db_name, db_host, db_port)

Note that in the main processing the user name is “my_db_user” and this is the name passed to the function “connect_to_the_database”. This is confirmed by the print statement at the start of the function:

print('Original DB user     : ', db_user)

My expectation was that the call to boto3’s get_cluster_credentials would return a password that I would use to connect to the database. However, you also have to extract the modified user name from the returned “credentials” dictionary object. This modified user name, together with the temporary password, is what you use to connect to the database.

The output of the program’s print statements is below:

Original DB user     :  my_db_user
Modified IAM DB user :  IAM:my_db_user
IAM DB password      :  JmRCSmqxEyw2Seo89mWUg2H1PG9pONROZJBXA==

Note the difference between the original DB user and the modified IAM DB user. The user name used to connect to the database has to be prefixed with the text “IAM:”. This user name, IAM:my_db_user, is also the one that will appear in the stv_sessions, stv_inflight, and similar system views.

Create and deploy a lambda written in Python accessing PostgreSQL or Redshift

In this blog I will demonstrate the creation of a Lambda written in Python that accesses a Postgres or Redshift database. Since Redshift is based on Postgres, the same process works for both.

In order to access Postgres from Lambda we need to create and deploy a Lambda layer containing Psycopg, the most popular PostgreSQL database adapter for the Python programming language. I will be deploying version 3 of Psycopg.

Prerequisites
The first step is to create the zip file containing the software required by the layer. A typical approach is to perform these steps on an EC2 instance, but that requires deploying an instance with the required permissions (IAM role). Instead, I will be running these commands in an AWS CloudShell environment. If you are connected to the AWS Management Console, either search for “CloudShell” or click on the icon usually located in the top right corner of the page.

Important note : This blog is using Python version 3.12 in all the commands. If you need to use a different version, change “3.12” to the version you need.

Install Psycopg
Make a directory to house the software. It is important that this directory structure be exactly as defined below as we will be zipping this directory to create the layer.

mkdir -p dc-layer/python

And then change directory to the above directory

cd dc-layer

Install the software with the command

pip3 install "psycopg[binary]" \
		--python-version 3.12 \
		--only-binary=:all: \
		--platform manylinux2014_x86_64 \
		-t python

The following directory structure and files should exist

/home/cloudshell-user $ pwd
/home/cloudshell-user/dc-layer/python
/home/cloudshell-user $ ls -l
total 160
drwxr-xr-x. 6 cloudshell-user cloudshell-user   4096 Jul  8 20:37 psycopg
drwxr-xr-x. 2 cloudshell-user cloudshell-user   4096 Jul  8 20:37 psycopg-3.2.1.dist-info
drwxr-xr-x. 3 cloudshell-user cloudshell-user   4096 Jul  8 20:37 psycopg_binary
drwxr-xr-x. 2 cloudshell-user cloudshell-user   4096 Jul  8 20:37 psycopg_binary-3.2.1.dist-info
drwxr-xr-x. 2 cloudshell-user cloudshell-user   4096 Jul  8 20:37 psycopg_binary.libs
drwxr-xr-x. 2 cloudshell-user cloudshell-user   4096 Jul  8 20:37 __pycache__
drwxr-xr-x. 2 cloudshell-user cloudshell-user   4096 Jul  8 20:37 typing_extensions-4.12.2.dist-info
-rw-r--r--. 1 cloudshell-user cloudshell-user 134451 Jul  8 20:37 typing_extensions.py

Zip the software for the lambda layer
Go up one level to the directory that contains the “python” directory and zip the software installed in the previous step with

zip -r layer.zip python

The directory should now contain a zip file

/home/cloudshell-user $ pwd; ls -l
/home/cloudshell-user/dc-layer
total 4668
-rw-r--r--. 1 cloudshell-user cloudshell-user 4772949 Jul  8 20:43 layer.zip
drwxr-xr-x. 9 cloudshell-user cloudshell-user    4096 Jul  8 20:37 python

Publish the layer
Publish the layer using AWS CLI with the below command

aws lambda publish-layer-version --layer-name dc-psycopg3-layer \
    --zip-file fileb://layer.zip \
    --compatible-runtimes python3.12 \
    --compatible-architectures "x86_64"

Confirm that the layer was published
From the AWS Management Console, navigate to the Lambda Layers page and confirm that the above layer is present.

Create a Lambda function
Create a new Lambda function using the Python 3.12 runtime.

Add the previously published layer to the function’s configuration.
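
If you prefer to script this step instead of using the console, a minimal boto3 sketch along the following lines can attach the layer to an existing function; the function name and layer version ARN below are placeholder assumptions:

import boto3

# Placeholders: replace with your function name and the LayerVersionArn
# returned by the publish-layer-version command above
FUNCTION_NAME = "my-redshift-lambda"
LAYER_VERSION_ARN = "arn:aws:lambda:us-east-1:111122223333:layer:dc-psycopg3-layer:1"

lambda_client = boto3.client("lambda")

# Note: the Layers parameter replaces the function's existing list of layers
response = lambda_client.update_function_configuration(
    FunctionName=FUNCTION_NAME,
    Layers=[LAYER_VERSION_ARN],
)
print("Layers now attached:", [layer["Arn"] for layer in response["Layers"]])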

Lambda function – sample code
Below is sample code that connects to a Redshift cluster and performs a select on a table

import logging
import psycopg
from psycopg import OperationalError
psycopg._encodings._py_codecs["UNICODE"] = "utf-8"
psycopg._encodings.py_codecs.update(
    (k.encode(), v) for k, v in psycopg._encodings._py_codecs.items()
)

logger = logging.getLogger()
logger.setLevel(logging.INFO)

def lambda_handler(event, context):

    print('**** Connecting to Redshift')
    my_hostname = 'my-cluster.us-east-1.redshift.amazonaws.com'
    my_database = 'my-database'
    my_user     = 'my-database-user'
    my_password = 'my-db-user-password'
    my_port     =  5439

    connection_args = 'host={} port={} dbname={} user={} password={}'.format(
        my_hostname,
        my_port,
        my_database,
        my_user,
        my_password
    )

    try:
        logger.info("Establishing connection to Redshift")
        conn = psycopg.connect(connection_args)
        logger.info("SUCCESS: Connection to Redshift succeeded!")
    except psycopg.Error as e:
        logger.error("ERROR: Unexpected error: Could not connect to Redshift.")
        logger.error(e)
        raise


    sql_text = "select col_01, col_02 from my_schema.my_table where col_01 = 'dc-test'"
    print(sql_text)

    try:
        cur = conn.cursor()
        cur.execute(sql_text)
        for row in cur.fetchall():
            col_01 = row[0]
            col_02 = row[1]
            print(col_01, col_02)
        cur.close()
    except psycopg.Error as e:
        print('SQL failed. Failing text is ', sql_text)
        print(f"Error from database",e)

Redshift – Using IAM authentication to generate database user credentials

An alternative to defining a user with a password in the database is to use IAM authentication to generate a temporary “password” that is valid for a predefined time. This process is defined in this blog post. All the commands were executed on an AWS EC2 Linux server using PSQL.

Create a user without a password
Connect to the database as a super user and create a user with the below SQL statement:

create user temp_creds_user password disable;

If the user currently exists in the database with a password and you want to change to IAM authentication, alter the user with the below SQL statement:

alter user temp_creds_user password disable;

Access to get credentials
In order to get the temporary credentials, we need to call get-cluster-credentials which requires redshift:GetClusterCredentials access. This is documented at this link.

Generate the temporary credentials
Generate temporary credentials with the below command:

aws redshift get-cluster-credentials \
    --db-user temp_creds_user \
    --db-name  \
    --cluster-identifier  \
    --no-auto-create \
    --duration-seconds 900

The above command will generate output similar to:

Connect to the database
Connect to the database with the below command. Note that the name of the database user has been prefixed with “IAM:”

/usr/bin/psql --host= --port=5439 --username=IAM:temp_creds_user --dbname=

The above command will generate output similar to:

Examples of Redshift’s Unload, Copy, and Super Data Type

Recently, my focus has been on using Amazon Redshift for a data warehousing solution, and I wanted to share some insights gained from working with the UNLOAD and COPY commands and the SUPER data type. Unloading data allows for efficient extraction from Redshift to various storage options, facilitating downstream analytics and archival processes. Similarly, the COPY command serves as a linchpin for ingesting data into Redshift, enabling seamless integration from diverse sources. The SUPER data type is a flexible option for accommodating complex data structures without the need for continuous schema changes.

Unload and rename a subset of columns
If you need to unload a subset of columns from a source table for load into a target table with different column names, this can be accomplished as follows:

unload (' select   old_col_01     as new_col_01
                  ,old_col_23     as new_col_02
                  ,old_col_42     as new_col_03
           from    my_schema.my_table_name
          where    old_col_24     is not null
'
)
to 's3://my-s3-bucket/table_01_data_'
iam_role  'arn:aws:iam::93073150:role/my-redshift-role'
JSON
;

The column name old_col_01 from the source table is being renamed to new_col_01 for the target table.

The target S3 bucket should not contain any files with the prefix used in the unload command. A quick way to clean up these files, without having to use the AWS Console to filter and select the files and type “permanently delete”, is to use AWS CloudShell and the command:

aws s3 rm s3://my-s3-bucket/ --recursive --exclude "*" --include "table_01_data_*"

Please make sure that you have populated the exclude and include options correctly. There is no warning or confirmation. All the files matching the exclude and include filters are deleted with no recovery.
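
If you would like a safety check first, a small boto3 sketch along these lines (using the same placeholder bucket name and prefix as the command above) lists the matching keys for review and only deletes them once you uncomment the final loop; this is an optional alternative I am adding here, not part of the original unload workflow:

import boto3

# Placeholders matching the command above
BUCKET = "my-s3-bucket"
KEY_PREFIX = "table_01_data_"

s3 = boto3.client("s3")

# Collect every key that starts with the unload prefix
matching_keys = []
paginator = s3.get_paginator("list_objects_v2")
for page in paginator.paginate(Bucket=BUCKET, Prefix=KEY_PREFIX):
    for obj in page.get("Contents", []):
        matching_keys.append(obj["Key"])

# Review the list before anything is removed
print(f"{len(matching_keys)} objects match prefix '{KEY_PREFIX}':")
for key in matching_keys:
    print(" ", key)

# Uncomment to delete after reviewing the list
# for key in matching_keys:
#     s3.delete_object(Bucket=BUCKET, Key=key)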

Unload a subset of rows from a table
The Redshift unload command does not allow a LIMIT clause directly in the outer select. The workaround is to use it in a subquery, as shown below:

unload (' select * 
          from   my_schema.my_table_name
          where  col_01 in 
                 (    
                   select col_01 
                   from   my_schema.my_table_name 
                   limit 400
                 ) 
'
)
to 's3://my-s3-bucket/table_01_data_'
iam_role  'arn:aws:iam::93073150:role/my-redshift-role'
JSON
;

The subquery refers to the same table and applies the LIMIT clause.

Copy certain column(s) from a file that contains other columns
If you have a file in S3 that contains an unload from a table with a number of columns and you only want to populate one or more columns in a different table, you can do it with the below command:

copy       my_schema.my_table_name
           (col_01, col_02)
from      's3://my-s3-bucket/table_01_data_'
iam_role  'arn:aws:iam::93073150:role/my-redshift-role'
region    'us-east-1'
FORMAT     JSON 'auto ignorecase'
;

Columns named col_01 and col_02 are copied into the table my_schema.my_table_name while the other columns in the S3 file are ignored.

Copying data and the Redshift SUPER data type
If you have a file containing JSON data and you need to load it into a Redshift table with a SUPER column, you can use the following command:

copy       my_schema.my_table_name
from      's3://my-s3-bucket/table_01_data_'
iam_role  'arn:aws:iam::93073150:role/my-redshift-role'
region    'us-east-1'
FORMAT     JSON 'noshred'
;

All of the data in the S3 file will be loaded into the SUPER column as JSON.

The JSON data in a SUPER column can be queried as follows. Assume that the table my_schema.my_super_data_table contains a SUPER column named my_json_data, and that the JSON data contains elements named create_time, account_id, and so on. You can refer to the individual elements as follows:

select   my_json_data."create_time"::timestamp::date as create_date
        ,my_json_data."account_id" as account_id
        ,my_json_data."cust_name"  as cust_name
from     my_schema.my_super_data_table
where    my_json_data."zip_code" = 12345
;

Debugging PostgreSQL Stored Procedures in Amazon RDS: A Workaround

Debugging stored procedures in Amazon RDS for PostgreSQL might pose a challenge if you are accustomed to using a GUI tool. The absence of support for the pldbgapi extension in Amazon RDS for PostgreSQL complicates this process. In this blog post, I suggest a workaround based on deploying an additional PostgreSQL on an Amazon Windows EC2 instance as a debugging platform.

Assuming that you used AWS Schema Conversion Tool (SCT) with the Oracle extension pack, the code converted from Oracle to PostgreSQL will contain several calls to objects in schemas named aws_operators, aws_oracle_context, aws_oracle_data, and aws_oracle_ext. These modules emulate functions present in Oracle that are required for the code to execute in PostgreSQL. However, after the conversion by AWS SCT, you may still need to debug a stored procedure to resolve an issue.

The first step is to get the DDL for the objects created as part of the extension pack. The process is as follows:

– Start the AWS SCT tool
– Connect to the target database
– Right-click on the target schema in the right-hand box
– Click on “Apply Extension Pack for” (Oracle in this case)

In the “Apply extension pack for Oracle” dialogue box, progress to “Step 9: Function emulation” and click on the “Save scripts to” button on the bottom right.

Provide the name of a folder (preferably a new empty folder) as the target for the SQL files. The DDL files will be created as shown below


As of January 2024, this process will create 1506 files. This is subject to change as AWS releases newer versions of SCT.

The next step is to deploy an Amazon EC2 Windows instance. After the instance has been created, connect to the instance and download the PostgreSQL software from EnterpriseDB (EDB). I chose the EDB distribution as it includes several extensions preinstalled including pldbgapi.

The PostgreSQLTutorial website provides the steps to be followed to install PostgreSQL. After PostgreSQL has been installed, create a new database for this test. In this new database, create the pldbgapi extension with the SQL command

CREATE EXTENSION IF NOT EXISTS pldbgapi;

Confirm that the extension has been created. I am using pgAdmin to connect to the PostgreSQL instance on the EC2 Windows server (the same client I use for my Amazon RDS PostgreSQL instance).

After the extension has been created, copy the DDL files created from SCT to a location on the Windows server

(There are 1,509 files shown above as I created three extra files. Please ignore them.)

After some testing, I identified that the best way to deploy them was via a Windows batch file with commands as shown below (a sketch for generating this batch file follows the listing)

@echo off
setlocal
set PGPASSWORD=password-goes-here
"C:\Program Files\PostgreSQL\15\bin\psql.exe" -h localhost -U postgres -d dctestdb -f "C:/dean/ssct-oracle-extension-pack-ddl/0001-aws_lambda.sql" -L "C:/dean/ssct-oracle-extension-pack-ddl/sct-ddl.log"
"C:\Program Files\PostgreSQL\15\bin\psql.exe" -h localhost -U postgres -d dctestdb -f "C:/dean/ssct-oracle-extension-pack-ddl/0002-dblink_transaction.sql" -L "C:/dean/ssct-oracle-extension-pack-ddl/sct-ddl.log"
.
.
.
All other files here
.
.
.
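
Typing out 1,500+ psql lines by hand is impractical, so a small helper along these lines can generate the batch file. This is a sketch of my own rather than part of the original workflow; it reuses the folder paths from the commands above, and the output file name run_sct_ddl.bat is an assumption:

import os

# Paths used in the batch file above (adjust to your environment)
DDL_DIR = "C:/dean/ssct-oracle-extension-pack-ddl"
LOG_FILE = f"{DDL_DIR}/sct-ddl.log"
PSQL = r'"C:\Program Files\PostgreSQL\15\bin\psql.exe"'
BATCH_FILE = "run_sct_ddl.bat"  # hypothetical output file name

with open(BATCH_FILE, "w") as bat:
    bat.write("@echo off\nsetlocal\nset PGPASSWORD=password-goes-here\n")
    # One psql invocation per DDL file, in numbered (alphabetical) order
    for sql_file in sorted(os.listdir(DDL_DIR)):
        if sql_file.lower().endswith(".sql"):
            bat.write(
                f'{PSQL} -h localhost -U postgres -d dctestdb '
                f'-f "{DDL_DIR}/{sql_file}" -L "{LOG_FILE}"\n'
            )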

The output of the DDL operations will be in the log file specified in the above batch file

After the DDL files have been executed, the database has the aws_operators, aws_oracle_context, aws_oracle_data, and aws_oracle_ext schemas defined

At this point, you can extract the DDL for a stored procedure (and related objects) from Amazon RDS, create it in the Windows EC2 PostgreSQL database, and debug the stored procedure via the GUI.

Completed CompTIA CertMaster CE for my Security+ certification

Maintaining IT certifications can be a time-consuming commitment, often involving retesting and juggling busy schedules. This time I decided to try CompTIA’s CertMaster Continuing Education (CE) for Security+ to extend my certification.

This self-paced e-learning program proved to be an efficient and engaging avenue to refresh and expand my existing knowledge base. Instead of cramming for a one-off test, I had the flexibility to delve deeper into key security concepts, revisiting essential skills and acquiring new ones. While working at my own pace, I was able to maintain my certification while enhancing my skill set and staying ahead of the curve in the ever-changing world of cybersecurity.

Python Script for Deleting Old Files from S3 Bucket

I had a requirement to delete about 68,000 files from multiple folders in an S3 bucket. This Python script automates the process of deleting old files from an Amazon S3 bucket based on pattern matching against the folder/file names. It connects to the S3 bucket, identifies files older than a specified timeframe, and deletes them while keeping a detailed audit trail. Here’s a breakdown of the script:

1. Setting Up:

  • The script imports the necessary modules: datetime for date manipulation, boto3 for interacting with S3, and timedelta for time calculations.
  • It defines variables for bucket name, prefix, file paths for storing S3 file names and files to be deleted, and the target file pattern for identification.

2. Gathering Files from S3:

  • A connection is established to S3 using boto3.
  • The list_objects_v2 paginator retrieves all files under the specified bucket and prefix. If you do not use this logic, only the first 1000 files will be listed.
  • The script iterates over each page and extracts the file names, storing them in a text file (files_in_s3).
  • A timestamp is recorded to indicate the completion of this stage.

3. Identifying Files for Deletion:

  • The script calculates the date two months ago using timedelta and datetime.
  • It iterates through the list of files from S3 and checks if they:
    • Start with the specified pattern (my-file-name-pattern).
    • Contain the two-month-ago date (yy_months_ago) in their name.
  • If both conditions are met, the file name is written to another text file (files_to_delete) for deletion.
  • A timestamp and a count of files marked for deletion are printed.

4. Deleting Identified Files:

  • The script iterates through the list of files to be deleted.
  • For each file, it extracts the folder and region information.
  • It checks if the current folder or region is different from the previous one. If yes, it prints a timestamp indicating the start of deletion for that specific folder/region.
  • The script then uses the delete_object function to remove the file from the S3 bucket.

5. Completion and Audit Trail:

  • A final timestamp marks the completion of file deletion.
  • The script prints “End of program” as a closing message.

Benefits:

  • Automates deletion of old files, reducing storage costs and improving data management.
  • Maintains an audit trail of files identified for deletion and their removal timestamps.
  • Customizable to different bucket configurations and deletion criteria.

Note:

  • This script assumes the necessary AWS credentials are configured for accessing S3 resources.
  • Modify the script parameters like bucket name, prefix, pattern, and file paths as needed for your specific scenario.

This script provides a comprehensive and efficient way to manage and delete old files in your S3 bucket, ensuring optimal storage utilization and data governance.

Code:

from datetime import datetime, timedelta
import boto3

now = datetime.now()
print(f"Starting at : {now}")
print(' ')

#
## Bucket details
#
bucket_name = 'my-bucket'
bucket_prefix = 'my-prefix/'
files_in_s3 = 'C:/dean/python/s3_list.txt'
files_to_delete = 'C:/dean/python/s3_delete.txt'

#
## Connect to S3 and get the file names
#
s3 = boto3.client('s3')
paginator = s3.get_paginator('list_objects_v2')
page_iterator= paginator.paginate(Bucket=bucket_name, Prefix=bucket_prefix)
with open(files_in_s3, 'w') as f:
    for page in page_iterator:
        contents = page.get('Contents', [])
        for item in contents:
            f.write(item['Key'] + '\n')  
now = datetime.now()
print(f"Collected files from S3 at {now}")
print(' ')

#
## find the n-2 month
#
n_months_ago = datetime.now() - timedelta(days=60)
yy_months_ago = n_months_ago.strftime('%Y/%m')
print(f"Deleting files for {yy_months_ago}")
print(' ')

#
## Write the files to be deleted to an audit trail
#
file_ctr = 0
with open(files_in_s3, 'r') as f, open(files_to_delete, 'w') as file_out:
    for line in f:
        file_name = line.strip()
        if file_name.startswith('my-file-name-pattern') and yy_months_ago in file_name:
            file_out.write(file_name + '\n')
            file_ctr = file_ctr + 1
now = datetime.now()
print(f"Identified files to delete at {now}")
print(f"Number of files to delete {file_ctr}")
print(' ')

#
## Delete the files
#
prev_folder = ''
prev_region = ''
with open(files_to_delete, 'r') as f:
    for line in f:
        # Folder and region are assumed to be the 4th and 5th elements of the key path
        cur_folder = line.split('/')[3]
        cur_region = line.split('/')[4]
        if cur_folder != prev_folder or cur_region != prev_region:
            now = datetime.now()
            print(f"Deleting files from {cur_folder}/{cur_region} at {now}")
            prev_folder = cur_folder
            prev_region = cur_region
        file_name = line.strip()
        s3.delete_object(Bucket=bucket_name, Key=file_name)
print(' ')
now = datetime.now()
print(f"Completed file deletion at {now}")
print(' ')
print('End of program')