A sample script to look for monthly extracts and kick off the transfer program. Wait for all extracts to complete and send notifications periodically.
After month-end jobs complete on the source, a set of file extracts were created and transferred to data warehousing environment. The extract programs run for few hours and the transfer is initiated only when all the extracts were complete.
A control file with the extract file names, number of fields and minimum record count expected for each extract is used. Extract files are simple text files with fields delimited by pipe (“|”) sign. The script checks for couple of items:
- All extract files were created after the month-end flag is set.
- For each file, field count by record is compared - this is to capture any changes to layout on the source system that is not co-ordinated with the targets.
- Record count for each file compared against the control file. If the count is less than minimum, the extract may still be going on or failed.
- If all conditions were met, and if the load program wasn’t run already, start it. Else, send a mail to users notifying the status.
Control file layout
Control file contains the following: extract file name, field count and minimum record count.
extract.file.1,4,100000
extract.file.2,63,3000000
extract.file.3,21,3000000
extract.file.4,9,1250000
extract.file.5,12,10000000
extract.file.6,4,5000
Monitor script
Monitor directory for new files (script is scheduled to run in cron for couple of days) and start the transfer if field and record counts match for all the files. If not send a notification to user - use a counter to send mails every so often instead of every time the script runs.
#!/bin/bash
cd /home/user1/scripts
export PATH=$PATH:/bin:/home/user1/scripts
APP_BASE_DIR=/home/user1/scripts
MTHLY_FILES=/home/user1/scripts/monthlyrun_files.dat
MAIL_CNT=/home/user1/scripts/mail.cnt
APP_DATA_DIR=/opt/mdata
JOB_TMSTP=${APP_DATA_DIR}/.app_job_tmstp
PROG_TMSTP=${APP_DATA_DIR}/.app_prog_tmstp
ERR_PERCENT=5
ARCHIVE_LOG_FILE=/home/user1/scripts/monthly_run.log
LOG_FILE=/home/user1/scripts/run_app.log.`date +%d%H%M`
MAIL_SUCCESS="user1@email.com"
MAIL_FAIL="user1@email.com"
MTHLY_DIAG_FILE=${APP_BASE_DIR}/target_load.diag
mesg=" "
MTHLY_STATUS="GOOD"
total_files=`wc -l ${MTHLY_FILES} |awk '{print $1}'`
file_count=0
MAX_WAIT_CNT=4
# Save to logfile
function log {
echo $* >> ${LOG_FILE}
return
}
# Mail counter - since this program runs every 15 minutes and extract files can take several hours, do not send repeated emails.
function get_mail_count {
if [[ -s $MAIL_CNT ]]
then
read COUNTER < $MAIL_CNT
COUNTER=$((COUNTER+1))
echo $COUNTER>$MAIL_CNT
if [[ $COUNTER -gt $MAX_WAIT_CNT ]]
then
echo "1">$MAIL_CNT
return 1;
else
return 0;
fi
else
echo "1">$MAIL_CNT
return 0;
fi
}
# Initialize log file
if [[ -s ${LOG_FILE} ]]
then
>${LOG_FILE}
fi
log "----------------------------------------------------------------------------------"
log "Running app1-monthly-check at `date +%Y%m%d-%H%M%S`"
# Monthly run date set to 2nd of the month.
log "Setting START timestamp for MONTLY_RUN files to $(date --date=$(date +%Y-%m-02))"
touch --date=$(date +%Y-%m-04) $JOB_TMSTP
#touch --date=$(date +%Y-%m-04) -d '-1 month' $JOB_TMSTP
touch --date=$(date +%Y-%m-%d) -d '-15 mins' $PROG_TMSTP
cd ${APP_DATA_DIR}
log "-----------------------------------------------------------------------------"
if [[ $MTHLY_DIAG_FILE -nt $JOB_TMSTP ]]
then
# Check if history data is already run for this month. If so, log and exit.
# Don't attempt to do the checks.
log "history data program already run for this month"
cat ${LOG_FILE} >> ${ARCHIVE_LOG_FILE}
mv ${LOG_FILE} /home/user1/scripts/monthly_run.log
exit
fi
# for each file in MTHLY_FILES list - get field count, record count.
# If timestamp of data file is recent (after monthly run date)
# but more than 15 minutes old (extract completed), proceed with checks.
# Check record - if less than minimum extract may be incomplete.
# If field count doesn't match, the load will fail in target system. Send error message.
for line in `cat ${MTHLY_FILES}`
do
(( file_count++ ))
# echo "Processing file number $file_count"
fname=$(echo $line|awk -F"," '{print $1}')
log "FILE <$fname>"
fldcnt=$(echo $line|awk -F"," '{print $2}')
reccnt=$(echo $line|awk -F"," '{print $3}')
# echo "File = $fname, fields = $fldcnt, records = $reccnt"
if [[ $fname -nt $JOB_TMSTP ]]
then
log "---->Generated this month? [OK]"
if [[ $fname -nt $PROG_TMSTP ]]
then
mesg="-------->Timestamp within last 15 minutes. Still Running. [STOP]"
log $mesg
MTHLY_STATUS="RUNNING"
break
else
log "-------->Timestamp older than 15 minutes. Continue with other checks [OK]"
fi
else
mesg="---->Generated this month? - NO. [STOP]"
log $mesg
MTHLY_STATUS="OLD_DATA"
break
fi
# echo "============================================================="
# echo "Checking file $fname for fields > $fldcnt"
tval=`awk -F"|" -v rsize=$fldcnt \
'BEGIN{cnt=0;rcnt=0} \
{rcnt ++}
{if (NF>rsize) {cnt++; }} \
END{print rcnt "," cnt }' $fname`
# echo "outside val = $tval"
act_rec_cnt=$(echo $tval|cut -f1 -d",")
act_fail_cnt=$(echo $tval|cut -f2 -d",")
# echo "actual records = $act_rec_cnt, possible fail records = $act_fail_cnt"
if [[ $act_rec_cnt -lt $reccnt ]]
then
mesg="------------>Record count in file ($act_rec_cnt) is less than average count ($reccnt) [STOP]"
log $mesg
MTHLY_STATUS="FEWER_RECORDS"
break
fi
if [[ $act_fail_cnt -gt 0 ]]
then
log "------------>BAD records ($act_fail_cnt) out of ($act_rec_cnt)"
if [[ $act_fail_cnt -gt $(($act_rec_cnt/$ERR_PERCENT/100)) ]]
then
mesg="---------------->BAD records ($act_fail_cnt) is more than $ERR_PERCENT% of records ($act_rec_cnt). [STOP]"
log $mesg
MTHLY_STATUS="BAD_RECORDS"
break
fi
fi
done
# File and record checks complete. Proceed with notification or starting the transfers.
snd_mail=0
if [[ $MTHLY_STATUS != "GOOD" ]]
then
log "Message is $MTHLY_STATUS"
if [[ $MTHLY_STATUS = "BAD_RECORDS" ]]
then
mesg="ERROR:$MTHLY_STATUS:BAD records more than allowed PERCENTAGE"
log $mesg
snd_mail=1
elif [[ $MTHLY_STATUS = "FEWER_RECORDS" ]]
then
log "$MTHLY_STATUS:FEWER records in file. Extract may be incomplete"
log $mesg
snd_mail=1
elif [[ $MTHLY_STATUS = "FEWER_RECORDS" ]]
then
log "$MTHLY_STATUS:FEWER records in file. Extract may be incomplete"
log $mesg
snd_mail=1
elif [[ $MTHLY_STATUS = "OLD_DATA" ]]
then
if [[ $file_count -eq 0 ]]
then
mesg="$MTHLY_STATUS:File - 1:Extract possibly not started"
# Change email frequency to 2 hrs to not spam before the extracts are started.
MAX_WAIT_CNT=8
else
mesg="$MTHLY_STATUS:2-File - $file_count (of $total_files) - extract has old timestamp. Stopping"
fi
log $mesg
get_mail_count
snd_mail=`echo $?`
else
mesg="$MTHLY_STATUS:Extract is still running - $file_count (of $total_files)"
get_mail_count
snd_mail=`echo $?`
log $mesg
fi
if [[ $snd_mail -eq 1 ]]
then
mailx -s "MONTHLY_HISTORY_DATA:$mesg" $MAIL_FAIL < ${LOG_FILE}
fi
elif [[ $MTHLY_DIAG_FILE -nt $JOB_TMSTP ]]
then
log "history data program already run for this month"
else
log "Good to go. Pushing history data now"
mailx -s "Running history data push to warehouse" $MAIL_SUCCESS < ${LOG_FILE}
cd /home/user1/scripts
### < ETL JOB transfer here>
fi
cat ${LOG_FILE} >> ${ARCHIVE_LOG_FILE}
mv ${LOG_FILE} /home/user1/scripts/monthly_run.log
exit