CA$0+
Use this free outlier and anomaly detection code to find, flag and visualize outliers and anomalies. Run this free outlier analysis macro with Python, R or other programming languages. Or use the outlier analysis function as a formula in a spreadsheet. The anomaly detection code applies box & whisker plot anomaly detection theory using key descriptive statistics: 75th percentile (HIGHQ), 25th percentile (LOWQ), Interquartile Range (IQR). The top (UPPER) and bottom (LOWER) box plot whiskers are calculated. Any data values greater than UPPER or less than LOWER are outliers. A message box pop-ups to summarize which data values are outliers. FREE Outlier and Anomaly Detection Template and Video runs this code. It completes Steps-1-4 of the Free Step-by-Step Outlier Analysis Tutorial.
Option Explicit
Option Base 1
Sub mcrOutlierAnomalyDetection()
'Free outlier and anomaly detection code.
Dim wsStatistics As Worksheet
Dim data As Range
Dim Outlier_Data As Range
Dim HIGHQ As Integer
Dim LOWQ As Integer
Dim IQR As Integer
Dim UPPER As Integer
Dim LOWER As Integer
Set Outlier_Data = Application.InputBox( _ Title:="Find outliers and anomalies in data", _ Prompt:="Select your data", _ Type:=8) Outlier_Data.Select On Error GoTo 0
HIGHQ = WorksheetFunction.Percentile(Outlier_Data, 0.75)
LOWQ = WorksheetFunction.Percentile(Outlier_Data, 0.25)
IQR = HIGHQ - LOWQ
UPPER = HIGHQ + 1.5 * (IQR)
LOWER = LOWQ - 1.5 * (IQR)
For Each data In Outlier_Data
data.Offset(0, 1).Value = findOutlier(data, UPPER, LOWER, HIGHQ, LOWQ)
If data.Offset(0, 1).Value = "High Outlier" Then
data.Offset(0, 1).Interior.Color = 65535
data.Offset(0, 1).Font.Color = -16776961
Else data.Offset(0, 1).Font.Color = RGB(0, 0, 0)
End If
Next data
With wsStatistics.Range("F1")
.Offset(0, 0).Value = "Outlier Flag"
End With
With wsStatistics.Range("J1")
.Offset(1, 0).Value = "UPPER"
.Offset(1, 1).Value = UPPER
.Offset(2, 0).Value = "LOWER"
.Offset(2, 1).Value = LOWER
.Offset(3, 0).Value = "IQR"
.Offset(3, 1).Value = IQR
.Offset(4, 0).Value = "HIGHQ"
.Offset(4, 1).Value = HIGHQ
.Offset(5, 0).Value = "LOWQ"
.Offset(5, 1).Value = LOWQ
End With
MsgBox "Any data value that is greater than " & UPPER & " or less than " & LOWER & " is a
statistical outlier.", vbCritical, "Outlier Analysis Conclusion"
End Sub
Function findOutlier(data As Range, UPPER As Integer, LOWER As Integer, HIGHQ As Integer, LOWQ As Integer) As String
If data.Value > UPPER Then
findOutlier = "High Outlier"
ElseIf data.Value < LOWER Then
findOutlier = "Low Outlier"
ElseIf dataValue > HIGHQ And data.Value < UPPER Then
findOutlier = "High"
ElseIf data.Value < LOWQ And data.Value > LOWER Then
findOutlier = "Low" Else findOutlier = "Normal"
End If
End Function