initial commit

This commit is contained in:
Ben Goldsworthy 2021-04-02 11:10:12 +01:00
commit 51deec9859
14 changed files with 3059 additions and 0 deletions

132
.gitignore vendored Normal file
View File

@ -0,0 +1,132 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
.python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# Virtual environment
pyvenv/

591
.pylintrc Normal file
View File

@ -0,0 +1,591 @@
[MASTER]
# A comma-separated list of package or module names from where C extensions may
# be loaded. Extensions are loading into the active Python interpreter and may
# run arbitrary code.
extension-pkg-whitelist=
# Specify a score threshold to be exceeded before program exits with error.
fail-under=10.0
# Add files or directories to the blacklist. They should be base names, not
# paths.
ignore=CVS
# Add files or directories matching the regex patterns to the blacklist. The
# regex matches against base names, not paths.
ignore-patterns=wsdump.py,py..,__pycache__
# Python code to execute, usually for sys.path manipulation such as
# pygtk.require().
#init-hook=
# Use multiple processes to speed up Pylint. Specifying 0 will auto-detect the
# number of processors available to use.
jobs=1
# Control the amount of potential inferred values when inferring a single
# object. This can help the performance when dealing with large functions or
# complex, nested conditions.
limit-inference-results=100
# List of plugins (as comma separated values of python module names) to load,
# usually to register additional checkers.
load-plugins=
# Pickle collected data for later comparisons.
persistent=yes
# When enabled, pylint would attempt to guess common misconfiguration and emit
# user-friendly hints instead of false-positive error messages.
suggestion-mode=yes
# Allow loading of arbitrary C extensions. Extensions are imported into the
# active Python interpreter and may run arbitrary code.
unsafe-load-any-extension=no
[MESSAGES CONTROL]
# Only show warnings with the listed confidence levels. Leave empty to show
# all. Valid levels: HIGH, INFERENCE, INFERENCE_FAILURE, UNDEFINED.
confidence=
# Disable the message, report, category or checker with the given id(s). You
# can either give multiple identifiers separated by comma (,) or put this
# option multiple times (only on the command line, not in the configuration
# file where it should appear only once). You can also use "--disable=all" to
# disable everything first and then reenable specific checks. For example, if
# you want to run only the similarities checker, you can use "--disable=all
# --enable=similarities". If you want to run only the classes checker, but have
# no Warning level messages displayed, use "--disable=all --enable=classes
# --disable=W".
disable=print-statement,
parameter-unpacking,
unpacking-in-except,
old-raise-syntax,
backtick,
long-suffix,
old-ne-operator,
old-octal-literal,
import-star-module-level,
non-ascii-bytes-literal,
raw-checker-failed,
bad-inline-option,
locally-disabled,
file-ignored,
suppressed-message,
useless-suppression,
deprecated-pragma,
use-symbolic-message-instead,
apply-builtin,
basestring-builtin,
buffer-builtin,
cmp-builtin,
coerce-builtin,
execfile-builtin,
file-builtin,
long-builtin,
raw_input-builtin,
reduce-builtin,
standarderror-builtin,
unicode-builtin,
xrange-builtin,
coerce-method,
delslice-method,
getslice-method,
setslice-method,
no-absolute-import,
old-division,
dict-iter-method,
dict-view-method,
next-method-called,
metaclass-assignment,
indexing-exception,
raising-string,
reload-builtin,
oct-method,
hex-method,
nonzero-method,
cmp-method,
input-builtin,
round-builtin,
intern-builtin,
unichr-builtin,
map-builtin-not-iterating,
zip-builtin-not-iterating,
range-builtin-not-iterating,
filter-builtin-not-iterating,
using-cmp-argument,
eq-without-hash,
div-method,
idiv-method,
rdiv-method,
exception-message-attribute,
invalid-str-codec,
sys-max-int,
bad-python3-import,
deprecated-string-function,
deprecated-str-translate-call,
deprecated-itertools-function,
deprecated-types-field,
next-method-defined,
dict-items-not-iterating,
dict-keys-not-iterating,
dict-values-not-iterating,
deprecated-operator-function,
deprecated-urllib-function,
xreadlines-attribute,
deprecated-sys-function,
exception-escape,
comprehension-escape
# Enable the message, report, category or checker with the given id(s). You can
# either give multiple identifier separated by comma (,) or put this option
# multiple time (only on the command line, not in the configuration file where
# it should appear only once). See also the "--disable" option for examples.
enable=c-extension-no-member
[REPORTS]
# Python expression which should return a score less than or equal to 10. You
# have access to the variables 'error', 'warning', 'refactor', and 'convention'
# which contain the number of messages in each category, as well as 'statement'
# which is the total number of statements analyzed. This score is used by the
# global evaluation report (RP0004).
evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10)
# Template used to display messages. This is a python new-style format string
# used to format the message information. See doc for all details.
#msg-template=
# Set the output format. Available formats are text, parseable, colorized, json
# and msvs (visual studio). You can also give a reporter class, e.g.
# mypackage.mymodule.MyReporterClass.
output-format=text
# Tells whether to display a full report or only the messages.
reports=no
# Activate the evaluation score.
score=yes
[REFACTORING]
# Maximum number of nested blocks for function / method body
max-nested-blocks=5
# Complete name of functions that never returns. When checking for
# inconsistent-return-statements if a never returning function is called then
# it will be considered as an explicit return statement and no message will be
# printed.
never-returning-functions=sys.exit
[SPELLING]
# Limits count of emitted suggestions for spelling mistakes.
max-spelling-suggestions=4
# Spelling dictionary name. Available dictionaries: none. To make it work,
# install the python-enchant package.
spelling-dict=
# List of comma separated words that should not be checked.
spelling-ignore-words=
# A path to a file that contains the private dictionary; one word per line.
spelling-private-dict-file=
# Tells whether to store unknown words to the private dictionary (see the
# --spelling-private-dict-file option) instead of raising a message.
spelling-store-unknown-words=no
[FORMAT]
# Expected format of line ending, e.g. empty (any line ending), LF or CRLF.
expected-line-ending-format=
# Regexp for a line that is allowed to be longer than the limit.
ignore-long-lines=^\s*(# )?<?https?://\S+>?$
# Number of spaces of indent required inside a hanging or continued line.
indent-after-paren=4
# String used as indentation unit. This is usually " " (4 spaces) or "\t" (1
# tab).
indent-string=' '
# Maximum number of characters on a single line.
max-line-length=100
# Maximum number of lines in a module.
max-module-lines=1000
# Allow the body of a class to be on the same line as the declaration if body
# contains single statement.
single-line-class-stmt=no
# Allow the body of an if to be on the same line as the test if there is no
# else.
single-line-if-stmt=no
[LOGGING]
# The type of string formatting that logging methods do. `old` means using %
# formatting, `new` is for `{}` formatting.
logging-format-style=old
# Logging modules to check that the string format arguments are in logging
# function parameter format.
logging-modules=logging
[STRING]
# This flag controls whether inconsistent-quotes generates a warning when the
# character used as a quote delimiter is used inconsistently within a module.
check-quote-consistency=no
# This flag controls whether the implicit-str-concat should generate a warning
# on implicit string concatenation in sequences defined over several lines.
check-str-concat-over-line-jumps=no
[TYPECHECK]
# List of decorators that produce context managers, such as
# contextlib.contextmanager. Add to this list to register other decorators that
# produce valid context managers.
contextmanager-decorators=contextlib.contextmanager
# List of members which are set dynamically and missed by pylint inference
# system, and so shouldn't trigger E1101 when accessed. Python regular
# expressions are accepted.
generated-members=
# Tells whether missing members accessed in mixin class should be ignored. A
# mixin class is detected if its name ends with "mixin" (case insensitive).
ignore-mixin-members=yes
# Tells whether to warn about missing members when the owner of the attribute
# is inferred to be None.
ignore-none=yes
# This flag controls whether pylint should warn about no-member and similar
# checks whenever an opaque object is returned when inferring. The inference
# can return multiple potential results while evaluating a Python object, but
# some branches might not be evaluated, which results in partial inference. In
# that case, it might be useful to still emit no-member and other checks for
# the rest of the inferred objects.
ignore-on-opaque-inference=yes
# List of class names for which member attributes should not be checked (useful
# for classes with dynamically set attributes). This supports the use of
# qualified names.
ignored-classes=optparse.Values,thread._local,_thread._local
# List of module names for which member attributes should not be checked
# (useful for modules/projects where namespaces are manipulated during runtime
# and thus existing member attributes cannot be deduced by static analysis). It
# supports qualified module names, as well as Unix pattern matching.
ignored-modules=
# Show a hint with possible names when a member name was not found. The aspect
# of finding the hint is based on edit distance.
missing-member-hint=yes
# The minimum edit distance a name should have in order to be considered a
# similar match for a missing member name.
missing-member-hint-distance=1
# The total number of similar names that should be taken in consideration when
# showing a hint for a missing member.
missing-member-max-choices=1
# List of decorators that change the signature of a decorated function.
signature-mutators=
[MISCELLANEOUS]
# List of note tags to take in consideration, separated by a comma.
notes=FIXME,
XXX,
TODO
# Regular expression of note tags to take in consideration.
#notes-rgx=
[SIMILARITIES]
# Ignore comments when computing similarities.
ignore-comments=yes
# Ignore docstrings when computing similarities.
ignore-docstrings=yes
# Ignore imports when computing similarities.
ignore-imports=no
# Minimum lines number of a similarity.
min-similarity-lines=10000
[BASIC]
# Naming style matching correct argument names.
argument-naming-style=snake_case
# Regular expression matching correct argument names. Overrides argument-
# naming-style.
#argument-rgx=
# Naming style matching correct attribute names.
attr-naming-style=snake_case
# Regular expression matching correct attribute names. Overrides attr-naming-
# style.
#attr-rgx=
# Bad variable names which should always be refused, separated by a comma.
bad-names=foo,
bar,
baz,
toto,
tutu,
tata
# Bad variable names regexes, separated by a comma. If names match any regex,
# they will always be refused
bad-names-rgxs=
# Naming style matching correct class attribute names.
class-attribute-naming-style=any
# Regular expression matching correct class attribute names. Overrides class-
# attribute-naming-style.
#class-attribute-rgx=
# Naming style matching correct class names.
class-naming-style=PascalCase
# Regular expression matching correct class names. Overrides class-naming-
# style.
#class-rgx=
# Naming style matching correct constant names.
const-naming-style=UPPER_CASE
# Regular expression matching correct constant names. Overrides const-naming-
# style.
#const-rgx=
# Minimum line length for functions/classes that require docstrings, shorter
# ones are exempt.
docstring-min-length=-1
# Naming style matching correct function names.
function-naming-style=snake_case
# Regular expression matching correct function names. Overrides function-
# naming-style.
#function-rgx=
# Good variable names which should always be accepted, separated by a comma.
good-names=i,
j,
k,
ex,
Run,
_
# Good variable names regexes, separated by a comma. If names match any regex,
# they will always be accepted
good-names-rgxs=
# Include a hint for the correct naming format with invalid-name.
include-naming-hint=no
# Naming style matching correct inline iteration names.
inlinevar-naming-style=any
# Regular expression matching correct inline iteration names. Overrides
# inlinevar-naming-style.
#inlinevar-rgx=
# Naming style matching correct method names.
method-naming-style=snake_case
# Regular expression matching correct method names. Overrides method-naming-
# style.
#method-rgx=
# Naming style matching correct module names.
module-naming-style=snake_case
# Regular expression matching correct module names. Overrides module-naming-
# style.
#module-rgx=
# Colon-delimited sets of names that determine each other's naming style when
# the name regexes allow several styles.
name-group=
# Regular expression which should only match function or class names that do
# not require a docstring.
no-docstring-rgx=^_
# List of decorators that produce properties, such as abc.abstractproperty. Add
# to this list to register other decorators that produce valid properties.
# These decorators are taken in consideration only for invalid-name.
property-classes=abc.abstractproperty
# Naming style matching correct variable names.
variable-naming-style=snake_case
# Regular expression matching correct variable names. Overrides variable-
# naming-style.
#variable-rgx=
[VARIABLES]
# List of additional names supposed to be defined in builtins. Remember that
# you should avoid defining new builtins when possible.
additional-builtins=
# Tells whether unused global variables should be treated as a violation.
allow-global-unused-variables=yes
# List of strings which can identify a callback function by name. A callback
# name must start or end with one of those strings.
callbacks=cb_,
_cb
# A regular expression matching the name of dummy variables (i.e. expected to
# not be used).
dummy-variables-rgx=_+$|(_[a-zA-Z0-9_]*[a-zA-Z0-9]+?$)|dummy|^ignored_|^unused_
# Argument names that match this expression will be ignored. Default to name
# with leading underscore.
ignored-argument-names=_.*|^ignored_|^unused_
# Tells whether we should check for unused import in __init__ files.
init-import=no
# List of qualified module names which can have objects that can redefine
# builtins.
redefining-builtins-modules=six.moves,past.builtins,future.builtins,builtins,io
[CLASSES]
# Warn about protected attribute access inside special methods
check-protected-access-in-special-methods=no
# List of method names used to declare (i.e. assign) instance attributes.
defining-attr-methods=__init__,
__new__,
setUp,
__post_init__
# List of member names, which should be excluded from the protected access
# warning.
exclude-protected=_asdict,
_fields,
_replace,
_source,
_make
# List of valid names for the first argument in a class method.
valid-classmethod-first-arg=cls
# List of valid names for the first argument in a metaclass class method.
valid-metaclass-classmethod-first-arg=cls
[DESIGN]
# Maximum number of arguments for function / method.
max-args=5
# Maximum number of attributes for a class (see R0902).
max-attributes=7
# Maximum number of boolean expressions in an if statement (see R0916).
max-bool-expr=5
# Maximum number of branch for function / method body.
max-branches=12
# Maximum number of locals for function / method body.
max-locals=15
# Maximum number of parents for a class (see R0901).
max-parents=7
# Maximum number of public methods for a class (see R0904).
max-public-methods=20
# Maximum number of return / yield for function / method body.
max-returns=6
# Maximum number of statements in function / method body.
max-statements=50
# Minimum number of public methods for a class (see R0903).
min-public-methods=2
[IMPORTS]
# List of modules that can be imported at any level, not just the top level
# one.
allow-any-import-level=
# Allow wildcard imports from modules that define __all__.
allow-wildcard-with-all=no
# Analyse import fallback blocks. This can be used to support both Python 2 and
# 3 compatible code, which means that the block might have code that exists
# only in one or another interpreter, leading to false positives when analysed.
analyse-fallback-blocks=no
# Deprecated modules which should not be used, separated by a comma.
deprecated-modules=optparse,tkinter.tix
# Create a graph of external dependencies in the given file (report RP0402 must
# not be disabled).
ext-import-graph=
# Create a graph of every (i.e. internal and external) dependencies in the
# given file (report RP0402 must not be disabled).
import-graph=
# Create a graph of internal dependencies in the given file (report RP0402 must
# not be disabled).
int-import-graph=
# Force import order to recognize a module as part of the standard
# compatibility libraries.
known-standard-library=
# Force import order to recognize a module as part of a third party library.
known-third-party=enchant
# Couples of modules and preferred modules, separated by a comma.
preferred-modules=
[EXCEPTIONS]
# Exceptions that will emit a warning when being caught. Defaults to
# "BaseException, Exception".
overgeneral-exceptions=BaseException,
Exception

40
CONTRIBUTING.md Normal file
View File

@ -0,0 +1,40 @@
This project welcomes contributions!
Please follow these guidelines when contributing, as it will give your pull
request the best chance of being accepted:
# Requesting Features/Reporting Bugs
To request a new feature or to report a bug, please open a [new Issue][new-issue].
Please note: This project is **not** actively supported. Opened Issues **may**
be responded to, but this is not guaranteed.
# Contributing Code
- This project uses [GitHub Flow][github-flow]
- branch off of `main` to start developing (`git checkout -b <your branch>`)
- ensure that your new branch has a descriptive name
- create a remote copy of your new branch (`git push`)
- create a draft [pull request][pull-request]
to merge your branch with `main` — tag any related or to-close Issues
- when you think you're finished, un-draft your pull request.
# Commits
- This project uses [Conventional Commits][conventional]; and
- keep individual commits as small as possible
# Versioning
- This project uses [Semantic Versioning][semver]
# Translating
- Translations are welcome!
[new-issue]: https://github.com/Rumperuu/Threat-Intelligence-Service/issues/new
[github-flow]: https://githubflow.github.io/
[pull-request]: https://github.com/Rumperuu/Threat-Intelligence-Service/compare
[conventional]: https://www.conventionalcommits.org
[semver]: https://semver.org/

115
LICENSE Normal file
View File

@ -0,0 +1,115 @@
THE CRAPL v0 BETA 1
0. Information about the CRAPL
If you have questions or concerns about the CRAPL, or you need more
information about this license, please contact:
Matthew Might
http://matt.might.net/
I. Preamble
Science thrives on openness.
In modern science, it is often infeasible to replicate claims without
access to the software underlying those claims.
Let's all be honest: when scientists write code, aesthetics and
software engineering principles take a back seat to having running,
working code before a deadline.
So, let's release the ugly. And, let's be proud of that.
II. Definitions
1. "This License" refers to version 0 beta 1 of the Community
Research and Academic Programming License (the CRAPL).
2. "The Program" refers to the medley of source code, shell scripts,
executables, objects, libraries and build files supplied to You,
or these files as modified by You.
[Any appearance of design in the Program is purely coincidental and
should not in any way be mistaken for evidence of thoughtful
software construction.]
3. "You" refers to the person or persons brave and daft enough to use
the Program.
4. "The Documentation" refers to the Program.
5. "The Author" probably refers to the caffeine-addled graduate
student that got the Program to work moments before a submission
deadline.
III. Terms
1. By reading this sentence, You have agreed to the terms and
conditions of this License.
2. If the Program shows any evidence of having been properly tested
or verified, You will disregard this evidence.
3. You agree to hold the Author free from shame, embarrassment or
ridicule for any hacks, kludges or leaps of faith found within the
Program.
4. You recognize that any request for support for the Program will be
discarded with extreme prejudice.
5. The Author reserves all rights to the Program, except for any
rights granted under any additional licenses attached to the
Program.
IV. Permissions
1. You are permitted to use the Program to validate published
scientific claims.
2. You are permitted to use the Program to validate scientific claims
submitted for peer review, under the condition that You keep
modifications to the Program confidential until those claims have
been published.
3. You are permitted to use and/or modify the Program for the
validation of novel scientific claims if You make a good-faith
attempt to notify the Author of Your work and Your claims prior to
submission for publication.
4. If You publicly release any claims or data that were supported or
generated by the Program or a modification thereof, in whole or in
part, You will release any inputs supplied to the Program and any
modifications You made to the Progam. This License will be in
effect for the modified program.
V. Disclaimer of Warranty
THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT
WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND
PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE
DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR
CORRECTION.
VI. Limitation of Liability
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR
CONVEYS THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES
ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT
NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR
LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM
TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER
PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES.

130
README.md Normal file
View File

@ -0,0 +1,130 @@
# Threat Intelligence Service
A tool for collecting threat intelligence data and running Monte Carlo simulations
based on it.
## Table of Contents
* [Technology Stack](#technology-stack)
* [Features](#features)
* [Installation](#installation)
* [Configuration Setup](#configuration-setup)
* [Usage](#usage)
* [Testing](#testing)
* [Code Formatting](#code-formatting)
* [Documentation](#documentation)
* [Acknowledgements](#acknowledgements)
* [License](#license)
* [Contact Information](#contact-information)
## Technology Stack
The risk calculation scripts are written in [Python][python], along with
prototypes written in [R][r].
The Neo4j graph database uses the [Cypher][cypher] query language.
| Technology | Description | Link |
|------------|----------------------------------|------|
| Neo4j | Graph database management system | [Link](https://neo4j.com/) |
## Features
This repo. provides:
- Scripts for (re)generating incident number and average cost distributions
and running Monte Carlo simulations using those distributions;
- a set of Cypher commands to allow for easy initial population of a Neo4j
threat intelligence graph database with data derived from the
[_Cyber Security Breaches Survey_ 2020][csbs2020]; and
- a full suite of automated linting functions to ensure codebase standardisation.
## Installation
### Threat Intelligence Database (Neo4j)
1. Install [Neo4j Desktop][neo4j-desktop];
1. in the Neo4j Desktop app, create a new Project;
1. in that project, add either a Local DBMS or a Remote Connection (depending
on which environment you are in) and call it Threat Intelligence:
- make sure to update the connection details in `src/scripts/graph.py`.
1. add the file `contrib/database.cypher` to the Project;
1. open your server in the Neo4j Browser;
1. go to the Project Files tab and press the run button next to `database.cypher`.
### Scripts
1. Clone the repo. to your dev. environment (`git clone git@github.com:Rumperuu/Threat-Intelligence-Service.git`);
1. enter the new folder (`cd Threat-Intelligence-Service`);
1. create a virtual Python environment (`python3.⟨version⟩ -m venv pyvenv`);
1. activate your virtual environment (`source ./pyvenv/bin/activate`); and
1. install Python package with pip (`pip install -r requirements.txt`).
## Configuration Setup
TODO: Add environment config.
## Usage
Run `python src/montecarlo.py` to run a Monte Carlo simulation. Use `-h` to view
the available options.
Run `python src/regenerate-distributions.py` to (re)generate all probability
distributions. Use `-h` to view the available options.
## Testing
There are not currently any tests.
## Code Formatting
There is not currently any automated code formatting or linting.
### Python Code
Python code must conform to [PEP 8][pep8].
- Run `black --target-version=py38 */**/*.py` to format all Python files with [Black][black].
- Use `--check` to view the output without automatically fixing warnings and
errors.
- Run `pylint */**/*.py --output-format=colorized` to lint all Python files with [Pylint][pylint].
- Pylint does not have the ability to automatically fix warnings and errors.
Pylint configuration settings are found in `.pylintrc`.
## Documentation
There is currently no documentation.
## Acknowledgements
This project was initially developed as part of [KTP № 11598][ktp], with
funding provided by [Innovate UK][innovate-uk] & [Mitigate Cyber][mitigate].
This project was inspired by Hubbard &amp; Seiersen's book _How to Measure Anything in Cybersecurity Risk_.
## License
This project is currently released under the [CRAPL][crapl]. It should **NOT**
be used in a production environment in its current state.
## Contact Information
| Name | Link(s) |
|---------------|-----------------------|
|Ben Goldsworthy| [Email][bgoldsworthy] |
[python]: https://www.python.org/
[r]: https://www.r-project.org/
[cypher]: https://neo4j.com/developer/cypher/
[csbs2020]: https://www.gov.uk/government/statistics/cyber-security-breaches-survey-2020
[neo4j-desktop]: https://neo4j.com/download/?ref=try-neo4j-lp
[pep8]: https://www.python.org/dev/peps/pep-0008/
[black]: https://pypi.org/project/black/
[pylint]: https://pylint.org/
[ktp]: https://info.ktponline.org.uk/action/details/partnership.aspx?id=11598
[innovate-uk]: https://www.gov.uk/government/organisations/innovate-uk
[mitigate]: http://mitigatecyber.com/
[crapl]: https://matt.might.net/articles/crapl/
[bgoldsworthy]: mailto:me+threatintelservice@bengoldsworthy.net

20
SECURITY.md Normal file
View File

@ -0,0 +1,20 @@
# Security Policy
## Supported Versions
The following versions of this software are currently being supported with
security updates:
| Version | Supported |
| ------- | ------------------ |
| 0.x.x | :white_check_mark: |
## Reporting a Vulnerability
To securely report a vulnerability, please DO NOT create an Issue on this
repository.
Please email [Ben Goldsworthy][bgoldsworthy] privately with the full details of
the vulnerability.
[bgoldsworthy]: mailto:me+threatintelservice@bengoldsworthy.net

7
contrib/README.md Normal file
View File

@ -0,0 +1,7 @@
# Threat Intelligence Service
## Contributed Files
This directory contains:
- Neo4j import scripts (distributed as `.cypher` files).

347
contrib/database.cypher Normal file
View File

@ -0,0 +1,347 @@
//
// Neo4j Graph Database Set-Up Script
//
// This file contains Cypher commands that, when run in a Neo4j graph database,
// will do the following:
//
// - create nodes representing the ISO/IEC 27000-series standard;
// - create nodes and relationships representing the organisations responsible
// for each standard in the series;
// - create nodes and relationships representing each of the sections and
// subsections of the standard considered relevant to classifying threat
// intelligence data;
// - create nodes and relationships representing each control detailed in
// the standard;
// - create nodes representing all top-level industrial classifications from the
// SIC 2020 standard;
// - create nodes representing the various organisational size classifications
// used by HM Government; and
// - create nodes and relationships representing incident probability and
// average cost values derived from the Cyber Security Breaches Survey 2020.
//
// Create nodes representing the ISO/IEC 27000-series standard.
// Create nodes and relationships representing the organisations responsible
// for each standard in the series.
// Create the standards bodies (BSI, ISO, IEC) and the security standards in
// scope, linking each standard to its issuing organisations via [:ISSUES]
// relationships. The IEC's links to most standards are added by the follow-up
// MATCH/CREATE statement rather than here.
// NOTE(review): short_name 'BSI EN ISO/IEC 27000:2020' below is inconsistent
// with the 'BS EN ...' prefix used by every other standard — it looks like a
// typo, but it is left unchanged in case a later query matches on it; confirm
// before normalising.
CREATE (bsi:Organisation {name:'British Standards Institution', short_name:'BSI'}),
(iso:Organisation {name:'International Organization for Standardization', short_name:'ISO'}),
(iso)-[:ISSUES]->(iso27799:SecurityStandard {name:'Health informatics — Information security management in health using ISO/IEC 27002', short_name:'BS EN ISO 27799:2016', version:'2016', family:'ISO/IEC 27000-series'})<-[:ISSUES]-(bsi),
(iec:Organisation {name:'International Electrotechnical Commission', short_name:'IEC'}),
(bsi)-[:ISSUES]->(iso27000:SecurityStandard {name:'Information technology — Security techniques — Information security management systems — Overview and vocabulary', short_name:'BSI EN ISO/IEC 27000:2020', version:'2020', family:'ISO/IEC 27000-series'})<-[:ISSUES]-(iso),
(bsi)-[:ISSUES]->(iso27001:SecurityStandard {name:'Information technology — Security techniques — Information security management systems — Requirements', organisation:'BSI', short_name:'BS EN ISO/IEC 27001:2017', version:'2017', family:'ISO/IEC 27000-series'})<-[:ISSUES]-(iso),
(bsi)-[:ISSUES]->(:SecurityStandard {name:'Information technology — Security techniques — Code of practice for information security controls', organisation:'BSI', short_name:'BS EN ISO/IEC 27002:2017', version:'2017', family:'ISO/IEC 27000-series'})<-[:ISSUES]-(iso),
// Typo fix in the standard's title: 'managment' -> 'management' (matches the
// published title of ISO/IEC 27010). Queries elsewhere in this file look
// standards up by short_name, not name, so this is safe to correct.
(bsi)-[:ISSUES]->(:SecurityStandard {name:'Information technology — Security techniques — Information security management for inter-sector and inter-organizational communications', organisation:'BSI', short_name:'BS EN ISO/IEC 27010:2015', version:'2015', family:'ISO/IEC 27000-series'})<-[:ISSUES]-(iso),
(bsi)-[:ISSUES]->(:SecurityStandard {name:'Information technology — Security techniques — Code of practice for information security controls based on ISO/IEC 27002 for cloud services', organisation:'BSI', short_name:'BS EN ISO/IEC 27017:2015', version:'2015', family:'ISO/IEC 27000-series'})<-[:ISSUES]-(iso),
(bsi)-[:ISSUES]->(:SecurityStandard {name:'Information technology — Security techniques — Code of practice for protection of personally identifiable information (PII) in public clouds acting as PII processors', organisation:'BSI', short_name:'BS EN ISO/IEC 27018:2020', version:'2020', family:'ISO/IEC 27000-series'})<-[:ISSUES]-(iso),
// NOTE(review): iso29100 lacks the 'family' property (presumably because
// ISO/IEC 29100 is not part of the 27000 series — confirm), and several nodes
// carry an organisation:'BSI' property that duplicates what the [:ISSUES]
// relationships already express — worth normalising once confirmed unused.
(bsi)-[:ISSUES]->(iso29100:SecurityStandard {name:'Information technology — Security techniques — Privacy framework', organisation:'BSI', short_name:'BS EN ISO/IEC 29100:2020', version:'2020'})<-[:ISSUES]-(iso);
// The IEC co-issues every standard created above except BS EN ISO 27799:2016
// (a BSI/ISO-only publication), so attach an [:ISSUES] relationship from the
// IEC node to each remaining SecurityStandard node.
MATCH (iec:Organisation {short_name:'IEC'})
MATCH (std:SecurityStandard)
WHERE std.short_name <> 'BS EN ISO 27799:2016'
CREATE (iec)-[:ISSUES]->(std);
// Create nodes and relationships representing each of the sections and
// subsections of the standard considered relevant to classifying threat
// intelligence data.
// Create nodes and relationships representing each control detailed in the
// standard.
MATCH (iso27001:SecurityStandard {short_name:'BS EN ISO/IEC 27001:2017'}),
(iso27010:SecurityStandard {short_name:'BS EN ISO/IEC 27010:2015'}),
(iso27017:SecurityStandard {short_name:'BS EN ISO/IEC 27017:2015'}),
(iso27018:SecurityStandard {short_name:'BS EN ISO/IEC 27018:2020'}),
(iso27799:SecurityStandard {short_name:'BS EN ISO 27799:2016'}),
(iso29100:SecurityStandard {short_name:'BS EN ISO/IEC 29100:2020'})
CREATE (s5:SecurityArea {number:'5', name:'Information security policies'})-[:SECTION_OF]->(iso27001),
(s7:SecurityArea {number:'7', name:'Human resources security'})-[:SECTION_OF]->(iso27001),
(s8:SecurityArea {number:'8', name:'Asset management'})-[:SECTION_OF]->(iso27001),
(s9:SecurityArea {number:'9', name:'Access control'})-[:SECTION_OF]->(iso27001),
(s10:SecurityArea {number:'10', name:'Cryptography'})-[:SECTION_OF]->(iso27001),
(s12:SecurityArea {number:'12', name:'Operations security'})-[:SECTION_OF]->(iso27001),
(s13:SecurityArea {number:'13', name:'Communications security'})-[:SECTION_OF]->(iso27001),
(s14:SecurityArea {number:'14', name:'System acquisition, development and maintenance'})-[:SECTION_OF]->(iso27001),
(s15:SecurityArea {number:'15', name:'Supplier relationships'})-[:SECTION_OF]->(iso27001),
(s16:SecurityArea {number:'16', name:'Information security incident management'})-[:SECTION_OF]->(iso27001),
(s17:SecurityArea {number:'17', name:'Information security aspects of business continuity management'})-[:SECTION_OF]->(iso27001),
(s18:SecurityArea {number:'18', name:'Compliance'})-[:SECTION_OF]->(iso27001),
(p11:SecurityArea {number:'11', name:'Information security'})-[:SECTION_OF]->(iso29100),
(s51:SecurityArea {number:'5.1', name:'Management direction for information security', objective:'To provide management direction and support for information security in accordance with business requirements and relevant laws and regulations.'})-[:SUBSECTION_OF]->(s5),
(s72:SecurityArea {number:'7.2', name:'During employment', objective:'To ensure that employees and contractors are aware of and fulfil their information security responsibilities.'})-[:SUBSECTION_OF]->(s7),
(s81:SecurityArea {number:'8.1', name:'Responsibility for assets', objective:'To identify organizational assets and define appropriate protection responsibilities.'})-[:SUBSECTION_OF]->(s8),
(s82:SecurityArea {number:'8.2', name:'Information classification', objective:'To ensure that information receives an appropriate level of protection in accordance with its importance to the organisation.'})-[:SUBSECTION_OF]->(s8),
(iso27010)<-[:INTRODUCED_BY]-(s84:SecurityArea {number:'8.4', name:'Information exchanges protection', objective:'To ensure adequate protection of information exchanges within an information sharing community.'})-[:SUBSECTION_OF]->(s8),
(s91:SecurityArea {number:'9.1', name:'Business requirements of access control', objective:'To limit access to information and information processing facilities.'})-[:SUBSECTION_OF]->(s9),
(s92:SecurityArea {number:'9.2', name:'User access management', objective:'To ensure authorized user access and to prevent unauthorized access to systems and services.'})-[:SUBSECTION_OF]->(s9),
(s93:SecurityArea {number:'9.3', name:'User responsibilities', objective:'To make users accountable for safeguarding their authentication information.'})-[:SUBSECTION_OF]->(s9),
(s94:SecurityArea {number:'9.4', name:'System and application access control', objective:'To prevent unauthorized access to systems and applications.'})-[:SUBSECTION_OF]->(s9),
(s101:SecurityArea {number:'10.1', name:'Cryptographic controls', objective:'To ensure proper and effective use of cryptography to protect the confidentiality, authenticity and/or integrity of information.'})-[:SUBSECTION_OF]->(s10),
(s121:SecurityArea {number:'12.1', name:'Operational procedures and responsibilities', objective:'To ensure correct and secure operations of information processing facilities.'})-[:SUBSECTION_OF]->(s12),
(s123:SecurityArea {number:'12.3', name:'Backup', objective:'To protect against loss of data.'})-[:SUBSECTION_OF]->(s12),
(s124:SecurityArea {number:'12.4', name:'Logging and monitoring', objective:'To record events and generate evidence.'})-[:SUBSECTION_OF]->(s12),
(s126:SecurityArea {number:'12.6', name:'Technical vulnerability management', objective:'To prevent exploitation of technical vulnerabilities.'})-[:SUBSECTION_OF]->(s12),
(s127:SecurityArea {number:'12.7', name:'Information systems audit considerations', objective:'To minimise the impact of audit activities on operational systems.'})-[:SUBSECTION_OF]->(s12),
(s131:SecurityArea {number:'13.1', name:'Network security management', objective:'To ensure the protection of information in networks and its supporting information processing facilities.'})-[:SUBSECTION_OF]->(s13),
(s132:SecurityArea {number:'13.2', name:'Information transfer', objective:'To maintain the security of information transferred within an organization and with any external entity.'})-[:SUBSECTION_OF]->(s13),
(s141:SecurityArea {number:'14.1', name:'Security requirements of information systems', objective:'To ensure that information security is an integral part of information systems across the entire lifecycle. This also includes the requirements for information systems which provide services over public networks.'})-[:SUBSECTION_OF]->(s14),
(s142:SecurityArea {number:'14.2', name:'Security in development and support processes', objective:'To ensure that information security is designed and implemented within the development lifecycle of information systems.'})-[:SUBSECTION_OF]->(s14),
(s143:SecurityArea {number:'14.3', name:'Test data', objective:'To ensure the protection of data used for testing.'})-[:SUBSECTION_OF]->(s14),
(s151:SecurityArea {number:'15.1', name:'Information security in supplier relationships', objective:'To ensure protection of the organization\'s assets that is accessible by suppliers.'})-[:SUBSECTION_OF]->(s15),
(s152:SecurityArea {number:'15.2', name:'Supplier service delivery management', objective:'To maintain an agreed level of information security and service delivery in line with supplier agreements.'})-[:SUBSECTION_OF]->(s15),
(s161:SecurityArea {number:'16.1', name:'Management of information security incidents and improvements', objective:'To ensure a consistent and effective approach to the management of informations security incidents, including communication on security events and weaknesses.'})-[:SUBSECTION_OF]->(s16),
(s171:SecurityArea {number:'17.1', name:'Information security continuity', objective:'Information security continuity shall be embedded in the organization\'s business continuity management systems.'})-[:SUBSECTION_OF]->(s17),
(s172:SecurityArea {number:'17.2', name:'Redundancies', objective:'To ensure availability of information processing facilities.'})-[:SUBSECTION_OF]->(s17),
(s181:SecurityArea {number:'18.1', name:'Compliance with legal and contractual requirements', objective:'To avoid breaches of legal, statutory, regulatory or contractual obligations related to information security and of any security requirements.'})-[:SUBSECTION_OF]->(s18),
(s182:SecurityArea {number:'18.2', name:'Information security reviews', objective:'To ensure that information security is implemented and operated in accordance with the organizational policies and procedures.'})-[:SUBSECTION_OF]->(s18),
(s511:Control {number:'5.1.1', name:'Policies for information security', control:'An information sharing policy should define how the community members will work together to set security management policies and direction for the information sharing community. It should be made available to all employees involved in information sharing within the community. The policy may restrict its dissemination to other employees of community members. The information sharing policy should define the information marking and distribution rules used within the community.'})-[:CONTROL_UNDER]->(s51),
(s511)-[:EXTENDED_BY]->(iso27010),
(s722:Control {number:'7.2.2', name:'Information security awareness, education and training', control:'All employees of the organization and, where relevant, contractors shall receive appropriate awareness education and training and regular updates in organizational policies and procedures, as relevant for their job function.'})-[:CONTROL_UNDER]->(s72),
(s811:Control {number:'8.1.1', name:'Inventory of assets', control:'Information, other assets associated with information and information processing facilities shall be identified and an inventory of these assets shall be drawn up and maintained. The cloud service customer\'s inventory of assets should account for information and associated assets stored in the cloud computing environment. The inventory of assets of the cloud service provider should explicitly identify: cloud service customer data; [and] cloud service derived data.'})-[:CONTROL_UNDER]->(s81),
(s811)-[:EXTENDED_BY]->(iso27017),
(s812:Control {number:'8.1.2', name:'Ownership of assets', control:'Assets maintained in the inventory shall be owned.'})-[:CONTROL_UNDER]->(s81),
(s813:Control {number:'8.1.3', name:'Acceptable use of assets', control:'Rules for the acceptable use of information and of assets associated with information and information processing facilities shall be identified, documented and implemented. Information provided by other members of an information sharing community is an asset and should be protected, used and disseminated in accordance with any rules set by the information sharing community or by the originator.'})-[:CONTROL_UNDER]->(s81),
(s813)-[:EXTENDED_BY]->(iso27010),
(s821:Control {number:'8.2.1', name:'Classification of information', control:'Information shall be classified in terms of legal requirements, value, credibility, priority, criticality and sensitivity to unauthorised disclosure or modification. See ISO 27799 for discussion of the flexibility needed to accommodate personal health information classification.'})-[:CONTROL_UNDER]->(s82),
(iso27799)<-[:EXTENDED_BY]-(s821)-[:EXTENDED_BY]->(iso27010),
(s822:Control {number:'8.2.2', name:'Labelling of information', control:'An appropriate set of procedures for information labelling shall be developed and implemented in accordance with the information classification scheme adopted by the organisation. The cloud service customer should label information and associated assets maintained in the cloud computing environment in accordance with the cloud service customer\'s adopted procedures for labelling. The cloud service provider should document and disclose any service functionality it provides allowing cloud service customers to classify and label their information and associated assets.'})-[:CONTROL_UNDER]->(s82),
(s822)-[:EXTENDED_BY]->(iso27017),
(s823:Control {number:'8.2.3', name:'Handling of assets', control:'Procedures for handling assets shall be developed and implemented in accordance with the information classification scheme adopted by the organisation.'})-[:CONTROL_UNDER]->(s82),
(s841:Control {number:'8.4.1', name:'Information dissemination', control:'Information dissemination within the receiving member should be limited, based on pre-defined dissemination markings defined by the community.'})-[:CONTROL_UNDER]->(s84),
(s842:Control {number:'8.4.2', name:'Information disclaimers', control:'Each information exchange should begin with a disclaimer, listing any special requirements to follow by the recipients in addition to the normal information markings.'})-[:CONTROL_UNDER]->(s84),
(s843:Control {number:'8.4.3', name:'Information credibility', control:'Each information exchange should indicate the originator\'s degree of confidence in the transmitted information\'s credibility and accuracy.'})-[:CONTROL_UNDER]->(s84),
(s844:Control {number:'8.4.4', name:'Information sensitivity reduction', control:'The originator of an information exchange should indicate if the sensitivity of the information supplied will reduce after some external event, or the passage of time.'})-[:CONTROL_UNDER]->(s84),
(s845:Control {number:'8.4.5', name:'Anonymous source protection', control:'A community member should remove any source identification information in any communication it originates or receives where anonymity is requested.'})-[:CONTROL_UNDER]->(s84),
(s846:Control {number:'8.4.6', name:'Anonymous recipient protection', control:'With the approval of the originator, members of a community should be able to receive communications without revealing their own identities.'})-[:CONTROL_UNDER]->(s84),
(s847:Control {number:'8.4.7', name:'Onwards release authority', control:'Unless it is marked for wider release, information should not be distributed beyond the information sharing community without formal approval from the originator.'})-[:CONTROL_UNDER]->(s84),
(s911:Control {number:'9.1.1', name:'Access control policy', control:'An access control policy shall be established, documented and reviewed based on business and information security requirements.'})-[:CONTROL_UNDER]->(s91),
(s912:Control {number:'9.1.2', name:'Access to networks and network services', control:'Users shall only be provided with access to the network and network services that they have been specifically authorized to use.'})-[:CONTROL_UNDER]->(s91),
(s921:Control {number:'9.2.1', name:'User registration and de-registration', control:'A formal user registration and de-registration process shall be implemented to enable assignment of access rights.'})-[:CONTROL_UNDER]->(s92),
(s922:Control {number:'9.2.2', name:'User access provisioning', control:'A formal user access provisioning process shall be implemented to assign or revoke access rights for all user types to all systems and services. The cloud service provider should provide functions for managing the access rights of the cloud service customer\'s cloud service users, and specifications for the use of these functions.'})-[:CONTROL_UNDER]->(s92),
(s922)-[:EXTENDED_BY]->(iso27017),
(s923:Control {number:'9.2.3', name:'Management of privileged access rights', control:'The allocation and use of privileged access rights shall be restricted and controlled.'})-[:CONTROL_UNDER]->(s92),
(s924:Control {number:'9.2.4', name:'Management of secret authentication information of users', control:'The allocation of secret authentication information shall be controlled through a formal management process...it should be noted that time pressures found in health delivery situations can make effective use of passwords difficult to employ. Many health organizations have considered the adoption of alternative authentication technologies to address this problem.'})-[:CONTROL_UNDER]->(s92),
(s924)-[:EXTENDED_BY]->(iso27799),
(s925:Control {number:'9.2.5', name:'Review of user access rights', control:'Asset owners shall review users\' access rights at regular intervals.'})-[:CONTROL_UNDER]->(s92),
(s926:Control {number:'9.2.6', name:'Removal or adjustment of access rights', control:'The access rights of all employees and external party users to information and information processing facilities shall be removed upon termination of their employment, contract or agreement, or adjusted upon change.'})-[:CONTROL_UNDER]->(s92),
(s931:Control {number:'9.3.1', name:'Use of secret authentication information', control:'Users shall be required to follow the organization\'s practices in the use of secret authentication information.'})-[:CONTROL_UNDER]->(s93),
(s941:Control {number:'9.4.1', name:'Information access restriction', control:'Access to information and application system functions shall be restricted in accordance with the access control policy. The cloud service customer should ensure that access to information in the cloud service can be restricted in accordance with its access control policy and that such restrictions are realized. The cloud service provider should provide access controls that allow the cloud service customer to restrict access to its cloud services, its cloud service functions and the cloud service customer data maintained in the service.'})-[:CONTROL_UNDER]->(s94),
(s941)-[:EXTENDED_BY]->(iso27017),
(s942:Control {number:'9.4.2', name:'Secure log-on procedures', control:'Where required by the access control policy, access to systems and applications shall be controlled by a secure log-in procedure.'})-[:CONTROL_UNDER]->(s94),
(s943:Control {number:'9.4.3', name:'Password management system', control:'Password management systems shall be interactive and shall ensure quality passwords.'})-[:CONTROL_UNDER]->(s94),
(s944:Control {number:'9.4.4', name:'Use of privileged utility programs', control:'The use of utility programs that might be capable of overriding system and application controls shall be restricted and tightly controlled.'})-[:CONTROL_UNDER]->(s94),
(s945:Control {number:'9.4.5', name:'Access control to program source code', control:'Access to program source code shall be restricted.'})-[:CONTROL_UNDER]->(s94),
(s1011:Control {number:'10.1.1', name:'Policy on the use of cryptographic controls', control:'A policy on the use of cryptographic controls for protection of information shall be developed and implemented. Cryptographic techniques can also be used to implement the dissemination rules of information sharing.'})-[:CONTROL_UNDER]->(s101),
(s1011)-[:EXTENDED_BY]->(iso27010),
(s1012:Control {number:'10.1.2', name:'Key management', control:'A policy on the use, protection and lifetime of cryptographic keys shall be developed and implemented through their whole lifecycle.'})-[:CONTROL_UNDER]->(s101),
(s1211:Control {number:'12.1.1', name:'Documented operating procedures', control:'Operating procedures shall be documented and made available to all users who need them.'})-[:CONTROL_UNDER]->(s121),
(s1212:Control {number:'12.1.2', name:'Change management', control:'Changes to the organization, business processes, information processing facilities and systems that affect information security shall be controlled. The cloud service provider should provide the cloud service customer with information regarding changes to the cloud service that could adversely affect the cloud service.'})-[:CONTROL_UNDER]->(s121),
(s1212)-[:EXTENDED_BY]->(iso27017),
(s1213:Control {number:'12.1.3', name:'Capacity management', control:'The use of resources shall be monitored, tuned and projections made of future capacity requirements to ensure the required system performance. The cloud service provider should monitor the total resource capacity to prevent information security incidents caused by resource shortages.'})-[:CONTROL_UNDER]->(s121),
(s1213)-[:EXTENDED_BY]->(iso27017),
(s1214:Control {number:'12.1.4', name:'Separation of development, testing and operational environments', control:'Development, testing, and operational environments shall be separated to reduce the risks of unauthorized access of changes to the operational environment. Where the use of PII for testing purposes cannot be avoided a risk assessment should be undertaken.'})-[:CONTROL_UNDER]->(s121),
(s1214)-[:EXTENDED_BY]->(iso27018),
(s1231:Control {number:'12.3.1', name:'Information backup', control:'Backup copies of information, software and system images shall be taken and tested regularly in accordance with an agreed backup policy. Information processing systems based on the cloud computing model introduce additional or alternative mechanisms to off-site backups for protecting against loss of data, ensuring continuity of data processing operations, and providing the ability to restore data processing operations after a disruptive event. PII-specific responsibilities in this respect can lie with the cloud service customer.'})-[:CONTROL_UNDER]->(s123),
(s1231)-[:EXTENDED_BY]->(iso27018),
(s1241:Control {number:'12.4.1', name:'Event logging', control:'Event logs recording user activities, exceptions, faults and information security events shall be produced, kept and regularly reviewed. When required by the information sharing community, members should log the internal dissemination of shared information. The cloud service provider should provide logging capabilities to the cloud service customer. Where possible, event logs should record whether or not PII has been changed as a result of an event and by whom.'})-[:CONTROL_UNDER]->(s124),
(s1241)-[:EXTENDED_BY]->(iso27010),
(s1241)-[:EXTENDED_BY]->(iso27017),
(s1241)-[:EXTENDED_BY]->(iso27018),
(s1242:Control {number:'12.4.2', name:'Protection of log information', control:'Logging facilities and log information shall be protected against tampering and authorized access. A procedure, preferably automatic, should be put in place to ensure that logged information is deleted within a specified and documented period.'})-[:CONTROL_UNDER]->(s124),
(s1242)-[:EXTENDED_BY]->(iso27018),
(s1243:Control {number:'12.4.3', name:'Administrator and operator logs', control:'System administrator and system operator activities shall be logged and the logs protected and regularly reviewed.'})-[:CONTROL_UNDER]->(s124),
(s1244:Control {number:'12.4.4', name:'Clock synchronisation', control:'The clocks of all relevant information processing systems within an organization or security domain shall be synchronised to a single reference time source. The cloud service provider should provide information to the cloud service customer regarding the clock used by the cloud service provider\'s systems, and information about how the cloud service customer can synchronize local clocks with the cloud service clock.'})-[:CONTROL_UNDER]->(s124),
(s1244)-[:EXTENDED_BY]->(iso27017),
(s1245:Control {number:'12.4.5', name:'Monitoring of Cloud Services', control:'The cloud service customer should have the capability to monitor specified aspects of the operation of the cloud services that the cloud service customer uses.'})-[:CONTROL_UNDER]->(s124),
(s1245)-[:EXTENDED_BY]->(iso27017),
(s1261:Control {number:'12.6.1', name:'Management of technical vulnerabilities', control:'Information about technical vulnerabilities or information systems being used shall be obtained in a timely fashion, the organization\'s exposure to such vulnerabilities evaluated and appropriate measures taken to address the associated risk. The cloud service provider should make available to the cloud service customer information about the management of technical vulnerabilities that can affect the cloud services provided.'})-[:CONTROL_UNDER]->(s126),
(s1261)-[:EXTENDED_BY]->(iso27017),
(s1271:Control {number:'12.7.1', name:'Information systems audit controls', control:'Audit requirements and activities involving verification of operational systems shall be carefully planned and agreed to minimise disruptions to business processes.'})-[:CONTROL_UNDER]->(s127),
(iso27010)<-[:INTRODUCED_BY]-(s1272:Control {number:'12.7.2', name:'Community audit rights', control:'Every information sharing community should specify the rights of members to audit the systems of other members and of any trusted service providers.'})-[:CONTROL_UNDER]->(s127),
(s1311:Control {number:'13.1.1', name:'Network controls', control:'Networks shall be managed and controlled to protect information in systems and applications.'})-[:CONTROL_UNDER]->(s131),
(s1312:Control {number:'13.1.2', name:'Security of network services', control:'Security mechanisms, service levels and management requirements of all network services shall be identified and included in network services agreements, whether these services are provided in-house or outsourced.'})-[:CONTROL_UNDER]->(s131),
(s1313:Control {number:'13.1.3', name:'Segregation in networks', control:'Groups of information services, users and information systems shall be segregated on networks. The cloud service provider should enforce segregation of network access for the following cases: segregation between tenants in a multi-tenant environment; [and] segregation between the cloud service provider\'s internal administration environment and the cloud service customer\'s cloud computing environment. Where appropriate, the cloud service provider should help the cloud service customer verify the segregation implemented by the cloud service provider.'})-[:CONTROL_UNDER]->(s131),
(s1313)-[:EXTENDED_BY]->(iso27017),
(s1321:Control {number:'13.2.1', name:'Information transfer policies and procedures', control:'Formal transfer policies, procedures and controls shall be in place to protect the transfer of information through the use of all types of communication facilities.'})-[:CONTROL_UNDER]->(s132),
(s1322:Control {number:'13.2.2', name:'Agreements on information transfer', control:'Agreements shall address the secure transfer of business information between the organization and external parties. All information sharing communities should define information transfer agreements, and should only permit members to join the community if such agreements are signed and accepted.'})-[:CONTROL_UNDER]->(s132),
(s1322)-[:EXTENDED_BY]->(iso27010),
(s1323:Control {number:'13.2.3', name:'Electronic messaging', control:'Information involved in electronic messaging shall be appropriately protected. All information sharing communities should define rules for the protection of information in transit, and only permit members to join the community if such rules are accepted and implemented by the prospective member. Any supporting entity should implement such rules internally. Information sharing communities should consider implementing alternative mechanisms for information sharing that do not rely on electronic messaging, and enabling members to specify that specific messages are distributed by such other routes'})-[:CONTROL_UNDER]->(s132),
(s1323)-[:EXTENDED_BY]->(iso27010),
(s1324:Control {number:'13.2.4', name:'Confidentiality or non-disclosure agreements', control:'Requirements for confidentiality or non-disclosure agreements reflecting the organization\'s needs for the protection of information shall be identified, regularly reviewed and documented.'})-[:CONTROL_UNDER]->(s132),
(s1411:Control {number:'14.1.1', name:'Information security requirements analysis and specification', control:'The information security related requirements shall be included in the requirements for new information systems or enhancements to existing information systems.'})-[:CONTROL_UNDER]->(s141),
(s1412:Control {number:'14.1.2', name:'Securing application services on public networks', control:'Information involved in application services passing over public networks shall be protected from fraudulent activity, contract dispute and unauthorized disclosure and modification.'})-[:CONTROL_UNDER]->(s141),
(s1413:Control {number:'14.1.3', name:'Protecting application services transactions', control:'Information involved in application service transactions shall be protected to prevent incomplete transmission, mis-routing, unauthorized message alteration, unauthorized disclosure, unauthorized message duplication or replay.'})-[:CONTROL_UNDER]->(s141),
(s1421:Control {number:'14.2.1', name:'Secure development policy', control:'Rules for the development of software and systems shall be established and applied to developments within the organization.'})-[:CONTROL_UNDER]->(s142),
(s1422:Control {number:'14.2.2', name:'System change control procedures', control:'Changes to systems within the development lifecycle shall be controlled by the use of formal change control procedures.'})-[:CONTROL_UNDER]->(s142),
(s1423:Control {number:'14.2.3', name:'Technical review of applications after operating platform changes', control:'When operating platforms are changed, business critical applications shall be reviewed and tested to ensure there is no adverse impact on organizational operations or security.'})-[:CONTROL_UNDER]->(s142),
(s1424:Control {number:'14.2.4', name:'Restrictions on changes to software packages', control:'Modifications to software packages shall be discouraged, limited to necessary changes and all changes shall be strictly controlled.'})-[:CONTROL_UNDER]->(s142),
(s1425:Control {number:'14.2.5', name:'Secure system engineering principles', control:'Principles for engineering secure systems shall be established, documented, maintained and applied to any information system implementation efforts.'})-[:CONTROL_UNDER]->(s142),
(s1426:Control {number:'14.2.6', name:'Secure development environment', control:'Organizations shall establish and appropriately protect secure development environments for system development and integration efforts that cover the entire system development lifecycle.'})-[:CONTROL_UNDER]->(s142),
(s1427:Control {number:'14.2.7', name:'Outsourced development', control:'The organization shall supervise and monitor the activity of outsources system development.'})-[:CONTROL_UNDER]->(s142),
(s1428:Control {number:'14.2.8', name:'System security testing', control:'Testing of security functionality shall be carried out during development.'})-[:CONTROL_UNDER]->(s142),
(s1429:Control {number:'14.2.9', name:'System acceptance testing', control:'Acceptance testing programs and related criteria shall be established for new information systems, upgrades and new versions.'})-[:CONTROL_UNDER]->(s142),
(s1431:Control {number:'14.3.1', name:'Protection of test data', control:'Test data shall be selected carefully, protected and controlled.'})-[:CONTROL_UNDER]->(s143),
(s1511:Control {number:'15.1.1', name:'Information security policy for supplier relationships', control:'Information security requirements for mitigating the risks associated with supplier\'s access to the organization\'s assets shall be agreed with the supplier and documented. The cloud service customer should include the cloud service provider as a type of supplier in its information security policy for supplier relationships.'})-[:CONTROL_UNDER]->(s151),
(s1511)-[:EXTENDED_BY]->(iso27017),
(s1512:Control {number:'15.1.2', name:'Addressing security within supplier agreements', control:'All relevant information security requirements shall be established and agreed with each supplier that may access, process, store, communicate, or provide IT infrastructure components for, the organization\'s information. All community members should be made aware of the identities of all third parties involved in the provision of community services, in case they have objections to particular parties being involved in the handling of information they provide.'})-[:CONTROL_UNDER]->(s151),
(s1512)-[:EXTENDED_BY]->(iso27010),
(s1513:Control {number:'15.1.3', name:'Information and communication technology supply chain', control:'Agreements with suppliers shall include requirements to address the information security risks associated with information and communications technology services and product the supply chain.'})-[:CONTROL_UNDER]->(s151),
(s1521:Control {number:'15.2.1', name:'Monitoring and review of supplier services', control:'Organizations shall regularly monitor, review and audit supplier service delivery.'})-[:CONTROL_UNDER]->(s152),
(s1611:Control {number:'16.1.1', name:'Responsibilities and procedures', control:'Management responsibilities and procedures shall be established to ensure a quick, effective and orderly response to information security incidents. An information security incident should trigger a review by the public cloud PII processor, as part of its information security management process, to determine if a data breach involving PII has taken place.'})-[:CONTROL_UNDER]->(s161),
(s1611)-[:EXTENDED_BY]->(iso27018),
(s1612:Control {number:'16.1.2', name:'Reporting information security incidents', control:'Information security events shall be reported through appropriate management channels as quickly as possible. Members of an information sharing community should consider whether detected events should be reported to other members of the community. The community should agree and publish guidance on the types of incident that will be of interest to other members. The cloud service provider should provide mechanisms for: the cloud service customer to report an information security event to the cloud service provider; the cloud service provider to report an information security event to a cloud service customer; [and] the cloud service customer to track the status of a reported information security event.'})-[:CONTROL_UNDER]->(s161),
(s1612)-[:EXTENDED_BY]->(iso27010),
(s1612)-[:EXTENDED_BY]->(iso27017),
(s1613:Control {number:'16.1.3', name:'Reporting information security weaknesses', control:'Employees and contractors using the organization\'s information systems and services shall be required to note and report any observed or suspected information security weaknesses in systems or services.'})-[:CONTROL_UNDER]->(s161),
(s1614:Control {number:'16.1.4', name:'Assessment of an decision on information security events', control:'Information security events shall be assessed and it shall be decided if they are to be classified as information security incidents.'})-[:CONTROL_UNDER]->(s161),
(s1615:Control {number:'16.1.5', name:'Response to information security incidents', control:'Information security incidents shall be responded to in accordance with the documented procedures.'})-[:CONTROL_UNDER]->(s161),
(s1616:Control {number:'16.1.6', name:'Learning from information security incidents', control:'Knowledge gained from analysing and resolving information security incidents shall be used to reduce the likelihood or impact of future incidents. Investigations based on information distributed by an information sharing community should be performed, to reduce the risks of similar incidents and develop a better understanding of the risks facing the community and any related significant information infrastructures.'})-[:CONTROL_UNDER]->(s161),
(s1616)-[:EXTENDED_BY]->(iso27010),
(s1617:Control {number:'16.1.7', name:'Collection of evidence', control:'The organization shall define and apply procedures for the identification, collection, acquisition and preservation of information, which can serve as evidence.'})-[:CONTROL_UNDER]->(s161),
(s1618:Control {number:'16.1.8', name:'Early warning system', control:'An early warning system should be deployed within the information sharing community to effectively communicate priority information as soon as it is available.'})-[:CONTROL_UNDER]->(s161),
(s1711:Control {number:'17.1.1', name:'Planning information security continuity', control:'The organization shall determine its requirements for information security and the continuity of information security management in adverse situations, e.g. during a crisis or disaster.'})-[:CONTROL_UNDER]->(s171),
(s1712:Control {number:'17.1.2', name:'Implementing information security continuity', control:'The organization shall establish, document, implement and maintain processes, procedures and controls to ensure the required level of continuity for information security during an adverse situation.'})-[:CONTROL_UNDER]->(s171),
(s1713:Control {number:'17.1.3', name:'Verify, review and evaluate information security continuity', control:'The organization shall verify the established and implemented information security continuity controls at regular intervals in order to ensure that they are valid and effective during adverse situations.'})-[:CONTROL_UNDER]->(s171),
(s1721:Control {number:'17.2.1', name:'Availability of information processing facilities', control:'Information processing facilities shall be implemented with redundancy sufficient to meet availability requirements.'})-[:CONTROL_UNDER]->(s172),
(s1811:Control {number:'18.1.1', name:'Identification of applicable legislation and contractual requirements', control:'All relevant legislative statutory, regulatory, contractual requirements and the organization\'s approach to meet these requirements shall be explicitly identified, documented and kept up to date for each information system and the organization. The information sharing community should take due account of any relevant agreements, laws and regulations relating to information sharing, such as anti-cartel legislation or regulations. This could prevent certain organizations joining the community, or place restrictions upon their representation.'})-[:CONTROL_UNDER]->(s181),
(s1811)-[:EXTENDED_BY]->(iso27010),
(s1812:Control {number:'18.1.2', name:'Intellectual property rights', control:'Appropriate procedures shall be implemented to ensure compliance with legislative, regulatory and contractual requirements related to intellectual property rights and use of proprietary software products. The cloud service provider should establish a process for responding to intellectual property rights complaints.'})-[:CONTROL_UNDER]->(s181),
(s1812)-[:EXTENDED_BY]->(iso27017),
(s1813:Control {number:'18.1.3', name:'Protection of records', control:'Records shall be protected from loss, destruction, falsification, unauthorized access and unauthorized release, in accordance with legislatory, regulatory, contractual and business requirements.'})-[:CONTROL_UNDER]->(s181),
(s1814:Control {number:'18.1.4', name:'Privacy and protection of personally identifiable information', control:'Privacy and protection of personally identifiable information shall be ensured as required in relevant legislation and regulation where applicable.'})-[:CONTROL_UNDER]->(s181),
(s1815:Control {number:'18.1.5', name:'Regulation of cryptographic controls', control:'Cryptographic controls shall be used in compliance with all relevant agreements, legislation and regulations.'})-[:CONTROL_UNDER]->(s181),
(iso27010)<-[:INTRODUCED_BY]-(s1816:Control {number:'18.1.6', name:'Liability to the information sharing community', control:'Liability issues and remediation should be clarified, understood and approved by all members of an information sharing community, to address situations in which information is intentionally or unintentionally disclosed.'})-[:CONTROL_UNDER]->(s181),
(s1821:Control {number:'18.2.1', name:'Independent review of information security', control:'The organization\'s approach to managing information security and its implementation (i.e. control objectives, controls, policies, processes and procedures for information security) shall be reviewed independently at planned intervals or when significant changes occur.'})-[:CONTROL_UNDER]->(s182),
(s1822:Control {number:'18.2.2', name:'Compliance with security policies and standards', control:'Managers shall regularly review the compliance of information processing and procedures within their area of responsibility with the appropriate security policies, standards and any other security requirements.'})-[:CONTROL_UNDER]->(s182),
(s1823:Control {number:'18.2.3', name:'Technical compliance review', control:'Information systems shall be regularly reviewed for compliance with the organization\'s information security policies and standard.'})-[:CONTROL_UNDER]->(s182),
(p112:Control {number:'11.2', name:'Restriction on the creation of hardcopy material', control:'The creation of hardcopy material displaying PII should be restricted.'})-[:CONTROL_UNDER]->(p11),
(p116:Control {number:'11.6', name:'Encryption of PII transmitted over public data-transmission networks', control:'PII that is transmitted over public data-transmission networks should be encrypted prior to transmission.'})-[:CONTROL_UNDER]->(p11),
(p118:Control {number:'11.8', name:'Unique use of user ID', control:'If more than one individual has access to stored PII, then they should each have a distinct user ID for identification, authentication and authorization purposes.'})-[:CONTROL_UNDER]->(p11),
(p119:Control {number:'11.9', name:'Records of authorized users', control:'An up-to-date record of the users or profiles of users who have authorized access to the information system should be maintained.'})-[:CONTROL_UNDER]->(p11),
(p1110:Control {number:'11.10', name:'User ID management', control:'De-activated or expired user IDs should not be granted to other individuals.'})-[:CONTROL_UNDER]->(p11);
// Create nodes representing all top-level industrial classifications from the
// SIC standard.
// Each top-level section (A-U) of the UK Standard Industrial Classification
// is created as an Industry node and linked via SUBSET_OF to a catch-all
// 'All' node, so queries can fall back to aggregate figures when no
// section-specific data exists.
CREATE (all:Industry {name:'All'}),
(:Industry {id:'A', name:'Agriculture, Forestry and Fishing'})-[:SUBSET_OF]->(all),
(:Industry {id:'B', name:'Mining and quarrying'})-[:SUBSET_OF]->(all),
(:Industry {id:'C', name:'Manufacturing'})-[:SUBSET_OF]->(all),
(:Industry {id:'D', name:'Electricity, Gas, Steam and air conditioning'})-[:SUBSET_OF]->(all),
(:Industry {id:'E', name:'Water supply, sewerage, waste management and remediation activities'})-[:SUBSET_OF]->(all),
(:Industry {id:'F', name:'Construction'})-[:SUBSET_OF]->(all),
(:Industry {id:'G', name:'Wholesale and retail trade; repair of motor vehicles and motorcycles'})-[:SUBSET_OF]->(all),
(:Industry {id:'H', name:'Transport and storage'})-[:SUBSET_OF]->(all),
(:Industry {id:'I', name:'Accommodation and food service activities'})-[:SUBSET_OF]->(all),
(:Industry {id:'J', name:'Information and communication'})-[:SUBSET_OF]->(all),
(:Industry {id:'K', name:'Financial and insurance activities'})-[:SUBSET_OF]->(all),
(:Industry {id:'L', name:'Real estate activities'})-[:SUBSET_OF]->(all),
(:Industry {id:'M', name:'Professional, scientific and technical activities'})-[:SUBSET_OF]->(all),
(:Industry {id:'N', name:'Administrative and support service activities'})-[:SUBSET_OF]->(all),
(:Industry {id:'O', name:'Public administration and defence; compulsory social security'})-[:SUBSET_OF]->(all),
(:Industry {id:'P', name:'Education'})-[:SUBSET_OF]->(all),
(:Industry {id:'Q', name:'Human health and social work activities'})-[:SUBSET_OF]->(all),
(:Industry {id:'R', name:'Arts, entertainment and recreation'})-[:SUBSET_OF]->(all),
(:Industry {id:'S', name:'Other service activities'})-[:SUBSET_OF]->(all),
(:Industry {id:'T', name:'Activities of households as employers, undifferentiated goods and service producing activities of households for own use'})-[:SUBSET_OF]->(all),
(:Industry {id:'U', name:'Activities of extraterritorial organisations and bodies'})-[:SUBSET_OF]->(all);
// Create nodes representing the various organisational size classifications
// used by HM Government.
// As with the Industry taxonomy, each size band is a SUBSET_OF the
// catch-all 'All' node so size-agnostic figures can attach to one node.
CREATE (all:Size {name:'All'}),
(:Size {name:'Micro'})-[:SUBSET_OF]->(all),
(:Size {name:'Small'})-[:SUBSET_OF]->(all),
(:Size {name:'Medium'})-[:SUBSET_OF]->(all),
(:Size {name:'Large'})-[:SUBSET_OF]->(all);
// Create nodes and relationships representing incident probability and average
// cost values derived from the 2020 _Cyber Security Breaches Survey_.
// The MATCH clause binds the previously-created Size, Industry, SecurityArea
// and Control nodes so the new IncidentProbability nodes can be related to
// them. `probability` values are percentages taken from the survey.
// NOTE(review): c722, c1231 and c1261 are matched below but never referenced
// in the CREATE clause - confirm whether FOR_AREA/control relationships for
// them were intended.
MATCH (allsize:Size {name:'All'}),
(micro:Size {name:'Micro'}),
(small:Size {name:'Small'}),
(medium:Size {name:'Medium'}),
(large:Size {name:'Large'}),
(allind:Industry {name:'All'}),
(infocoms:Industry {id:'J'}),
(profscitech:Industry {id:'M'}),
(admin:Industry {id:'N'}),
(realestate:Industry {id:'L'}),
(s72:SecurityArea {number:'7.2'}),
(c722:Control {number:'7.2.2'}),
(s81:SecurityArea {number:'8.1'}),
(s82:SecurityArea {number:'8.2'}),
(s92:SecurityArea {number:'9.2'}),
(s94:SecurityArea {number:'9.4'}),
(s121:SecurityArea {number:'12.1'}),
(s123:SecurityArea {number:'12.3'}),
(s124:SecurityArea {number:'12.4'}),
(s126:SecurityArea {number:'12.6'}),
(c1231:Control {number:'12.3.1'}),
(c1261:Control {number:'12.6.1'}),
(s131:SecurityArea {number:'13.1'}),
(s132:SecurityArea {number:'13.2'}),
(s141:SecurityArea {number:'14.1'}),
(s151:SecurityArea {number:'15.1'}),
(s172:SecurityArea {number:'17.2'}),
(s181:SecurityArea {number:'18.1'})
// i51x nodes record the probability of experiencing any breach, broken down
// by size and/or industry; i52x nodes break the figure down by incident
// type, with FOR_AREA relationships mapping each incident type to the
// ISO 27002 security areas relevant to mitigating it. i54 stores the base
// distribution of how often affected organisations experience incidents.
CREATE (csbs2020:DataSource {name:'Cyber Security Breaches Survey', date:'2020'}),
(i511:IncidentProbability {probability:46})-[:FROM]->(csbs2020),
(allsize)<-[:FOR_SIZE]-(i511)-[:FOR_INDUSTRY]->(allind),
(i512:IncidentProbability {probability:43})-[:FROM]->(csbs2020),
(i512)-[:FOR_SIZE]->(micro),
(i513:IncidentProbability {probability:62})-[:FROM]->(csbs2020),
(i513)-[:FOR_SIZE]->(small),
(i514:IncidentProbability {probability:68})-[:FROM]->(csbs2020),
(i514)-[:FOR_SIZE]->(medium),
(i515:IncidentProbability {probability:75})-[:FROM]->(csbs2020),
(i515)-[:FOR_SIZE]->(large),
(i516:IncidentProbability {probability:62})-[:FROM]->(csbs2020),
(i516)-[:FOR_INDUSTRY]->(infocoms),
(i517:IncidentProbability {probability:59})-[:FROM]->(csbs2020),
(i517)-[:FOR_INDUSTRY]->(profscitech),
(i518:IncidentProbability {probability:58})-[:FROM]->(csbs2020),
(realestate)<-[:FOR_INDUSTRY]-(i518)-[:FOR_INDUSTRY]->(admin),
(i521:IncidentProbability {probability:46.44, desc:'Fraudulent emails or being directed to fraudulent websites'})-[:FROM]->(csbs2020),
(s72)<-[:FOR_AREA]-(i521)-[:FOR_AREA]->(s121),
(s123)<-[:FOR_AREA]-(i521)-[:FOR_AREA]->(s126),
(i522:IncidentProbability {probability:14.04, desc:'Others impersonating organisation in emails or online'})-[:FROM]->(csbs2020),
(s72)<-[:FOR_AREA]-(i522)-[:FOR_AREA]->(s82),
(s121)<-[:FOR_AREA]-(i522)-[:FOR_AREA]->(s132),
(i523:IncidentProbability {probability:8.64, desc:'Viruses, spyware or malware'})-[:FROM]->(csbs2020),
(s72)<-[:FOR_AREA]-(i523)-[:FOR_AREA]->(s92),
(s94)<-[:FOR_AREA]-(i523)-[:FOR_AREA]->(s123),
(s126)<-[:FOR_AREA]-(i523)-[:FOR_AREA]->(s131),
(i523)-[:FOR_AREA]->(s172),
(i524:IncidentProbability {probability:4.86, desc:'Hacking or attempted hacking of online bank accounts'})-[:FROM]->(csbs2020),
(s72)<-[:FOR_AREA]-(i524)-[:FOR_AREA]->(s81),
(s92)<-[:FOR_AREA]-(i524)-[:FOR_AREA]->(s94),
(s121)<-[:FOR_AREA]-(i524)-[:FOR_AREA]->(s124),
(i524)-[:FOR_AREA]->(s181),
(i525:IncidentProbability {probability:4.31, desc:'Ransomware'})-[:FROM]->(csbs2020),
(s72)<-[:FOR_AREA]-(i525)-[:FOR_AREA]->(s92),
(s94)<-[:FOR_AREA]-(i525)-[:FOR_AREA]->(s123),
(i526:IncidentProbability {probability:3.24, desc:'Unauthorised use of computers, networks or servers by outsiders'})-[:FROM]->(csbs2020),
(s72)<-[:FOR_AREA]-(i526)-[:FOR_AREA]->(s92),
(s94)<-[:FOR_AREA]-(i526)-[:FOR_AREA]->(s124),
(s126)<-[:FOR_AREA]-(i526)-[:FOR_AREA]->(s141),
(s151)<-[:FOR_AREA]-(i526)-[:FOR_AREA]->(s181),
(i527:IncidentProbability {probability:1.62, desc:'Unauthorised use of computers, networks or servers by staff'})-[:FROM]->(csbs2020),
(s72)<-[:FOR_AREA]-(i527)-[:FOR_AREA]->(s92),
(s94)<-[:FOR_AREA]-(i527)-[:FOR_AREA]->(s123),
(i527)-[:FOR_AREA]->(s124),
(i54:IncidentBaseFrequencyProbabilities {probabilities:[0.23, 0.22, 0.21, 0.15, 0.08, 0.09], probabilities_labels:['Once per year', 'Less than one a month', 'Once a month', 'Once a week', 'Once a day', 'Several times a day']})-[:FROM]->(csbs2020);
// Attach the survey's average incident cost figures (mean and median, in
// GBP per the CSBS) to the relevant size bands. Micro and small
// organisations share one averages node, medium and large another; all
// three averages nodes are industry-agnostic (linked to the 'All'
// Industry node).
MATCH (allsize:Size {name:'All'}),
(micro:Size {name:'Micro'}),
(small:Size {name:'Small'}),
(medium:Size {name:'Medium'}),
(large:Size {name:'Large'}),
(allind:Industry {name:'All'}),
(csbs2020:DataSource {name:'Cyber Security Breaches Survey'})
CREATE (i521:IncidentCostAverages {mean:3230, median: 274})-[:FROM]->(csbs2020),
(allsize)<-[:FOR_SIZE]-(i521)-[:FOR_INDUSTRY]->(allind),
(i522:IncidentCostAverages {mean:3110, median: 244})-[:FROM]->(csbs2020),
(micro)<-[:FOR_SIZE]-(i522)-[:FOR_SIZE]->(small),
(i522)-[:FOR_INDUSTRY]->(allind),
(i523:IncidentCostAverages {mean:5220, median:935})-[:FROM]->(csbs2020),
(medium)<-[:FOR_SIZE]-(i523)-[:FOR_SIZE]->(large),
(i523)-[:FOR_INDUSTRY]->(allind);

48
requirements.txt Normal file
View File

@ -0,0 +1,48 @@
appdirs==1.4.4
astroid==2.5
black==20.8b1
certifi==2020.12.5
cffi==1.14.5
chardet==4.0.0
click==7.1.2
cryptography==3.4.6
cycler==0.10.0
decouple==0.0.7
docker==4.4.4
english==2020.7.0
idna==2.10
isort==5.7.0
kiwisolver==1.3.1
lazy-object-proxy==1.5.2
matplotlib==3.3.4
mccabe==0.6.1
monotonic==1.5
mypy-extensions==0.4.3
neotime==1.7.4
numpy==1.20.1
packaging==20.9
pandas==1.2.2
pansi==2020.7.3
pathspec==0.8.1
patsy==0.5.1
Pillow==8.1.0
prompt-toolkit==2.0.10
py2neo==2021.0.1
pycparser==2.20
Pygments==2.8.0
pylint==2.7.1
pyparsing==2.4.7
python-dateutil==2.8.1
pytz==2021.1
regex==2020.11.13
requests==2.25.1
scipy==1.6.1
six==1.15.0
statsmodels==0.12.2
toml==0.10.2
typed-ast==1.4.2
typing-extensions==3.7.4.3
urllib3==1.26.3
wcwidth==0.2.5
websocket-client==0.57.0
wrapt==1.12.1

696
src/graph.py Normal file
View File

@ -0,0 +1,696 @@
"""
Neo4j Graph Database Interface
This module defines:
a) the interface for interacting with the Neo4j graph database; and
b) subclasses of `Relationship`.
"""
import re
import sys
import logging as log
from typing import List, Tuple, Union, Dict
from datetime import datetime
from py2neo import Graph, Node, NodeMatcher, Relationship, DatabaseError
import numpy as np
class GraphInterface:
    """
    An interface for the Neo4j graph database used to hold TI data.

    This interface abstracts out the actual transactions, allowing a user
    to use more friendly methods without worrying about the implementation or
    learning the Cypher query language syntax.

    This class should:
        a) determine the correct transactions to use based on the called
           method and any arguments;
        b) return only `Node`s, `Relationship`s, `SubGraph`s or lists thereof,
           so that the values can be assigned to subclasses of those at the
           point of calling; and
        c) deal with any `Exception`s, but not issues like returning 0 results,
           which should be dealt with at the point of calling.
    """

    # Shared (class-level) py2neo connection, opened on first construction;
    # None until a connection has been established.
    g: Union[Graph, None] = None
@staticmethod
def __init__():
    """
    Opens the shared Neo4j database connection if not already open.

    Declared as a ``@staticmethod`` so the connection is held once on the
    class (``GraphInterface.g``) rather than per instance; constructing a
    ``GraphInterface()`` therefore just ensures the singleton connection
    exists.

    Exits the process with a non-zero status if the connection cannot be
    opened.
    """
    try:
        if GraphInterface.g is None:
            GraphInterface.g = Graph(password="test")
            log.info("Neo4j database connection opened successfully.")
        else:
            log.warning(
                "Neo4j database already connected - this branch "
                "shouldn't have been hit though!"
            )
    except DatabaseError:
        log.error("ERR: Neo4j database connection not successfully opened.")
        # A bare sys.exit() raises SystemExit(None), which terminates the
        # process with status 0 (success); exit with 1 so scripts and CI
        # can detect the failure.
        sys.exit(1)
@staticmethod
def delete_distributions() -> bool:
    """Deletes any pre-existing distributions."""
    # Drop every node carrying either distribution label, along with any
    # relationships attached to it.
    query = (
        "MATCH (n) "
        "WHERE n:IncidentFrequencyDistribution OR n:IncidentCostsDistribution "
        "DETACH DELETE n;"
    )
    GraphInterface.g.run(query)
    return True
@staticmethod
def get_incident_frequency_probabilities(
    boundaries, pairing: Tuple = ("All", "All")
) -> Union[List[float], None]:
    """
    Attempts to get a list of probabilities for different annual incident
    frequencies, specific to the organisational details provided.

    It first gets (the average of) any sets of base frequencies, then looks
    up the provided size/industry values to see if they have any assigned
    breach probability values in the graph database. If multiple values are
    found, the average is taken.

    Once the specific base (i.e., >0) probability is found, it then
    recalculates the overall set of probabilities as proportions of that
    base figure.

    :param boundaries: frequency-bucket boundaries; each stored base set
        has one fewer entry than this (the extra bucket is "0 breaches",
        whose probability is derived here).
    :param pairing: a (size, industry) tuple.
    :returns: a list of ``len(boundaries)`` probabilities, or None if no
        usable figures were found.
    :raises ValueError: if the computed probabilities do not line up with
        the supplied boundaries.
    """
    size = pairing[0]
    industry = pairing[1]
    log.info(
        "Attempting to get breach frequency probabilities specific to ('%s', '%s')...",
        size,
        industry,
    )
    base_frequency_probabilities_nodes = GraphInterface._get_nodes(
        "IncidentBaseFrequencyProbabilities"
    )
    # Keeps only the stored sets whose length matches the requested
    # boundaries; each set covers the >0-breach buckets only.
    candidate_sets = [
        node["probabilities"]
        for node in base_frequency_probabilities_nodes
        if len(node["probabilities"]) == (len(boundaries) - 1)
    ]
    if not candidate_sets:
        # Previously this fell through and indexed an empty list; return
        # None explicitly instead of crashing.
        log.info("No matching sets of base frequency probabilities found.")
        return None
    if len(candidate_sets) > 1:
        # If there are >1 sets of likelihoods, gets the element-wise mean.
        # Each set has len(boundaries) - 1 entries, so the mean is taken
        # column-wise over that length (iterating up to len(boundaries),
        # as an earlier revision did, overruns the array by one).
        log.info("Multiple sets of base frequencies found, averaging...")
        base_frequency_probabilities = list(
            np.mean(np.array(candidate_sets), axis=0)
        )
    else:
        base_frequency_probabilities = candidate_sets[0]
    probability_of_breach = GraphInterface.get_probability_of_breach(size, industry)
    # Explicit None check so a recorded probability of exactly 0 is still
    # treated as a real figure.
    if probability_of_breach is not None:
        log.info(
            "Found specific >0 breaches probability value for one or both "
            "of ('%s', '%s'), calculating follow-on values...",
            size,
            industry,
        )
        # Sets the probability of having 0 breaches.
        breach_frequency_probabilities = [(100 - probability_of_breach) / 100]
        # Calculates the remaining probabilities proportional to the sum
        # >0 breaches probability.
        for base_frequency_probability in base_frequency_probabilities:
            breach_frequency_probabilities.append(
                (probability_of_breach * base_frequency_probability) / 100
            )
        if len(breach_frequency_probabilities) != len(boundaries):
            # ValueError (an Exception subclass, so existing handlers
            # still match) rather than a bare Exception.
            raise ValueError("Mismatched boundaries!")
        return breach_frequency_probabilities
    log.info("No breach probability value found.")
    return None
# pylint: disable=too-many-branches,too-many-locals,too-many-statements
@staticmethod
def get_probability_of_breach(size="All", industry="All") -> Union[float, None]:
    """
    Returns the probability (a percentage) of an organisation of a given
    size and/or industry experiencing a breach with an outcome in the next
    year.

    Where probability figures exist for both the size and the industry,
    the two figures are averaged. (NOTE(review): an earlier docstring
    claimed the size figure was preferred outright, but the code has
    always averaged the two; the documentation now matches the code.)

    :returns: the probability, or None if no figures exist for either
        attribute.
    """
    size_probability = None
    industry_probability = None
    size_node = GraphInterface._get_node("Size", name=size)
    if size_node:
        log.info("Found node for size '%s'.", size)
    else:
        log.info("No node found for size '%s'.", size)
    industry_node = GraphInterface._get_node("Industry", name=industry)
    if industry_node:
        log.info("Found node for industry '%s'.", industry)
    else:
        log.info("No node found for industry '%s'.", industry)
    # If no figures were found for this pairing, returns None.
    if size_node is None and industry_node is None:
        return None
    if size_node:
        size_relations = GraphInterface.g.match({size_node}, r_type=FOR_SIZE)
        # Collects the probability of every IncidentProbability node
        # attached to this size.
        size_probabilities = [
            rel.start_node["probability"]
            for rel in size_relations
            if rel.start_node.has_label("IncidentProbability")
        ]
        if len(size_probabilities) > 1:
            log.info(
                "Multiple probabilities found for size '%s', averaging...", size
            )
            size_probability = sum(size_probabilities) / len(size_probabilities)
        elif len(size_probabilities) == 1:
            log.info("Probability value found for size '%s'.", size)
            size_probability = size_probabilities[0]
        else:
            log.info("No probability value found for size '%s'.", size)
    if industry_node:
        industry_relations = GraphInterface.g.match(
            {industry_node}, r_type=FOR_INDUSTRY
        )
        # Collects the probability of every IncidentProbability node
        # attached to this industry.
        industry_probabilities = [
            rel.start_node["probability"]
            for rel in industry_relations
            if rel.start_node.has_label("IncidentProbability")
        ]
        if len(industry_probabilities) > 1:
            log.info(
                "Multiple probabilities found for industry '%s', averaging...",
                industry,
            )
            industry_probability = sum(industry_probabilities) / len(
                industry_probabilities
            )
        elif len(industry_probabilities) == 1:
            log.info("Probability value found for industry '%s'.", industry)
            industry_probability = industry_probabilities[0]
        else:
            log.info("No probability value found for industry '%s'.", industry)
    # Explicit None checks: truthiness testing would silently discard a
    # recorded probability of exactly 0.
    if size_probability is not None and industry_probability is not None:
        log.info(
            "Probabilities found for both size '%s' and industry '%s', averaging...",
            size,
            industry,
        )
        probability = (size_probability + industry_probability) / 2
    elif size_probability is not None:
        probability = size_probability
    else:
        probability = industry_probability
    return probability
# pylint: enable=too-many-branches,too-many-locals,too-many-statements
# pylint: disable=too-many-branches,too-many-locals,too-many-statements
@staticmethod
def get_incident_cost_averages(
    pairing: Tuple = ("All", "All")
) -> Union[Tuple[float, float], None]:
    """
    Attempts to get the average (mean, median) incident costs over a year,
    specific to the organisational details provided.

    Where averages are recorded against both the size and the industry,
    the two figures are themselves averaged.

    The CSBS specifies figures for breaches both 'with' and 'without
    outcomes'. We have ignored the latter here.

    :param pairing: a (size, industry) tuple.
    :returns: a (mean, median) tuple, or None if neither the size nor the
        industry is known.
    """
    size = pairing[0]
    industry = pairing[1]
    size_mean = None
    size_median = None
    industry_mean = None
    industry_median = None
    log.info(
        "Attempting to get incident cost averages specific to ('%s', '%s')...",
        size,
        industry,
    )
    size_node = GraphInterface._get_node("Size", name=size)
    if size_node:
        log.info("Found node for size '%s'.", size)
    else:
        log.info("No node found for size '%s'.", size)
    industry_node = GraphInterface._get_node("Industry", name=industry)
    if industry_node:
        log.info("Found node for industry '%s'.", industry)
    else:
        log.info("No node found for industry '%s'.", industry)
    # If no figures were found for this pairing, returns None.
    if size_node is None and industry_node is None:
        return None
    if size_node:
        size_relations = GraphInterface.g.match({size_node}, r_type=FOR_SIZE)
        size_means = []
        size_medians = []
        for rel in size_relations:
            if rel.start_node.has_label("IncidentCostAverages"):
                size_means.append(rel.start_node["mean"])
                size_medians.append(rel.start_node["median"])
        # Converts however many mean and median values returned into one of
        # each.
        if len(size_means) > 1:
            log.info("Multiple mean values found for size '%s', averaging...", size)
            size_mean = sum(size_means) / len(size_means)
        elif len(size_means) == 1:
            log.info("Mean value found for size '%s'.", size)
            size_mean = size_means[0]
        else:
            log.info("No mean values found for size '%s'.", size)
        if len(size_medians) > 1:
            log.info(
                "Multiple median values found for size '%s', averaging...", size
            )
            size_median = sum(size_medians) / len(size_medians)
        elif len(size_medians) == 1:
            log.info("Median value found for size '%s'.", size)
            size_median = size_medians[0]
        else:
            log.info("No median values found for size '%s'.", size)
    if industry_node:
        industry_relations = GraphInterface.g.match(
            {industry_node}, r_type=FOR_INDUSTRY
        )
        industry_means = []
        industry_medians = []
        for rel in industry_relations:
            if rel.start_node.has_label("IncidentCostAverages"):
                industry_means.append(rel.start_node["mean"])
                industry_medians.append(rel.start_node["median"])
        # Converts however many mean and median values returned into one of
        # each.
        if len(industry_means) > 1:
            log.info(
                "Multiple mean values found for industry '%s', averaging...",
                industry,
            )
            industry_mean = sum(industry_means) / len(industry_means)
        elif len(industry_means) == 1:
            log.info("Mean value found for industry '%s'.", industry)
            industry_mean = industry_means[0]
        else:
            log.info("No mean values found for industry '%s'.", industry)
        if len(industry_medians) > 1:
            log.info(
                "Multiple median values found for industry '%s', averaging...",
                industry,
            )
            industry_median = sum(industry_medians) / len(industry_medians)
        elif len(industry_medians) == 1:
            log.info("Median value found for industry '%s'.", industry)
            industry_median = industry_medians[0]
        else:
            log.info("No median values found for industry '%s'.", industry)
    # Explicit None checks: a recorded average of exactly 0 is a real value
    # and must not be discarded by truthiness testing.
    if size_mean is not None and industry_mean is not None:
        log.info(
            "Mean values found for both size '%s' and industry '%s', averaging...",
            size,
            industry,
        )
        mean = (size_mean + industry_mean) / 2
    elif size_mean is not None:
        mean = size_mean
    else:
        mean = industry_mean
    if size_median is not None and industry_median is not None:
        log.info(
            "Median values found for both size '%s' and industry '%s', averaging...",
            size,
            industry,
        )
        median = (size_median + industry_median) / 2
    elif size_median is not None:
        median = size_median
    else:
        median = industry_median
    return mean, median
# pylint: enable=too-many-branches,too-many-locals,too-many-statements
# pylint: disable=invalid-name
@staticmethod
def get_incident_frequency_distribution(
    pairing: Tuple = ("All", "All")
) -> Union[Dict[str, float], None]:
    r"""
    Returns the most relevant available incident frequency distribution for
    a given pairing.

    The algorithm for determining this is currently very basic:

    1. search for an exact match for the pairing, and return that if found; else
    2. return the distribution for :math:`\left(\text{All}, \text{All}\right)`.

    In future, this can and should be expanded to follow complex heuristics
    for similarity (and some relationships for doing so are provided at the
    end of this module). For example, two industries can be joined using the
    SIMILAR_TO relationship, which would allow the algorithm to traverse
    laterally to other leaf nodes.

    An even simpler improvement would be to add handling for partial matches
    (e.g., returning :math:`\left(\text{Micro}, \text{All}\right)`, which
    should be more relevant to a :math:`\left(\text{Micro}, \text{IT}\right)`
    organisation than the fallback :math:`\left(\text{All}, \text{All}\right)`
    values will be).

    :param pairing: a (size, industry) tuple.
    :returns: a dict with string keys "a" and "b" (the distribution's
        parameters), or None if no distribution is available.
    """
    size = pairing[0]
    industry = pairing[1]
    size_node = GraphInterface._get_node("Size", name=size)
    if size_node:
        log.info("Found node for size '%s'.", size)
    else:
        log.info("No node found for size '%s'.", size)
    industry_node = GraphInterface._get_node("Industry", name=industry)
    if industry_node:
        log.info("Found node for industry '%s'.", industry)
    else:
        log.info("No node found for industry '%s'.", industry)
    # If no figures were found for this pairing, returns the fallback values.
    if size_node is None and industry_node is None:
        return GraphInterface._get_frequency_distribution()
    dist: Union[
        Dict[str, float], None
    ] = GraphInterface._get_frequency_distribution(size, industry)
    if dist is not None:
        log.debug(
            "Returned values are: a = %s, b = %s", str(dist["a"]), str(dist["b"])
        )
    return dist
# pylint: enable=invalid-name
@staticmethod
def get_incident_costs_distribution(
    pairing: Tuple = ("All", "All")
) -> Union[Dict[str, float], None]:
    r"""
    Returns the most relevant available incident costs distribution for
    a given pairing.

    The algorithm for determining this is currently very basic:

    1. search for an exact match for the pairing, and return that if found; else
    2. return the distribution for :math:`\left(\text{All}, \text{All}\right)`.

    In future, this can and should be expanded to follow complex heuristics
    for similarity (and some relationships for doing so are provided at the
    end of this module). For example, two industries can be joined using the
    SIMILAR_TO relationship, which would allow the algorithm to traverse
    laterally to other leaf nodes.

    An even simpler improvement would be to add handling for partial matches
    (e.g., returning :math:`\left(\text{Micro}, \text{All}\right)`, which
    should be more relevant to a :math:`\left(\text{Micro}, \text{IT}\right)`
    organisation than the fallback :math:`\left(\text{All}, \text{All}\right)`
    values will be).

    :param pairing: a (size, industry) tuple.
    :returns: a dict with string keys "mean" and "stddev" (the
        distribution's parameters), or None if no distribution is
        available.
    """
    size = pairing[0]
    industry = pairing[1]
    size_node = GraphInterface._get_node("Size", name=size)
    if size_node:
        log.info("Found node for size '%s'.", size)
    else:
        log.info("No node found for size '%s'.", size)
    industry_node = GraphInterface._get_node("Industry", name=industry)
    if industry_node:
        log.info("Found node for industry '%s'.", industry)
    else:
        log.info("No node found for industry '%s'.", industry)
    # If no figures were found for this pairing, returns the fallback values.
    if size_node is None and industry_node is None:
        return GraphInterface._get_costs_distribution()
    dist: Union[Dict[str, float], None] = GraphInterface._get_costs_distribution(
        size, industry
    )
    if dist is not None:
        log.debug(
            "Returned values are: mean = %s, stddev = %s",
            str(dist["mean"]),
            str(dist["stddev"]),
        )
    return dist
@staticmethod
def get_sizes() -> List[str]:
    """Returns a list of all of the organisation size values."""
    return [size_node["name"] for size_node in GraphInterface._get_nodes("Size")]
@staticmethod
def get_industries() -> List[str]:
    """Returns a list of all of the organisation industry values."""
    return [
        industry_node["name"]
        for industry_node in GraphInterface._get_nodes("Industry")
    ]
@staticmethod
def get_sizes_and_industries() -> Tuple[list, list]:
    """Returns all available organisation size and industry values."""
    sizes = GraphInterface.get_sizes()
    industries = GraphInterface.get_industries()
    return sizes, industries
# pylint: disable=invalid-name
@staticmethod
def create_incident_frequency_distribution_node(
    pairing: Tuple, a: float, b: float
) -> Node:
    """Adds an `IncidentFrequencyDistribution` node to the Neo4j graph database."""
    found_size = GraphInterface._get_node("Size", name=pairing[0])
    found_industry = GraphInterface._get_node("Industry", name=pairing[1])
    dist_node = GraphInterface._create_node(
        "IncidentFrequencyDistribution", a=a, b=b, calculated_at=datetime.now()
    )
    # Link the new distribution to the pairing it was generated for.
    for rel_type, target in ((FOR_SIZE, found_size), (FOR_INDUSTRY, found_industry)):
        GraphInterface._create_relationship(dist_node, rel_type, target)
    return dist_node
# pylint: enable=invalid-name
@staticmethod
def create_incident_costs_distribution_node(
    pairing: Tuple, mean: float, stddev: float
) -> Node:
    """Adds an `IncidentCostsDistribution` node to the Neo4j graph database."""
    found_size = GraphInterface._get_node("Size", name=pairing[0])
    found_industry = GraphInterface._get_node("Industry", name=pairing[1])
    dist_node = GraphInterface._create_node(
        "IncidentCostsDistribution",
        mean=mean,
        stddev=stddev,
        calculated_at=datetime.now(),
    )
    # Link the new distribution to the pairing it was generated for.
    for rel_type, target in ((FOR_SIZE, found_size), (FOR_INDUSTRY, found_industry)):
        GraphInterface._create_relationship(dist_node, rel_type, target)
    return dist_node
# pylint: disable=anomalous-backslash-in-string,invalid-name
@staticmethod
def _get_frequency_distribution(
    size: str = "All", industry: str = "All"
) -> Union[Dict[str, float], None]:
    """
    Returns the :math:`a` and :math:`b` values from the requested incident
    frequency distribution node (if it exists). Call with no arguments to
    use the fallback (:math:`\left(\text{All}, \text{All}\right)`) node.

    :param size: the organisation size to match.
    :param industry: the organisation industry to match.
    :return: a ``{"a": ..., "b": ...}`` dict, or ``None`` when no matching
        node exists for a non-fallback pairing.
    :raises Exception: if even the fallback (All, All) node is missing.
    """
    # pylint: enable=anomalous-backslash-in-string
    # Parameterised Cypher (rather than string formatting) so that names
    # containing quotes cannot break or inject into the query.
    # pylint: disable=line-too-long
    result = GraphInterface.g.run(
        "MATCH (:Size {name: $size})<-[:FOR_SIZE]-(node:IncidentFrequencyDistribution)-[:FOR_INDUSTRY]->(:Industry {name: $industry}) "
        "RETURN node;",
        size=size,
        industry=industry,
    )
    # pylint: enable=line-too-long
    nodes = [record["node"] for record in result]
    if len(nodes) == 0:
        # There should always be a (All, All) distribution at least.
        if size == "All" and industry == "All":
            raise Exception("No fallback node found!")
        log.debug(
            "No incident frequency distribution found for (%s, %s).",
            str(size),
            str(industry),
        )
        # Fixed: was `return None, None`, a tuple that passed callers'
        # `is not None` checks and crashed on indexing. A single `None`
        # signals a miss correctly.
        return None
    log.debug("Results: %s", str(nodes))
    a = [node["a"] for node in nodes]
    b = [node["b"] for node in nodes]
    # Fixed: was `len(nodes) > 0`, which is always true here (the empty case
    # returned above), so single matches were mislabelled "multiple nodes"
    # and the else branch was dead. `> 1` matches _get_costs_distribution.
    if len(nodes) > 1:
        log.info("Multiple fallback nodes found, averaging parameters...")
        a = sum(a) / len(a)
        b = sum(b) / len(b)
    else:
        a = a[0]
        b = b[0]
    return {"a": a, "b": b}
# pylint: enable=invalid-name
# pylint: disable=anomalous-backslash-in-string
@staticmethod
def _get_costs_distribution(
    size: str = "All", industry: str = "All"
) -> Union[Dict[str, float], None]:
    """
    Returns the ``mean`` and ``stddev`` values from the requested incident
    costs distribution node (if it exists). Call with no arguments to
    use the fallback (:math:`\left(\text{All}, \text{All}\right)`) node.

    (Fixed: the docstring previously described the *frequency* helper's
    ``a``/``b`` values — a copy-paste error.)

    :param size: the organisation size to match.
    :param industry: the organisation industry to match.
    :return: a ``{"mean": ..., "stddev": ...}`` dict, or ``None`` when no
        matching node exists for a non-fallback pairing.
    :raises Exception: if even the fallback (All, All) node is missing.
    """
    # pylint: enable=anomalous-backslash-in-string
    # Parameterised Cypher (rather than string formatting) so that names
    # containing quotes cannot break or inject into the query.
    # pylint: disable=line-too-long
    result = GraphInterface.g.run(
        "MATCH (:Size {name: $size})<-[:FOR_SIZE]-(node:IncidentCostsDistribution)-[:FOR_INDUSTRY]->(:Industry {name: $industry}) "
        "RETURN node;",
        size=size,
        industry=industry,
    )
    # pylint: enable=line-too-long
    nodes = [record["node"] for record in result]
    if len(nodes) == 0:
        # There should always be a (All, All) distribution at least.
        if size == "All" and industry == "All":
            raise Exception("No fallback node found!")
        # Fixed: message previously said "frequency" in the costs helper.
        log.debug(
            "No incident costs distribution found for (%s, %s).",
            str(size),
            str(industry),
        )
        # Fixed: was `return None, None`, a tuple that passed callers'
        # `is not None` checks and crashed on indexing. A single `None`
        # signals a miss correctly.
        return None
    log.debug("Results: %s", str(nodes))
    mean = [node["mean"] for node in nodes]
    stddev = [node["stddev"] for node in nodes]
    if len(nodes) > 1:
        log.info("Multiple fallback nodes found, averaging parameters...")
        mean = sum(mean) / len(mean)
        stddev = sum(stddev) / len(stddev)
    else:
        mean = mean[0]
        stddev = stddev[0]
    return {"mean": mean, "stddev": stddev}
# pylint: disable=invalid-name
@staticmethod
def _create_node(*labels, **properties) -> Node:
    """Creates a new node in the Neo4j graph database."""
    new_node = Node(*labels, **properties)
    transaction = GraphInterface.g.begin()
    transaction.create(new_node)
    transaction.commit()
    return new_node
# pylint: enable=invalid-name
# pylint: disable=invalid-name
@staticmethod
def _create_relationship(
    start_node, relationship, end_node, **properties
) -> Relationship:
    """Creates a new relationship in the Neo4j graph database.

    The relationship *class* is passed in; its name becomes the
    relationship type string in the graph.
    """
    new_rel = Relationship(
        start_node, relationship.__name__, end_node, **properties
    )
    transaction = GraphInterface.g.begin()
    transaction.create(new_rel)
    transaction.commit()
    return new_rel
# pylint: enable=invalid-name
@staticmethod
def _get_node(*labels, **properties) -> Union[Node, None]:
    """Returns the first matching node from the Neo4j graph database, or
    ``None`` when nothing matches."""
    matches = GraphInterface.g.nodes.match(*labels, **properties)
    return matches.first()
@staticmethod
def _get_nodes(*labels, **properties) -> NodeMatcher:
    """Returns the matcher over *all* nodes in the Neo4j graph database
    that carry the given labels and properties."""
    return GraphInterface.g.nodes.match(*labels, **properties)
@staticmethod
def _dict_to_jsobj(properties) -> str:
    """Recursively converts a Python `dict` into a JS `Object` string.

    A bare string is treated as ``{"name": <string>}``; any other
    non-dict input yields an empty object literal.
    """
    if isinstance(properties, str):
        properties = {"name": properties}
    if not isinstance(properties, dict):
        return "{}"
    # Strip the quotes from the keys so the repr reads as a JS object.
    return re.sub("'([a-z_]*)':", "\\1:", str(properties))
# pylint: disable=invalid-name,missing-class-docstring
# Marker relationship types for the Neo4j graph. `_create_relationship`
# uses each class's name (e.g. "FOR_SIZE") as the relationship type string.
class SUBSECTION_OF(Relationship):
    pass
class SECTION_OF(Relationship):
    pass
class SIMILAR_TO(Relationship):
    pass
class FOR_SIZE(Relationship):
    pass
class FOR_INDUSTRY(Relationship):
    pass
# pylint: enable=invalid-name,missing-class-docstring

420
src/montecarlo.py Normal file
View File

@ -0,0 +1,420 @@
"""
Monte Carlo Simulation Script
This script runs a Monte Carlo simulation for an organisation of a given
size and industry, utilising the most relevant data available.
Acknowledgements: Dr Dan Prince & Dr Chris Sherlock
"""
import os
import sys
import argparse
import pickle
import logging as log
from typing import Tuple, Dict, Union
import random
import math
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from graph import GraphInterface as gi
# Used for logging, equivalent to `logging.INFO`.
SUCCESS = 20
# If not specified, the default number of Monte Carlo simulation runs to perform.
DEFAULT_RUNS = 5000
# The arbitrary maximum number of incidents that an organisation can experience
# in a year.
MAX_ANNUAL_INCIDENTS = 8000
# The maximum value of a company; any yearly losses over this are treated as
# a bankruptcy.
COMPANY_VALUE = 100000
# A smaller value increases the curviness of the loss exceedance curve.
# Less than 30 starts to get a bit steppy though.
LEC_PRECISION = math.floor(COMPANY_VALUE / 30)
# Quantifies the quantitative boundaries for human-readable incident frequencies,
# which many sources (e.g., the CSBS 2020) use to present their results.
# (Ranges below restored — the dashes were lost in an earlier encoding.)
#
# 'None'              = 0
# 'Annually'          = 1
# 'Less than monthly' = 2-7
# 'Monthly'           = 8-17
# 'Weekly'            = 18-79
# 'Daily'             = 80-399
# 'More than daily'   = 400-8000
BOUNDARIES = {
    "None": 0,
    "Once per year": 1,
    "Less than once a month": 2,
    "Once a month": 8,
    "Once a week": 18,
    "Once a day": 80,
    "Several times a day": 400,
    "MAX": MAX_ANNUAL_INCIDENTS,
}
# Module-level settings, populated from the command-line arguments in `main()`.
N = None  # number of simulation runs to perform
OUTPUT_DIR = None  # directory for cached distributions and plot images
IMAGES = None  # whether to save intermediate plot images
FORCE = None  # force regeneration of cached distributions
def _calculate_num_of_incidents(incidents_dist: Dict[str, float]) -> int:
    """Calculate how many incidents have occurred in a given year.

    Draws one sample from the fitted Pareto-style distribution via
    inverse-transform sampling: ``b / (1 - U) ** (1 / a)`` for
    ``U ~ Uniform(0, 1)``.

    :param incidents_dist: a ``{"a": ..., "b": ...}`` distribution dict.
    :return: the integer number of incidents, capped at
        MAX_ANNUAL_INCIDENTS. (The annotation previously claimed ``float``,
        but an ``int`` has always been returned.)
    """
    log.debug("Incident distribution: %s", str(incidents_dist))
    num_of_incidents = incidents_dist["b"] / (1 - np.random.uniform()) ** (
        1 / incidents_dist["a"]
    )
    log.debug("Number of incidents (as `int`): %s", str(int(num_of_incidents)))
    # Cap at the arbitrary annual maximum; `min` replaces the previous
    # conditional expression with identical behaviour.
    return min(int(num_of_incidents), MAX_ANNUAL_INCIDENTS)
def _calculate_sum_cost_of_incidents(
    num_of_incidents: int, costs_dist: Dict[str, float], idx: int = None
) -> float:
    """For a list of incident numbers, calculate how much each breach cost and
    return the sum.

    :param num_of_incidents: how many incidents occurred this simulated year.
    :param costs_dist: a ``{"mean": ..., "stddev": ...}`` costs distribution.
    :param idx: the 1-based iteration number, used only for progress logging.
    :return: the summed cost of every incident in the year.
    """
    log.debug("Costs distribution: %s", str(costs_dist))
    # Progress logging: every iteration for small runs, roughly every 1%
    # otherwise. Fixed: the default `idx=None` previously crashed with
    # `TypeError: unsupported operand` on `None % int` whenever N >= 1000.
    if idx is not None and ((N < 1000) or (idx % math.floor(N / 100) == 0)):
        log.info(
            "Running Monte Carlo simulation... (%s/%s iterations)", str(idx), str(N)
        )
    if num_of_incidents == 0:
        return 0
    # Convert the empirical mean/stddev into log-normal (loc, shape)
    # parameters.
    loc = np.log(
        costs_dist["mean"] ** 2
        / np.sqrt(costs_dist["stddev"] ** 2 + costs_dist["mean"] ** 2)
    )
    shape = np.sqrt(np.log(1 + (costs_dist["stddev"] ** 2 / costs_dist["mean"] ** 2)))
    costs = [random.lognormvariate(loc, shape) for _ in range(num_of_incidents)]
    return sum(costs)
# pylint: disable=invalid-name
def _get_most_relevant_incident_frequency_distribution(
    pairing: Tuple = ("All", "All")
) -> Union[Dict[float, float], None]:
    """Gets the distribution for incident frequency from the data in the Neo4j
    graph database."""
    log.info(
        "Finding most relevant incident frequency distribution for %s...", str(pairing)
    )
    distribution = gi.get_incident_frequency_distribution(pairing)
    return distribution
# pylint: enable=invalid-name
def _get_most_relevant_incident_costs_distribution(
    pairing: Tuple = ("All", "All")
) -> Union[Dict[float, float], None]:
    """Gets the distribution for incident costs from the data in the Neo4j
    graph database."""
    log.info(
        "Finding most relevant incident costs distribution for %s...", str(pairing)
    )
    distribution = gi.get_incident_costs_distribution(pairing)
    return distribution
def _get_most_relevant_distributions(
    pairing: Tuple = ("All", "All")
) -> Tuple[Union[Dict[float, float], None], Union[Dict[float, float], None]]:
    """Generate (or retrieve) a population of annual incident quantities and a
    distribution of incident-with-outcome cost values.

    :param pairing: the (size, industry) pairing to fetch distributions for.
    :return: an ``(incidents_dist, costs_dist)`` tuple; either element may be
        ``None`` when no relevant data exists. (The annotation previously
        claimed a ``Dict`` even though a tuple is returned.)
    """
    filename = "{}-{}.pickle".format(pairing[0], pairing[1])
    # -- caching --
    # Retrieves previously-calculated values if possible.
    if not FORCE and OUTPUT_DIR is not None:
        try:
            # `with` guarantees the cache file is closed; the previous
            # `pickle.load(open(...))` leaked the file handle.
            with open(OUTPUT_DIR + filename, "rb") as cache_file:
                dists = pickle.load(cache_file)
            log.info("Previously-calculated distributions found")
            return dists["incidents"], dists["costs"]
        except (OSError, IOError):
            log.info("Previously-calculated distributions not found")
    # Otherwise, generates fresh ones.
    gi.__init__()
    incidents_dist = _get_most_relevant_incident_frequency_distribution(pairing)
    costs_dist = _get_most_relevant_incident_costs_distribution(pairing)
    log.debug(
        "Returned values are: incidents_dist = %s, costs_dist = %s",
        str(incidents_dist),
        str(costs_dist),
    )
    # Saves the figures for faster analysis in future.
    if OUTPUT_DIR is not None and incidents_dist is not None and costs_dist is not None:
        dists = {
            "incidents": incidents_dist,
            "costs": costs_dist,
        }
        with open(OUTPUT_DIR + filename, "wb") as cache_file:
            pickle.dump(dists, cache_file)
    return incidents_dist, costs_dist
# pylint: disable=anomalous-backslash-in-string
def _run_monte_carlo_simulation(pairing: Tuple = ("All", "All")) -> Tuple:
    """
    Runs :math:`n` simulations of a 12-month period, calculating the number
    of incidents encountered each time and their cumulative costs.

    :param pairing: the (size, industry) pairing to simulate.
    :return: a ``(nums_of_incidents, sum_costs)`` pair, or ``(None, None)``
        when no distributions are available for the pairing. (The previous
        ``-> None`` annotation was wrong: a pair is always returned.)
    """
    # pylint: enable=anomalous-backslash-in-string
    # Generates both distributions
    incidents_dist, costs_dist = _get_most_relevant_distributions(pairing)
    if incidents_dist is None and costs_dist is None:
        # No data for this pairing at all; hand the (None, None) pair back.
        return incidents_dist, costs_dist
    # Calculates the number of incidents suffered over $n$ simulated years
    nums_of_incidents = np.array(
        [_calculate_num_of_incidents(incidents_dist) for i in range(N)]
    )
    log.debug("Number of incidents: %s", str(nums_of_incidents))
    _label_plot(
        "Histogram of Incident Frequencies (over 12 months)",
        "Number of Incidents ($log_{10}$)",
        "Frequency",
    )
    # Zero-incident years cannot be log-scaled, so they land in the 0 bucket.
    plt.hist(
        [np.log10(i) if i > 0 else 0 for i in nums_of_incidents],
        align="left",
        bins=range(12),
    )
    _save_plot("2 - histogram of incident frequencies")
    # Calculates the annual costs for each simulated year
    log.info("Running Monte Carlo simulation... (0/%s iterations)", str(N))
    sum_costs = [
        _calculate_sum_cost_of_incidents(num_of_incidents, costs_dist, idx)
        for idx, num_of_incidents in enumerate(nums_of_incidents, start=1)
    ]
    log.info("Running Monte Carlo simulation... (%s/%s iterations)", str(N), str(N))
    _label_plot(
        "Histogram of Sum Costs (over 12 months)", "Total Cost (£)", "Frequency"
    )
    plt.ticklabel_format(style="plain")
    plt.hist(sum_costs, align="left", bins=15, range=(0, COMPANY_VALUE))
    _save_plot("4 - histogram of sum costs")
    _label_plot("Density of Sum Costs (over 12 months)", "Total Cost (£)", "Density")
    pd.Series(sum_costs).plot(kind="density")
    plt.xlim(0, COMPANY_VALUE * 2)
    plt.ticklabel_format(style="plain")
    _save_plot("5 - density of sum costs")
    # Get loss exceedance curve
    log.info("Generating loss exceedance curve")
    # Reverse-cumulate the histogram to get "% chance of this loss or more".
    hist, edges = np.histogram(sum_costs, bins=LEC_PRECISION)
    cumrev = np.cumsum(hist[::-1])[::-1] * 100 / len(sum_costs)
    _label_plot(
        "Loss Exceedance Curve (Monte Carlo sim)",
        "Loss (£, 99th percentile)",
        "Chance of Loss or Greater (%)",
    )
    plt.ticklabel_format(style="plain")
    plt.xlim(0, COMPANY_VALUE)
    plt.plot(edges[:-1], cumrev)
    _save_plot("6 - lec" if IMAGES else "lec")
    log.info("Simulation complete!")
    return nums_of_incidents, sum_costs
def main():
    """Called when the script is run from the command-line"""
    # pylint: disable=global-statement
    global N, OUTPUT_DIR, IMAGES, FORCE
    # pylint: enable=global-statement
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-n",
        "--number",
        help="The number of simulations to run (default: " + str(DEFAULT_RUNS) + ")",
        type=int,
        default=DEFAULT_RUNS,
    )
    parser.add_argument(
        "-s",
        "--size",
        help="The size of the organisation to simulate (default: all)",
        type=str,
        default="All",
    )
    parser.add_argument(
        "-i",
        "--industry",
        help="The industry of the organisation to simulate (default: all)",
        type=str,
        default="All",
    )
    parser.add_argument(
        "-o",
        "--output",
        help="Specify the output directory (default: ./output/)",
        type=str,
        default=os.path.join(os.path.dirname(__file__), "output/"),
        metavar="DIRECTORY",
    )
    parser.add_argument(
        "-p",
        "--images",
        help="Output images at each step of the script (default: false, just \
        output the final LEC image)",
        action="store_true",
        default=False,
    )
    parser.add_argument(
        "-f",
        "--force",
        help="Force re-generation of incident and cost distributions (default: false)",
        action="store_true",
        default=False,
    )
    parser.add_argument(
        "-v",
        "--verbose",
        help="Verbose console output (default: false)",
        action="store_true",
        default=False,
    )
    parser.add_argument(
        "-d",
        "--debug",
        help="Show debug console output (default: false)",
        action="store_true",
        default=False,
    )
    args = parser.parse_args()
    # Copy the parsed arguments into the module-level settings.
    N = args.number
    OUTPUT_DIR = args.output
    IMAGES = args.images
    FORCE = args.force
    size = args.size
    industry = args.industry
    # Debug gives the most detail; verbose sits in between.
    if args.debug:
        log.basicConfig(format="%(levelname)s: %(message)s", level=log.DEBUG)
        log.info("Debug output.")
    elif args.verbose:
        log.basicConfig(format="%(levelname)s: %(message)s", level=log.INFO)
        log.info("Verbose output.")
    else:
        log.basicConfig(format="%(levelname)s: %(message)s")
    if not os.path.isdir(OUTPUT_DIR):
        os.makedirs(OUTPUT_DIR)
    # NOTE(review): both `size` and `industry` default to "All", so this
    # condition is always truthy — presumably it was meant to detect
    # explicitly-passed values; confirm against the calling game server.
    if size or industry:
        print("Running simulation for ({}, {})".format(size, industry))
        nums_of_incidents, sum_costs = _run_monte_carlo_simulation((size, industry))
        if nums_of_incidents is not None and sum_costs is not None:
            log.info(
                "Results:\nNumbers of incidents: %s\nSum costs: %s\n",
                str(nums_of_incidents),
                str(sum_costs),
            )
            avg_num_of_incidents = int(sum(nums_of_incidents) / len(nums_of_incidents))
            avg_sum_costs = sum(sum_costs) / len(sum_costs)
            log.log(
                SUCCESS,
                "Results:\nAverage number of incidents: %d\nAverage cost: £%.2f",
                avg_num_of_incidents,
                avg_sum_costs,
            )
            # Print output that will be picked up by game server.
            # pylint: disable=fixme
            # TODO: For some reason the results at the moment are orders of magnitude
            # too high, so for now I've plugged it by dividing both results by 100.
            # pylint: enable=fixme
            print(int(avg_num_of_incidents / 100))
            print("%.2f" % (avg_sum_costs / 100))
        else:
            # No data for the requested pairing: fall back to the general
            # (All, All) distributions instead.
            log.warning("No data found.")
            print("No data found.")
            print("Running simulation for (All, All)")
            gen_nums_of_incidents, gen_sum_costs = _run_monte_carlo_simulation()
            log.info(
                "Results:\nNumbers of incidents: %s\nSum costs: %s\n",
                str(gen_nums_of_incidents),
                str(gen_sum_costs),
            )
            avg_gen_num_of_incidents = int(
                sum(gen_nums_of_incidents) / len(gen_nums_of_incidents)
            )
            avg_gen_sum_costs = sum(gen_sum_costs) / len(gen_sum_costs)
            log.log(
                SUCCESS,
                "Results:\nAverage number of incidents: %d\nAverage cost: £%.2f",
                avg_gen_num_of_incidents,
                avg_gen_sum_costs,
            )
            # Print output that will be picked up by the game server.
            print(int(avg_gen_num_of_incidents / 100))
            print("%.2f" % (avg_gen_sum_costs / 100))
    sys.exit(0)
def _label_plot(title="Untitled Plot", xlabel="x axis", ylabel="y axis") -> None:
    """Apply a title and axis labels to the current matplotlib figure."""
    labellers = ((plt.title, title), (plt.xlabel, xlabel), (plt.ylabel, ylabel))
    for labeller, text in labellers:
        labeller(text)
def _save_plot(filename="untitled") -> None:
    """Write the current figure to disk (when image output is enabled) and
    reset it ready for the next plot."""
    if IMAGES:
        target = OUTPUT_DIR + filename + ".png"
        plt.savefig(target)
    plt.clf()
# Script entry point.
if __name__ == "__main__":
    main()

103
src/montecarlo.r Normal file
View File

@ -0,0 +1,103 @@
#
# Secure Digitalisation Monte Carlo Simulation Script
#
# This script runs a Monte Carlo simulation using breach likelihood and cost
# figures derived from the Cyber Security Breaches Survey 2020 (CSBS).
# This script is an unfinished prototype, and has since been superseded by
# `montecarlo.py`.
#
# Acknowledgements: Dr Dan Prince & Dr Chris Sherlock
#

# Per-band probability masses and incident-count boundaries from the CSBS.
masses = c(0.54, 0.1058, 0.1012, 0.0966, 0.069, 0.0368, 0.0414)
boundaries = c(1, 2, 8, 18, 80, 400, 8000)

# Fit a Pareto-style tail by regressing log(1 - CDF) on log(boundary).
Fs = cumsum(masses)
plot(log(boundaries), log(1 - Fs))
xs = log(boundaries)
ys = log(1 - Fs)
fit = lm(ys ~ xs)
summary(fit)
alogb = fit$coeff[1]
a = -fit$coeff[2]
b = exp(alogb/a)
print(a)
print(b)

# Sample n annual incident counts by inverse-transform sampling.
n = 10000
us = runif(n)
xs = b / (1 - us)^(1 / a)
# (A bare `print()` call sat here; it errors in R with "argument 'x' is
# missing", so it has been removed.)

# Sanity check: recover the per-band probabilities from the sample and
# compare them against the input masses.
p0 = mean(xs < boundaries[1])
p1 = mean(xs < boundaries[2]) - p0
p2 = mean(xs < boundaries[3]) - p0 - p1
p3 = mean(xs < boundaries[4]) - p0 - p1 - p2
p4 = mean(xs < boundaries[5]) - p0 - p1 - p2 - p3
p5 = mean(xs < boundaries[6]) - p0 - p1 - p2 - p3 - p4
ps = c(p0, p1, p2, p3, p4, p5, 1 - (p0 + p1 + p2 + p3 + p4 + p5))
print(ps)
print(masses)

nattacks = floor(xs)
hist(log10(nattacks),
     main = "Histogram of Number of Attacks/Breaches Over 12 Months",
     xlab = expression("Number of Attacks (log"[10]*")"),
     ylab = "Frequency",
     breaks = 0:12)

# Plots the distribution for the average cost of breach(es) over 12 months
mean = 3230
median = 274
logstd = sqrt(2 * (log(mean) - if (median == 0) 0 else log(median)))
std = exp(1)^logstd
curve(dlnorm(x, log(mean), log(std)), from=1, to=5000,
      main = "Average annual breach cost distribution",
      xlab = 'Cost (£)',
      ylab = 'Density',
      lwd = 2)

# Runs the Monte Carlo simulation: sum the costs of n incidents drawn from
# the fitted log-normal (loc/shape are defined below, before first call).
simulateCosts <- function(n) {
  return(if (n >= 1) sum(rlnorm(n, loc, shape)) else 0)
}

n = 10000
loc <- log(mean^2 / sqrt(std^2 + mean^2))
shape <- sqrt(log(1 + (std^2 / mean^2)))
# NOTE(review): this samples log10-scaled incident counts, not raw counts —
# the Python port feeds raw counts instead; confirm which was intended.
numAttacks <- sample(log10(nattacks), n)
results <- sapply(numAttacks, simulateCosts)
hist(results,
     main = "Histogram of Total Costs Over 12 Months (Monte Carlo sim)",
     xlab = "Total cost (£)")
d <- density(results)
plot(d,
     main="Density of Total Costs Over 12 Months (Monte Carlo sim)",
     xlab=expression("Total Cost (£)"),
     ylab="Density")

# Get loss exceedance
# TODO: needs to be prettier, but `evaluate::loss_exceedance_curve()` is broken
maxValue = 2500
numOver <- length(results[results > maxValue])
risk = numOver/n
plot(d,
     main="Loss Exceedance (Monte Carlo sim)",
     xlab=expression("Total Cost (£)"),
     ylab="Density")
abline(v = maxValue, col="red", lwd=3, lty=2)
text(3000, 4e-04, labels=paste(floor(risk*100), "% chance of ≥£", maxValue, " losses"), adj=c(0, 0.5))

View File

@ -0,0 +1,78 @@
#
# Secure Digitalisation Neo4j Connection Script
#
# This script is intended to establish a connection to a Neo4j graph database
# and submit commands.
# This script is an unfinished prototype, and has since been superseded by
# `graph.py`.
#
# Acknowledgements: Dr Dan Prince & Dr Chris Sherlock
#
# Install and load the required packages; neo4r comes from a fork that
# supports Neo4j 4.x.
install.packages('tidyverse')
library(tidyverse)
install.packages('purrr')
library(purrr)
install.packages('devtools')
library(devtools)
install_github("davidlrosenblum/neo4r@4.x")
library(neo4r)
# Number of simulated runs, and decision steps per run.
RUNS <- 1000
DECISION.STEPS <- 12
# Fetches the incident probability (as a fraction) from the graph, using
# only figures not tied to a specific size/industry/area.
get_likelihood <- function() {
  res <- 'MATCH (i:Incident) WHERE EXISTS (i.probability) AND NOT (i)-[:FOR_SIZE]-() AND NOT (i)-[:FOR_INDUSTRY]-() AND NOT (i)-[:FOR_AREA]-() RETURN i.probability AS probability;' %>%
    call_neo4j(con, type = 'row')
  res$probability / 100
}
# Currently only does direct costs
get_costs <- function() {
  res <- 'MATCH (i:Incident) WHERE EXISTS (i.direct_costs) AND NOT (i)-[:FOR_SIZE]-() AND NOT (i)-[:FOR_INDUSTRY]-() AND NOT (i)-[:FOR_AREA]-() RETURN i.direct_costs[0] AS cost;' %>%
    call_neo4j(con, type = 'row')
  res$cost
}
# Simulates one decision step: decides whether an incident occurred and,
# if so, draws a cost for it.
calculate_cost <- function(alpha) {
  l <- get_likelihood()
  happen <- runif(1, 0, 1)
  # NOTE(review): `happen >= l` makes the costly branch fire with
  # probability 1 - l, i.e. the comparison looks inverted — confirm.
  if (happen >= l) {
    cost <- as.numeric(get_costs())
    s <- log(sd(580:630))
    m <- log(get_costs())
    #location <- log(m^2 / sqrt(s^2 + m^2))
    #shape <- sqrt(log(1 + (s^2 / m^2)))
    # NOTE(review): `rlnorm(1, )` falls back to meanlog=0/sdlog=1; the
    # commented-out location/shape were presumably intended here. This is
    # the unfinished part of the prototype.
    rlnorm(1, )
  } else {
    0
  }
}
# Connection handle used by the query helpers above.
con <- neo4j_api$new(
  url="http://localhost:7474",
  db="neo4j",
  user="neo4j",
  password="password"
)
# Runs RUNS simulations of DECISION.STEPS steps each, accumulating values.
simulations <- rerun(RUNS, replicate(DECISION.STEPS, runif(1) %>% calculate_cost())) %>%
  set_names(paste0("sim", 1:RUNS)) %>%
  map(~ accumulate(., ~ .x * .y)) %>%
  map_dfr(~ tibble(value = .x, step = 1:DECISION.STEPS), .id = "simulation")
# Plot each simulation's trajectory across the decision steps.
simulations %>%
  ggplot(aes(x = step, y = value)) +
  geom_line(aes(color = simulation)) +
  theme(legend.position = "none") +
  ggtitle("Simulations of costs from breaches")
# Plot the mean/max/min across simulations at each step.
summary_values <- simulations %>%
  group_by(step) %>%
  summarise(mean_return = mean(value), max_return = max(value), min_return = min(value)) %>%
  gather("series", "value", -step)
summary_values %>%
  ggplot(aes(x = step, y = value)) +
  geom_line(aes(color = series)) +
  ggtitle("Mean values from simulations")

View File

@ -0,0 +1,332 @@
"""
Distributions (Re)generation Script
This script generates likelihood and cost distributions based on threat
intelligence data stored in a connected Neo4j graph database. It attempts to
do so for every possible permutation of (size, industry) values.
These are then consumed by `montecarlo.py`, which runs a Monte Carlo
simulation based on these figures.
Acknowledgements: Dr Dan Prince & Dr Chris Sherlock
"""
import os
import sys
import argparse
import warnings
import logging as log
from typing import Tuple
import itertools
import numpy as np
import pandas as pd
import statsmodels.formula.api as smf
from matplotlib import pyplot as plt
from scipy.stats import lognorm
from graph import GraphInterface as gi
# Used for logging, equivalent to `logging.WARNING` + 1.
SUCCESS = 31
# The arbitrary maximum number of incidents that an organisation can experience
# in a year.
MAX_ANNUAL_INCIDENTS = 8000
# Quantifies the quantitative boundaries for human-readable incident frequencies,
# which many sources (e.g., the CSBS 2020) use to present their results.
# (Ranges below restored — the dashes were lost in an earlier encoding.)
#
# 'None'              = 0
# 'Annually'          = 1
# 'Less than monthly' = 2-7
# 'Monthly'           = 8-17
# 'Weekly'            = 18-79
# 'Daily'             = 80-399
# 'More than daily'   = 400-8000
BOUNDARIES = {
    "None": 0,
    "Once per year": 1,
    "Less than once a month": 2,
    "Once a month": 8,
    "Once a week": 18,
    "Once a day": 80,
    "Several times a day": 400,
    "MAX": MAX_ANNUAL_INCIDENTS,
}
# Module-level settings, populated from the command-line arguments in `main()`.
OUTPUT_DIR = None  # directory for plot images
IMAGES = None  # whether to save intermediate plot images
# pylint: disable=invalid-name,anomalous-backslash-in-string
def _generate_new_incident_frequency_distribution(pairing: Tuple = (None, None)) -> int:
    """
    Generates a new incident frequency distribution.

    :param pairing: the (size, industry) pairing to generate for.
    :return: 1 if a distribution was generated and stored, 0 otherwise.

    Notes
    -----
    (Re)generates the incident frequency distribution for a
    :math:`\left(\text{size}, \text{industry}\right)` pairing from the data in
    a Neo4j graph database.

    Currently this only produces log-normal distributions. Additional types of
    distribution can be implemented by overloading this method (by importing the
    `multipledispatch` package) and returning the values required for defining
    that distribution (e.g., :math:`\mu` and :math:`\sigma` instead of :math:`a`
    and :math:`b`).
    """
    # pylint: enable=anomalous-backslash-in-string
    log.info("Generating new incident frequency distribution for '%s'...", str(pairing))
    # Attempts to get the incident probabilities for the pairing from the graph
    # database
    incident_frequency_probabilities = gi.get_incident_frequency_probabilities(
        list(BOUNDARIES.values())[:-1], pairing
    )
    if incident_frequency_probabilities is None:
        log.info(
            "No incident frequency distribution generated for '%s'.",
            str(pairing),
        )
        return 0
    log.debug(
        "Returned values are: incident frequency probabilities = %s",
        str(incident_frequency_probabilities),
    )
    # If values are found, fit a Pareto-style tail by regressing
    # log(1 - CDF) on log(boundary).
    Fs = np.cumsum(incident_frequency_probabilities)
    xs = np.log(list(BOUNDARIES.values())[1:])
    ys = np.log(1 - Fs)
    # Fixed: the frame was built as `pd.DataFrame(xs, ys)`, which made `ys`
    # the *index* and left the single column unnamed; the formula below only
    # worked because patsy fell back to the enclosing scope's variables.
    # Named columns make the fit explicit and robust.
    data = pd.DataFrame({"xs": xs, "ys": ys})
    # pylint: disable=line-too-long
    # See <https://www.statsmodels.org/stable/_modules/statsmodels/stats/stattools.html#omni_normtest> for explanation
    # pylint: enable=line-too-long
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        fit = smf.ols(formula="ys ~ xs", data=data).fit()
    log.debug(fit.summary())
    # Get the parameters for the generated distribution and store them in the
    # graph database.
    alogb = fit.params[0]
    a = -fit.params[1]
    b = np.exp(alogb / a)
    gi.create_incident_frequency_distribution_node(pairing, a, b)
    log.log(
        SUCCESS,
        "New incident frequency distribution successfully generated for '%s'.",
        str(pairing),
    )
    return 1
# pylint: enable=invalid-name
# pylint: disable=anomalous-backslash-in-string
def _generate_new_incident_costs_distribution(pairing: Tuple = (None, None)) -> int:
    """
    (Re)generates the incident cost distribution for a
    :math:`\left(\text{size}, \text{industry}\right)` pairing from the data in
    a Neo4j graph database.

    Currently this only produces log-normal distributions. Additional types of
    distribution can be implemented by overloading this method (by importing the
    `multipledispatch` package) and returning the values required for defining
    that distribution (e.g., :math:`\mu` and :math:`\sigma` instead of :math:`a`
    and :math:`b`).

    :param pairing: the (size, industry) pairing to generate for.
    :return: 1 if a distribution was generated and stored, 0 otherwise.
    """
    # pylint: enable=anomalous-backslash-in-string
    # Plots the distribution for the average cost of incident(s) over 12 months
    log.info("Generating new incident cost distribution for '%s'...", str(pairing))
    incident_mean_cost, incident_median_cost = gi.get_incident_cost_averages(pairing)
    if incident_mean_cost is None or incident_median_cost is None:
        log.info(
            "No incident costs distribution generated for '%s'.",
            str(pairing),
        )
        return 0
    log.debug(
        "Returned values are: mean = %s, median = %s",
        str(incident_mean_cost),
        str(incident_median_cost),
    )
    # Fixed an operator-precedence bug: the expression
    #   np.log(mean) - 0 if (median == 0) else np.log(median)
    # parsed as `(np.log(mean) - 0) if ... else np.log(median)`, so for any
    # non-zero median the mean was dropped entirely. The intended formula
    # (sqrt(2 * (log(mean) - log(median)))) needs the conditional
    # parenthesised as the subtrahend.
    log_stddev = np.sqrt(
        2
        * (
            np.log(incident_mean_cost)
            - (0 if incident_median_cost == 0 else np.log(incident_median_cost))
        )
    )
    stddev = np.exp(1) ** log_stddev
    _label_plot(
        "Average annual incident-with-outcome cost distribution", "Cost (£)", "Density"
    )
    plt.plot(
        [
            lognorm.pdf(
                np.log(i),
                np.log(incident_mean_cost),
                np.log(incident_median_cost) if incident_median_cost > 0 else 0,
            )
            for i in range(1, 2500)
        ]
    )
    _save_plot("3 - cost dist")
    gi.create_incident_costs_distribution_node(pairing, incident_mean_cost, stddev)
    log.log(
        SUCCESS,
        "New incident costs distribution successfully generated for '%s'.",
        str(pairing),
    )
    return 1
def _generate_new_distributions(pairing: Tuple = (None, None)) -> Tuple:
    """(Re)generates the cost and likelihood distributions for every
    (size, industry) combination covered by `pairing`."""
    gi.__init__()
    log.info("Existing distributions deleted: %s", bool(gi.delete_distributions()))
    size, industry = pairing
    # A `None` component means "all values for that dimension".
    sizes = [size] if size is not None else gi.get_sizes()
    industries = [industry] if industry is not None else gi.get_industries()
    frequency_successes = 0
    costs_successes = 0
    # Attempts to generate new distributions for every combination of size
    # and industry values.
    for pair in itertools.product(sizes, industries):
        frequency_successes += _generate_new_incident_frequency_distribution(pair)
        costs_successes += _generate_new_incident_costs_distribution(pair)
    return frequency_successes, costs_successes
def main():
    """Called when the script is run from the command-line.

    Parses the command-line arguments, configures logging and the output
    directory, then (re)generates the distributions and reports how many of
    each kind were created.
    """
    # pylint: disable=global-statement
    global OUTPUT_DIR, IMAGES
    # pylint: enable=global-statement
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "-s",
        "--size",
        help="Specify the org. size (default: None)",
        choices=["micro", "small", "medium", "large"],
        type=str,
        default=None,
    )
    parser.add_argument(
        "-i",
        "--industry",
        help="Specify the org. industry SIC code (top-level only, e.g. C for "
        "Manufacturing) (default: None)",
        # Top-level SIC section letters run A-U.
        choices=list(map(chr, range(65, 86))),
        # Fixed: was `type=chr` — `chr()` expects an int, so any `-i` value
        # raised a TypeError before the choices were even checked.
        type=str,
        default=None,
    )
    parser.add_argument(
        "-o",
        "--output",
        help="Specify the output directory (default: ./output/)",
        type=str,
        default=os.path.join(os.path.dirname(__file__), "output/"),
        metavar="DIRECTORY",
    )
    parser.add_argument(
        "-p",
        "--images",
        help="Output images at each step of the script (default: false, just "
        "output the final LEC image)",
        action="store_true",
        default=False,
    )
    parser.add_argument(
        "-v",
        "--verbose",
        help="Verbose console output (default: false)",
        action="store_true",
        default=False,
    )
    parser.add_argument(
        "-d",
        "--debug",
        help="Show debug console output (default: false)",
        action="store_true",
        default=False,
    )
    args = parser.parse_args()
    OUTPUT_DIR = args.output
    IMAGES = args.images
    size = args.size
    industry = args.industry
    # Debug gives the most detail; verbose sits in between.
    if args.debug:
        log.basicConfig(format="%(levelname)s: %(message)s", level=log.DEBUG)
        log.info("Debug output.")
    elif args.verbose:
        log.basicConfig(format="%(levelname)s: %(message)s", level=log.INFO)
        log.info("Verbose output.")
    else:
        log.basicConfig(format="%(levelname)s: %(message)s")
    if not os.path.isdir(OUTPUT_DIR):
        os.makedirs(OUTPUT_DIR)
    incidents_dists, costs_dists = _generate_new_distributions((size, industry))
    log.log(
        SUCCESS,
        "Successfully generated %s incident frequency distributions and %s "
        "incident costs distributions!",
        str(incidents_dists),
        str(costs_dists),
    )
    sys.exit(0)
def _label_plot(title="Untitled Plot", xlabel="x axis", ylabel="y axis") -> None:
    """Apply a title and axis labels to the current matplotlib figure."""
    plt.title(title)
    plt.ylabel(ylabel)
    plt.xlabel(xlabel)
def _save_plot(filename="untitled") -> None:
    """Write the current figure to disk (when image output is enabled) and
    reset it ready for the next plot."""
    if IMAGES:
        target = OUTPUT_DIR + filename + ".png"
        plt.savefig(target)
    plt.clf()
# Script entry point.
if __name__ == "__main__":
    main()