Skip to content

Commit cbe0136

Browse files
authored
Merge pull request #323 from liudmylaru/add_precommit_hooks
Add pre-commit hooks for ocids, copyright and secrets
2 parents 47b59a8 + 40f9f37 commit cbe0136

File tree

4 files changed

+136
-0
lines changed

4 files changed

+136
-0
lines changed

.gitleaks.toml

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
title = "Gitleaks Config"
2+
3+
# Gitleaks feature, extending the existing base config from:
4+
# https://github.com/zricethezav/gitleaks/blob/master/config/gitleaks.toml
5+
[extend]
6+
useDefault = true
7+
8+
# Allowlist's 'stopwords' and 'regexes' exclude any secrets or matching patterns from the current repository.
9+
# Paths listed in allowlist will not be scanned.
10+
[allowlist]
11+
description = "Global allow list"
12+
stopwords = ["test_password", "sample_key"]
13+
regexes = [
14+
'''example-password''',
15+
'''this-is-not-the-secret''',
16+
'''<redacted>'''
17+
]
18+
paths = [
19+
'''^(actions|ai_services|data|distributed|jobs|labs|model|notebook_lifecycle|pipelines|use_cases)''',
20+
'''^(.git|.pre-commit)''',
21+
'''CODE_OF_CONDUCT.md''',
22+
'''CONTRIBUTING.md''',
23+
'''LICENSE.txt''',
24+
'''package.json''',
25+
'''^README.md''',
26+
'''SECURITY.md''',
27+
'''THIRD_PARTY_LICENSES.TXT''',
28+
]
29+
30+
# Describe rule to search real ocids
31+
[[rules]]
32+
description = "Real ocids"
33+
id = "ocid"
34+
path = '''notebook_examples'''
35+
regex = '''ocid[123]\.[a-z1-9A-Z]*\.oc\d\.[a-z1-9A-Z]*\.[a-z1-9A-Z]+'''
36+
keywords = [
37+
"ocid"
38+
]
39+
40+
# Describe rule to search generic secrets
41+
[[rules]]
42+
description = "Generic secret"
43+
id = "generic-secret"
44+
path = '''notebook_examples'''
45+
regex = '''(?i)((key|api|token|secret|passwd|password|psw|pass|pswd)[a-z0-9_ .\-,]{0,25})(=|>|:=|\|\|:|<=|=>|:).{0,5}['\"]([0-9a-zA-Z!@#$%^&*<>\\\-_.=]{3,100})['\"]'''
46+
entropy = 0
47+
secretGroup = 4
48+
keywords = [
49+
"key","api","token","secret","passwd","password", "psw", "pass", "pswd"
50+
]

.pre-commit-config.yaml

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
# To use:
2+
#
3+
# pre-commit run --all-files # run all hooks on all files
4+
# pre-commit run <HOOK_ID> --all-files # run one hook on all files
5+
# pre-commit run --files <path_to_file> <path_to_folder/**> # run all hooks on files
6+
# pre-commit run <HOOK_ID> --files <path_to_file> <path_to_folder/**> # run one hook on files
7+
#
8+
# Or:
9+
#
10+
# pre-commit install # (runs every time you commit in git)
11+
#
12+
# To update this file:
13+
#
14+
# pre-commit autoupdate
15+
#
16+
# See https://github.com/pre-commit/pre-commit
17+
18+
# Detect hardcoded secrets and ocids in notebook_examples/ folder
19+
repos:
20+
- repo: https://github.com/gitleaks/gitleaks
21+
rev: v8.17.0
22+
hooks:
23+
- id: gitleaks
24+
files: ^notebook_examples/
25+
# Oracle copyright checkers in notebook_examples/ folder
26+
- repo: local
27+
hooks:
28+
- id: check-copyright
29+
name: check-copyright
30+
entry: .pre-commit-scripts/check-copyright.py
31+
language: script
32+
types_or: ['python', 'shell', 'bash']
33+
files: ^notebook_examples/

.pre-commit-hooks.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
- id: check-copyright
2+
name: check-copyright
3+
description: Validate Oracle copyright and license statements
4+
entry: .pre-commit-scripts/check-copyright.py
5+
language: script
6+
types_or: ['python', 'shell', 'bash']
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
#!/usr/bin/env python
2+
3+
# Copyright (c) 2023 Oracle and/or its affiliates.
4+
# Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/
5+
6+
import datetime
7+
import os
8+
import sys
9+
10+
# Year taken from the local system date when the script starts.
CURRENT_YEAR = datetime.date.today().year

# Earliest "year published" accepted in a copyright line.
_FIRST_PUBLISHED_YEAR = 2020

# Accepted copyright lines:
#   * "<published year>, <current year>" for every published year before the
#     current one, and
#   * the single-year form for files first published in the current year.
# The list is derived from CURRENT_YEAR, so it does not need a manual edit
# at the beginning of each new year.
PUBLISHED_LAST_EDITED_YEARS = [
    f"Copyright (c) {year}, {CURRENT_YEAR} Oracle and/or its affiliates"
    for year in range(_FIRST_PUBLISHED_YEAR, CURRENT_YEAR)
] + [
    f"Copyright (c) {CURRENT_YEAR} Oracle and/or its affiliates",
]

# Every statement in this list must appear verbatim in a checked file.
LICENSE_STATEMENTS = [
    "Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl/"
]
23+
24+
def main(filenames, years=None, phrases=None) -> int:
    """Check files for an accepted copyright line and the license statements.

    Every file in ``filenames`` whose base name does not start with "." is
    read and must contain at least one of the ``years`` strings and all of
    the ``phrases`` strings. One message is printed per failing file, and
    all files are examined even after a failure.

    Args:
        filenames: Paths of the files to check.
        years: Accepted copyright lines; defaults to
            ``PUBLISHED_LAST_EDITED_YEARS``.
        phrases: Required license statements; defaults to
            ``LICENSE_STATEMENTS``.

    Raises:
        SystemExit: Always. The function does not return normally despite
            its ``int`` annotation; the exit status is 0 when every file
            passes and 1 otherwise.
    """
    # Resolve the defaults at call time so the module constants are only
    # needed when the caller does not supply its own lists.
    if years is None:
        years = PUBLISHED_LAST_EDITED_YEARS
    if phrases is None:
        phrases = LICENSE_STATEMENTS

    retcode = 0
    for filename in filenames:
        # Hidden files (dotfiles) are skipped.
        if os.path.basename(filename).startswith("."):
            continue

        # The searched strings are plain ASCII, so decoding as UTF-8 and
        # replacing undecodable bytes cannot hide or fake a match, and it
        # avoids depending on the platform's default encoding.
        with open(filename, encoding="utf-8", errors="replace") as inputfile:
            content = inputfile.read()

        if not any(year in content for year in years):
            print(f"{filename}: Year published or year last edited not correct.")
            retcode = 1
            # Move on to the next file instead of aborting the whole run,
            # so every offending file is reported.
            continue

        if not all(phrase in content for phrase in phrases):
            print(f"{filename}: Copyright text missing or incomplete.")
            retcode = 1

    sys.exit(retcode)
43+
44+
45+
if __name__ == "__main__":
    # sys.argv[0] is the path of this script itself; only the remaining
    # arguments are files to check.
    filenames = sys.argv[1:]
    main(filenames)

0 commit comments

Comments
 (0)