diff --git a/README.md b/README.md index 7ee6833..f7255e3 100644 --- a/README.md +++ b/README.md @@ -2,6 +2,14 @@ This is a [Singer](https://singer.io) tap that produces JSON-formatted data following the [Singer spec](https://github.com/singer-io/getting-started/blob/master/SPEC.md). +## Important notice + +- This repository's default branch [`legacy-stable`](https://github.com/MeltanoLabs/tap-gitlab/tree/legacy-stable) is kept for compatibility reasons but is no longer under active development. +- New development is being performed against the [`main`](https://github.com/MeltanoLabs/tap-gitlab/tree/main) branch, which is based on a port to the Meltano SDK in Pull Request #65. +- For a stable experience, users of this tap should begin pinning their installations to a specific [release](https://github.com/MeltanoLabs/tap-gitlab/releases) instead of branch references. More instructions are provided within the `README.md` of the `main` branch. + +## About this tap + It is based on v0.5.1 of , but contains [many additional improvements](./CHANGELOG.md). 
This tap: @@ -26,6 +34,8 @@ This tap: - [Epics](https://docs.gitlab.com/ee/api/epics.html) (only available for GitLab Ultimate and GitLab.com Gold accounts) - [Epic Issues](https://docs.gitlab.com/ee/api/epic_issues.html) (only available for GitLab Ultimate and GitLab.com Gold accounts) - [Vulnerabilities](https://docs.gitlab.com/ee/api/project_vulnerabilities.html) + - [Group Variables](https://docs.gitlab.com/ee/api/group_level_variables.html) + - [Project Variables](https://docs.gitlab.com/ee/api/project_level_variables.html) - Outputs the schema for each resource - Incrementally pulls data based on the input state @@ -68,7 +78,9 @@ pip install git+https://gitlab.com/meltano/tap-gitlab.git "start_date": "2018-01-01T00:00:00Z", "ultimate_license": true, "fetch_merge_request_commits": false, - "fetch_pipelines_extended": false + "fetch_pipelines_extended": false, + "fetch_group_variables": false, + "fetch_project_variables": false } ``` @@ -80,6 +92,10 @@ pip install git+https://gitlab.com/meltano/tap-gitlab.git If `fetch_pipelines_extended` is true (defaults to false), then for every Pipeline fetched with `sync_pipelines` (which returns N pages containing all pipelines per project), also fetch extended details of each of these pipelines with `sync_pipelines_extended`. Similar concerns as those related to `fetch_merge_request_commits` apply here - every pipeline fetched with `sync_pipelines_extended` requires a separate API call. + If `fetch_group_variables` is true (defaults to false), then Group-level CI/CD variables will be retrieved for each available / specified group. This feature is treated as an opt-in to prevent users from accidentally extracting any potential secrets stored as Group-level CI/CD variables. + + If `fetch_project_variables` is true (defaults to false), then Project-level CI/CD variables will be retrieved for each available / specified project. 
This feature is treated as an opt-in to prevent users from accidentally extracting any potential secrets stored as Project-level CI/CD variables. + 4. [Optional] Create the initial state file You can provide JSON file that contains a date for the API endpoints diff --git a/setup.py b/setup.py index 491421c..c360d84 100644 --- a/setup.py +++ b/setup.py @@ -11,10 +11,10 @@ classifiers=['Programming Language :: Python :: 3 :: Only'], py_modules=['tap_gitlab'], install_requires=[ - 'singer-python==5.9.1', - 'requests==2.20.0', + 'singer-python==6.0.1', + 'requests==2.32.0', 'strict-rfc3339==0.7', - 'backoff==1.8.0', + 'backoff==2.2.1', 'psutil==5.8.0', 'gitlocal@git+https://{}@github.com/minwareco/gitlocal.git'.format(os.environ.get("GITHUB_TOKEN", "")) ], @@ -40,6 +40,8 @@ "tags.json", "releases.json", "vulnerabilities.json", + "project_variables.json", + "group_variables.json" ], }, include_package_data=True, diff --git a/tap_gitlab/__init__.py b/tap_gitlab/__init__.py index a109139..23499cc 100644 --- a/tap_gitlab/__init__.py +++ b/tap_gitlab/__init__.py @@ -29,7 +29,12 @@ 'private_token': None, 'start_date': None, 'groups': '', - 'ultimate_license': False + 'ultimate_license': False, + 'fetch_merge_request_commits': False, + 'fetch_merge_request_notes': False, + 'fetch_pipelines_extended': False, + 'fetch_group_variables': False, + 'fetch_project_variables': False, } STATE = {} CATALOG = None @@ -132,6 +137,7 @@ def load_schema(entity): 'url': '/groups/{id}/subgroups', 'schema': load_schema('groups'), 'key_properties': ['id'], + 'replication_method': 'FULL_TABLE', }, 'group_milestones': { 'url': '/groups/{id}/milestones', @@ -225,16 +231,34 @@ def load_schema(entity): 'key_properties': ['id'], 'replication_method': 'FULL_TABLE', }, - 'vulnerabilities': { - 'url': '/projects/{id}/vulnerabilities', - 'schema': load_schema('vulnerabilities'), - 'key_properties': ['id'], - 'replication_method': 'FULL_TABLE', - }, + 'vulnerabilities': { + 'url': 
'/projects/{id}/vulnerabilities', + 'schema': load_schema('vulnerabilities'), + 'key_properties': ['id'], + 'replication_method': 'FULL_TABLE', + }, + 'project_variables': { + 'url': '/projects/{id}/variables', + 'schema': load_schema('project_variables'), + 'key_properties': ['project_id', 'key', 'environment_scope'], + 'replication_method': 'FULL_TABLE', + }, + 'group_variables': { + 'url': '/groups/{id}/variables', + 'schema': load_schema('group_variables'), + 'key_properties': ['group_id', 'key', 'environment_scope'], + 'replication_method': 'FULL_TABLE', + } } ULTIMATE_RESOURCES = ("epics", "epic_issues") -STREAM_CONFIG_SWITCHES = ('merge_request_commits', 'merge_request_notes', 'pipelines_extended') +STREAM_CONFIG_SWITCHES = ( + 'merge_request_commits', + 'merge_request_notes', + 'pipelines_extended', + 'group_variables', + 'project_variables', +) LOGGER = singer.get_logger() SESSION = requests.Session() @@ -276,17 +300,15 @@ def get_start(entity): STATE[entity] = max(dates_to_compare).isoformat() return STATE[entity] - -# TODO : when singer-python updates the backoff module -# we should update this to pull the exact time to wait from the header -@backoff.on_predicate(backoff.expo, - lambda x: x.status_code == 429, - max_tries=10, - jitter=backoff.random_jitter) +@backoff.on_predicate(backoff.runtime, + predicate=lambda r: r.status_code == 429, + max_tries=5, + value=lambda r: int(r.headers.get("Retry-After", 60)), + jitter=None) @backoff.on_exception(backoff.expo, (requests.exceptions.RequestException), max_tries=5, - giveup=lambda e: e.response is not None and 400 <= e.response.status_code < 500, # pylint: disable=line-too-long + giveup=lambda e: e.response is not None and e.response.status_code != 429 and 400 <= e.response.status_code < 500, # pylint: disable=line-too-long factor=2) def request(url, params=None): params = params or {} @@ -307,12 +329,6 @@ def request(url, params=None): LOGGER.info("Skipping request to {}".format(url)) LOGGER.info("Reason: {} - {}".format(resp.status_code, 
resp.content)) raise ResourceInaccessible - # if we are being rate limited, let the backoff logic run - elif resp.status_code != 429 and resp.status_code >= 400: - LOGGER.critical( - "Error making request to GitLab API: GET {} [{} - {}]".format( - url, resp.status_code, resp.content)) - sys.exit(1) return resp @@ -1004,6 +1020,8 @@ def sync_group(gid, pids, gitLocal): sync_labels(data, "group") + sync_variables(data, "group") + if CONFIG['ultimate_license']: sync_epics(data) @@ -1017,6 +1035,8 @@ def sync_group(gid, pids, gitLocal): def sync_pipelines(project): entity = "pipelines" stream = CATALOG.get_stream(entity) + + LOGGER.info('Stream Pipelines: {}'.format(stream is not None and stream.is_selected())) if stream is None or not stream.is_selected(): return @@ -1030,7 +1050,12 @@ def sync_pipelines(project): with Transformer(pre_hook=format_timestamp) as transformer: for row in gen_request(url): - transformed_row = transformer.transform(row, RESOURCES[entity]["schema"], mdata) + pipeline_record = { + **row, + 'project_id': project['id'], + '_sdc_repository': project['path_with_namespace'] + } + transformed_row = transformer.transform(pipeline_record, RESOURCES[entity]["schema"], mdata) # Write the Pipeline record singer.write_record(entity, transformed_row, time_extracted=utils.now()) @@ -1058,8 +1083,12 @@ def sync_pipelines_extended(project, pipeline): with Transformer(pre_hook=format_timestamp) as transformer: for row in gen_request(url): - row['project_id'] = project['id'] - transformed_row = transformer.transform(row, RESOURCES[entity]["schema"], mdata) + pipeline_extended_record = { + **row, + 'project_id': project['id'], + '_sdc_repository': project['path_with_namespace'] + } + transformed_row = transformer.transform(pipeline_extended_record, RESOURCES[entity]["schema"], mdata) singer.write_record(entity, transformed_row, time_extracted=utils.now()) @@ -1088,13 +1117,13 @@ def sync_jobs(project, pipeline): url = get_url(entity=entity, id=project['id'], 
secondary_id=pipeline['id']) with Transformer(pre_hook=format_timestamp) as transformer: for row in gen_request(url): - row['project_id'] = project['id'] - flatten_id(row, 'user') - flatten_id(row, 'commit') - flatten_id(row, 'pipeline') - flatten_id(row, 'runner') - - transformed_row = transformer.transform(row, RESOURCES[entity]['schema'], mdata) + job_record = { + **row, + 'project_id': project['id'], + 'pipeline_id': pipeline['id'], + '_sdc_repository': project['path_with_namespace'] + } + transformed_row = transformer.transform(job_record, RESOURCES[entity]['schema'], mdata) singer.write_record(entity, transformed_row, time_extracted=utils.now()) def write_repository(raw_repo): @@ -1123,6 +1152,21 @@ def write_repository(raw_repo): rec = transformer.transform(repo, Schema.to_dict(stream.schema), metadata=metadata.to_map(stream.metadata)) singer.write_record('repositories', rec, time_extracted=extraction_time) +def sync_variables(entity, element="project"): + stream_name = "{}_variables".format(element) + stream = CATALOG.get_stream(stream_name) + if stream is None or not stream.is_selected(): + return + mdata = metadata.to_map(stream.metadata) + + url = get_url(entity=element + "_variables", id=entity['id']) + + with Transformer(pre_hook=format_timestamp) as transformer: + for row in gen_request(url): + row[element + '_id'] = entity['id'] + transformed_row = transformer.transform(row, RESOURCES[element + "_variables"]["schema"], mdata) + singer.write_record(element + "_variables", transformed_row, time_extracted=utils.now()) + def sync_project(pid, gitLocal): url = get_url(entity="projects", id=pid) @@ -1190,6 +1234,8 @@ def sync_project(pid, gitLocal): sync_tags(data) sync_pipelines(data) sync_vulnerabilities(data) + sync_variables(data) + def do_sync(): LOGGER.info("Starting sync") @@ -1285,6 +1331,11 @@ def main_impl(): CONFIG.update(args.config) CONFIG['ultimate_license'] = truthy(CONFIG['ultimate_license']) + CONFIG['fetch_merge_request_commits'] = 
truthy(CONFIG['fetch_merge_request_commits']) + CONFIG['fetch_merge_request_notes'] = truthy(CONFIG['fetch_merge_request_notes']) + CONFIG['fetch_pipelines_extended'] = truthy(CONFIG['fetch_pipelines_extended']) + CONFIG['fetch_group_variables'] = truthy(CONFIG['fetch_group_variables']) + CONFIG['fetch_project_variables'] = truthy(CONFIG['fetch_project_variables']) if '/api/' not in CONFIG['api_url']: CONFIG['api_url'] += '/api/v4' diff --git a/tap_gitlab/schemas/group_variables.json b/tap_gitlab/schemas/group_variables.json new file mode 100644 index 0000000..2352ed0 --- /dev/null +++ b/tap_gitlab/schemas/group_variables.json @@ -0,0 +1,26 @@ +{ + "type": "object", + "properties": { + "group_id": { + "type": ["null", "integer"] + }, + "variable_type": { + "type": ["null", "string"] + }, + "key": { + "type": ["null", "string"] + }, + "value": { + "type": ["null", "string"] + }, + "protected": { + "type": ["null", "boolean"] + }, + "masked": { + "type": ["null", "boolean"] + }, + "environment_scope": { + "type": ["null", "string"] + } + } +} diff --git a/tap_gitlab/schemas/jobs.json b/tap_gitlab/schemas/jobs.json index cf2f370..b17beb5 100644 --- a/tap_gitlab/schemas/jobs.json +++ b/tap_gitlab/schemas/jobs.json @@ -1,35 +1,204 @@ { "type": "object", "properties": { - "id": { - "type": ["null", "integer"] + "_sdc_repository": { + "type": "string" }, "project_id": { - "type": ["null", "integer"] + "type": "integer" + }, + "pipeline_id": { + "type": "integer" + }, + "id": { + "type": "integer" }, "user_id": { - "type": ["null", "integer"] + "type": [ + "null", + "integer" + ] }, - "commit_id": { - "type": ["null", "string"] + "commit": { + "type": "object", + "properties": { + "author_email": { + "type": "string" + }, + "author_name": { + "type": "string" + }, + "created_at": { + "type": "string" + }, + "id": { + "type": "string" + }, + "message": { + "type": "string" + }, + "short_id": { + "type": "string" + }, + "title": { + "type": "string" + } + } }, - 
"pipeline_id": { - "type": ["null", "integer"] + "pipeline": { + "type": "object", + "properties": { + "id": { + "type": "integer" + }, + "project_id": { + "type": "integer" + }, + "ref": { + "type": "string" + }, + "sha": { + "type": "string" + }, + "status": { + "type": "string" + } + } + }, + "runner": { + "type": [ + "null", + "object" + ], + "properties": { + "id": { + "type": "integer" + }, + "description": { + "type": [ + "null", + "string" + ] + }, + "ip_address": { + "type": [ + "null", + "string" + ] + }, + "active": { + "type": "boolean" + }, + "paused": { + "type": "boolean" + }, + "is_shared": { + "type": "boolean" + }, + "runner_type": { + "type": "string" + }, + "name": { + "type": [ + "null", + "string" + ] + }, + "online": { + "type": "boolean" + }, + "status": { + "type": "string" + } + } }, - "runner_id": { - "type": ["null", "integer"] + "runner_manager": { + "type": [ + "null", + "object" + ], + "properties": { + "id": { + "type": "integer" + }, + "system_id": { + "type": "string" + }, + "version": { + "type": [ + "null", + "string" + ] + }, + "revision": { + "type": [ + "null", + "string" + ] + }, + "platform": { + "type": [ + "null", + "string" + ] + }, + "architecture": { + "type": [ + "null", + "string" + ] + }, + "is_shared": { + "type": "boolean" + }, + "ip_address": { + "type": [ + "null", + "string" + ] + }, + "status": { + "type": "string" + }, + "created_at": { + "type": [ + "null", + "string" + ], + "format": "date-time" + }, + "contacted_at": { + "type": [ + "null", + "string" + ], + "format": "date-time" + } + } }, "name": { - "type": ["null", "string"] + "type": [ + "null", + "string" + ] }, "stage": { - "type": ["null", "string"] + "type": [ + "null", + "string" + ] }, "ref": { - "type": ["null", "string"] + "type": [ + "null", + "string" + ] }, "web_url": { - "type": ["null", "string"] + "type": [ + "null", + "string" + ] }, "status": { "anyOf": [ @@ -52,74 +221,142 @@ } ] }, - "duration": { - "anyOf": [ - { - "type": "number", 
- "format": "float" - }, - { - "type": "null" - } + "failure_reason": { + "type": [ + "null", + "string" ] }, + "tag": { + "type": "boolean" + }, + "duration": { + "type": [ + "null", + "number" + ], + "format": "float" + }, + "queued_duration": { + "type": [ + "null", + "number" + ], + "format": "float" + }, "coverage": { - "anyOf": [ - { - "type": "number", - "format": "float" - }, - { - "type": "null" - } - ] + "type": [ + "null", + "number" + ], + "format": "float" + }, + "archived": { + "type": "boolean" }, "allow_failure": { - "type": ["null", "boolean"] + "type": [ + "null", + "boolean" + ] }, "created_at": { - "anyOf": [ - { - "type": "string", - "format": "date-time" - }, - { - "type": "null" - } - ] + "type": [ + "null", + "string" + ], + "format": "date-time" }, "started_at": { - "anyOf": [ - { - "type": "string", - "format": "date-time" - }, - { - "type": "null" - } - ] + "type": [ + "null", + "string" + ], + "format": "date-time" }, "finished_at": { - "anyOf": [ - { - "type": "string", - "format": "date-time" - }, - { - "type": "null" + "type": [ + "null", + "string" + ], + "format": "date-time" + }, + "erased_at": { + "type": [ + "null", + "string" + ], + "format": "date-time" + }, + "artifacts": { + "type": "array", + "items": { + "type": "object", + "properties": { + "file_type": { + "type": "string" + }, + "size": { + "type": "integer" + }, + "filename": { + "type": "string" + }, + "file_format": { + "type": [ + "null", + "string" + ] + } } - ] + } }, "artifacts_expire_at": { - "anyOf": [ - { - "type": "string", - "format": "date-time" + "type": [ + "null", + "string" + ], + "format": "date-time" + }, + "tag_list": { + "items": { + "type": "string" + }, + "type": "array" + }, + "project": { + "type": "object", + "properties": { + "ci_job_token_scope_enabled": { + "type": "boolean" + } + } + }, + "user": { + "type": "object", + "properties": { + "name": { + "type": [ + "null", + "string" + ] }, - { - "type": "null" + "username": { + "type": "string" 
+ }, + "id": { + "type": "integer" + }, + "state": { + "type": "string" + }, + "avatar_url": { + "type": "string" + }, + "web_url": { + "type": "string" } - ] + } } } -} \ No newline at end of file +} diff --git a/tap_gitlab/schemas/pipelines.json b/tap_gitlab/schemas/pipelines.json index 2ded96b..175f934 100644 --- a/tap_gitlab/schemas/pipelines.json +++ b/tap_gitlab/schemas/pipelines.json @@ -1,20 +1,50 @@ { "type": "object", "properties": { + "_sdc_repository": { + "type": "string" + }, "id": { - "type": ["null", "integer"] + "type": "integer" + }, + "iid": { + "type": "integer" + }, + "project_id": { + "type": "integer" }, "status": { - "type": ["null", "string"] + "type": [ + "null", + "string" + ] + }, + "source": { + "type": "string" }, "ref": { - "type": ["null", "string"] + "type": [ + "null", + "string" + ] }, "sha": { - "type": ["null", "string"] + "type": [ + "null", + "string" + ] + }, + "name": { + "type": [ + "null", + "string" + ] }, "web_url": { - "type": ["null", "string"] + "type": [ + "null", + "string" + ] }, "created_at": { "anyOf": [ diff --git a/tap_gitlab/schemas/pipelines_extended.json b/tap_gitlab/schemas/pipelines_extended.json index 1864702..8809aea 100644 --- a/tap_gitlab/schemas/pipelines_extended.json +++ b/tap_gitlab/schemas/pipelines_extended.json @@ -1,44 +1,86 @@ { "type": "object", "properties": { - "project_id": { - "type": ["null", "integer"] + "_sdc_repository": { + "type": "string" }, "id": { - "type": ["null", "integer"] + "type": "integer" + }, + "iid": { + "type": "integer" + }, + "project_id": { + "type": "integer" + }, + "name": { + "type": [ + "null", + "string" + ] }, "status": { - "type": ["null", "string"] + "type": [ + "null", + "string" + ] + }, + "source": { + "type": "string" }, "ref": { - "type": ["null", "string"] + "type": [ + "null", + "string" + ] }, "sha": { - "type": ["null", "string"] + "type": [ + "null", + "string" + ] }, "before_sha": { - "type": ["null", "string"] + "type": [ + "null", + "string" + ] 
}, "tag": { - "type": ["null", "boolean"] + "type": [ + "null", + "boolean" + ] }, "yaml_errors": { - "type": ["null", "string"] + "type": [ + "null", + "string" + ] }, "user": { "type": "object", "properties": { "name": { - "type": "string" + "type": [ + "null", + "string" + ] }, "username": { - "type": "string" + "type": "string" }, "id": { - "type": "integer" + "type": "integer" }, "state": { - "type": "string" + "type": "string" + }, + "avatar_url": { + "type": "string" + }, + "web_url": { + "type": "string" } } }, @@ -107,18 +149,63 @@ } ] }, - "coverage": { + "queued_duration": { "anyOf": [ { - "type": "number" + "type": "integer" }, { "type": "null" } ] }, + "coverage": { + "type": [ + "null", + "number" + ], + "format": "float" + }, "web_url": { - "type": ["null", "string"] + "type": [ + "null", + "string" + ] + }, + "detailed_status": { + "type": "object", + "properties": { + "icon": { + "type": "string" + }, + "text": { + "type": "string" + }, + "label": { + "type": "string" + }, + "group": { + "type": "string" + }, + "tooltip": { + "type": "string" + }, + "has_details": { + "type": "boolean" + }, + "details_path": { + "type": "string" + }, + "illustration": { + "type": [ + "null", + "string" + ] + }, + "favicon": { + "type": "string" + } + } } } } diff --git a/tap_gitlab/schemas/project_variables.json b/tap_gitlab/schemas/project_variables.json new file mode 100644 index 0000000..62f0b49 --- /dev/null +++ b/tap_gitlab/schemas/project_variables.json @@ -0,0 +1,26 @@ +{ + "type": "object", + "properties": { + "project_id": { + "type": ["null", "integer"] + }, + "variable_type": { + "type": ["null", "string"] + }, + "key": { + "type": ["null", "string"] + }, + "value": { + "type": ["null", "string"] + }, + "protected": { + "type": ["null", "boolean"] + }, + "masked": { + "type": ["null", "boolean"] + }, + "environment_scope": { + "type": ["null", "string"] + } + } +} diff --git a/tap_gitlab/schemas/projects.json b/tap_gitlab/schemas/projects.json index 
159197b..dc177e0 100644 --- a/tap_gitlab/schemas/projects.json +++ b/tap_gitlab/schemas/projects.json @@ -13,6 +13,9 @@ "builds_enabled": { "type": ["null", "boolean"] }, + "ci_config_path": { + "type": ["null", "string"] + }, "container_registry_enabled": { "type": ["null", "boolean"] },