(DIO-834) Refactor to use pdb queries, output to GCS Bucket

suckatrash 2020-08-25 05:36:41 -07:00
parent 854daa0d46
commit fd1ad61fce
No known key found for this signature in database
GPG key ID: 3857A763831B0756
9 changed files with 107 additions and 60 deletions
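In outline, the commit swaps simplepup's PQL interface for a direct pypuppetdb connection and mirrors every rendered report file to a Google Cloud Storage bucket. A minimal sketch of the two new dependencies working together, mirroring the calls in the diff below (the host, token, bucket, and file names here are placeholders):

    from pypuppetdb import connect
    from google.cloud import storage

    # Query PuppetDB over its API port, authenticating with an RBAC token.
    pupdb = connect(host="puppetdb.example.com", port=8081, timeout=30, token="...")
    nodes = pupdb._query('nodes', query='["extract", ["certname", "latest_report_hash"]]')

    # Push one rendered file to GCS, as gcs_upload() below does for each artifact.
    storage.Client().bucket("sre-inventory-reports").blob("index.html").upload_from_filename("report/index.html")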


@@ -19,9 +19,10 @@ import sys
from infinitory import cellformatter
from infinitory.inventory import Inventory
-from simplepup import puppetdb
+from pypuppetdb import connect
+from google.cloud import storage
-def output_html(inventory, directory):
+def output_html(inventory, directory, bucket_name):
if os.path.isdir(directory):
shutil.rmtree(directory)
os.mkdir(directory, 0o755)
@@ -32,6 +33,7 @@ def output_html(inventory, directory):
with open("{}/pygments.css".format(directory), "w", encoding="utf-8") as css:
css.write(pygments.formatters.HtmlFormatter().get_style_defs('.codehilite'))
+gcs_upload(bucket_name=bucket_name, source_file_name="{}/pygments.css".format(directory), destination_blob_name="pygments.css")
os.mkdir("{}/errors".format(directory), 0o755)
os.mkdir("{}/nodes".format(directory), 0o755)
@@ -43,6 +45,7 @@ def output_html(inventory, directory):
render_template("home.html",
path="",
generation_time=generation_time))
+gcs_upload(bucket_name=bucket_name, source_file_name="{}/index.html".format(directory), destination_blob_name="index.html")
report_columns = [
cellformatter.Fqdn("facts", "fqdn"),
@@ -71,6 +74,7 @@ def output_html(inventory, directory):
generation_time=generation_time,
columns=unique_error_columns,
errors=unique_errors))
+gcs_upload(bucket_name=bucket_name, source_file_name="{}/errors/index.html".format(directory), destination_blob_name="errors/index.html")
all_error_columns = [
cellformatter.Base("other", "message"),
@@ -87,6 +91,8 @@ def output_html(inventory, directory):
generation_time=generation_time,
columns=all_error_columns,
errors=unique_errors))
+gcs_upload(bucket_name=bucket_name, source_file_name="{}/errors/all.html".format(directory), destination_blob_name="errors/all.html")
with open("{}/nodes/index.html".format(directory), "w", encoding="utf-8") as html:
html.write(
@@ -95,6 +101,8 @@ def output_html(inventory, directory):
generation_time=generation_time,
columns=report_columns,
nodes=nodes))
+gcs_upload(bucket_name=bucket_name, source_file_name="{}/nodes/index.html".format(directory), destination_blob_name="nodes/index.html")
all_columns = [
cellformatter.Base("facts", "fqdn"),
@@ -124,6 +132,8 @@ def output_html(inventory, directory):
csv_writer.writerow([cell.head_csv() for cell in all_columns])
for node in nodes:
csv_writer.writerow([cell.body_csv(node) for cell in all_columns])
+gcs_upload(bucket_name=bucket_name, source_file_name="{}/nodes.csv".format(directory), destination_blob_name="nodes.csv")
write_json(nodes, directory, "index")
@@ -136,6 +146,7 @@ def output_html(inventory, directory):
generation_time=generation_time,
columns=all_columns[1:],
node=node))
+gcs_upload(bucket_name=bucket_name, source_file_name="{}/nodes/{}.html".format(directory, node["certname"]), destination_blob_name="nodes/{}.html".format(node["certname"]))
os.mkdir("{}/roles".format(directory), 0o755)
with open("{}/roles/index.html".format(directory), "w", encoding="utf-8") as html:
@@ -144,6 +155,8 @@ def output_html(inventory, directory):
path="../",
generation_time=generation_time,
roles=inventory.sorted_roles()))
+gcs_upload(bucket_name=bucket_name, source_file_name="{}/roles/index.html".format(directory), destination_blob_name="roles/index.html")
os.mkdir("{}/services".format(directory), 0o755)
sorted_services = inventory.sorted_services()
@@ -154,6 +167,8 @@ def output_html(inventory, directory):
path="../",
generation_time=generation_time,
services=sorted_services))
+gcs_upload(bucket_name=bucket_name, source_file_name="{}/services/index.html".format(directory), destination_blob_name="services/index.html")
for service in sorted_services:
path = "{}/services/{}.html".format(directory, service["class_name"])
@@ -163,6 +178,7 @@ def output_html(inventory, directory):
path="../",
generation_time=generation_time,
service=service))
+gcs_upload(bucket_name=bucket_name, source_file_name="{}/services/{}.html".format(directory, service["class_name"]), destination_blob_name="services/{}.html".format(service["class_name"]))
def render_template(template_name, **kwargs):
@@ -216,13 +232,24 @@ def write_json(nodes, directory, filename):
with open(path, "w", encoding="utf-8") as json_out:
json_out.write(json.dumps(nodes))
+def gcs_upload(bucket_name, source_file_name, destination_blob_name):
+"""Uploads a file to the bucket."""
+storage_client = storage.Client()
+bucket = storage_client.bucket(bucket_name)
+blob = bucket.blob(destination_blob_name)
+blob.upload_from_filename(source_file_name)
@click.command()
@click.option("--output", "-o", required=True, metavar="PATH", help="Directory to put report in. WARNING: this directory will be removed if it already exists.")
@click.option("--host", "-h", default="localhost", metavar="HOST", help="PuppetDB host to query")
@click.option("--token", "-t", default="123token", metavar="TOKEN", help="RBAC auth token to use")
@click.option("--verbose", "-v", default=False, is_flag=True)
@click.option("--debug", "-d", default=False, is_flag=True)
@click.option("--bucket", "-b", default="bucket", metavar="BUCKET", help="Bucket to save files to, such as GCS")
@click.version_option()
-def main(host, output, verbose, debug):
+def main(host, token, output, bucket, verbose, debug ):
"""Generate SRE inventory report"""
if debug:
set_up_logging(logging.DEBUG)
@@ -231,20 +258,20 @@ def main(host, output, verbose, debug):
else:
set_up_logging(logging.WARNING)
+pupdb = connect(host=host, port=8081, timeout=30, token=token)
try:
inventory = Inventory(debug=debug)
inventory.add_active_filter()
+inventory.load_nodes(pupdb)
+inventory.load_errors(pupdb)
+inventory.load_backups(pupdb)
+inventory.load_logging(pupdb)
+inventory.load_metrics(pupdb)
+inventory.load_monitoring(pupdb)
+inventory.load_roles(pupdb)
-with puppetdb.AutomaticConnection(host) as pupdb:
-inventory.load_nodes(pupdb)
-inventory.load_errors(pupdb)
-inventory.load_backups(pupdb)
-inventory.load_logging(pupdb)
-inventory.load_metrics(pupdb)
-inventory.load_monitoring(pupdb)
-inventory.load_roles(pupdb)
-output_html(inventory, output)
+output_html(inventory, output, bucket_name=bucket)
except socket.gaierror as e:
sys.exit("PuppetDB connection (Socket): {}".format(e))
except paramiko.ssh_exception.SSHException as e:
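With the options above, one run now writes the report to a local directory and copies each file to the bucket as it is generated. A hypothetical invocation (the infinitory command name, host, token, and bucket are placeholders, not taken from this commit):

    infinitory --output ./report \
               --host puppetdb.example.com \
               --token "$RBAC_TOKEN" \
               --bucket sre-inventory-reports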


@@ -32,21 +32,22 @@ class ErrorParser(object):
def load_reports(self, pupdb):
""" I didn't use a subquery because it takes much longer than loading
the reports one by one """
-for report in pupdb.query('nodes[certname, latest_report_hash] { }'):
-cache_file = "%s/%s" % (self.reports_cache_path, report["latest_report_hash"])
-if os.path.isfile(cache_file):
-full_report = pickle.load(open(cache_file, "rb"))
-if self.debug:
-sys.stdout.write('#')
-else:
-query = 'reports[] { hash = "%s" }' % report["latest_report_hash"]
-full_report = pupdb.query(query)
-pickle.dump( full_report, open(cache_file, "wb" ) )
-if self.debug:
-sys.stdout.write('.')
-sys.stdout.flush()
+for report in pupdb._query('nodes', query='["extract", ["certname", "latest_report_hash"]]'):
+if report["latest_report_hash"] != None:
+cache_file = "%s/%s" % (self.reports_cache_path, report["latest_report_hash"])
+if os.path.isfile(cache_file):
+full_report = pickle.load(open(cache_file, "rb"))
+if self.debug:
+sys.stdout.write('#')
+else:
+query = '["=", "hash", "%s"]' % report["latest_report_hash"]
+full_report = pupdb._query('reports', query=query)
+pickle.dump( full_report, open(cache_file, "wb" ) )
+if self.debug:
+sys.stdout.write('.')
+sys.stdout.flush()
-self._reports[report["certname"]] = full_report[0]
+self._reports[report["certname"]] = full_report[0]
def common_error_prefixes(self):
return [
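The rewritten loop above is effectively a content-addressed cache: each report is pickled under its latest_report_hash, and nodes whose latest_report_hash is None are now skipped. The same idea distilled into a standalone helper (a sketch; cached_report and the cache directory are hypothetical, pupdb is a pypuppetdb connection as in the diff):

    import os
    import pickle

    def cached_report(pupdb, cache_dir, report_hash):
        # A report is immutable for a given hash, so the hash alone is a safe cache key.
        cache_file = os.path.join(cache_dir, report_hash)
        if os.path.isfile(cache_file):
            with open(cache_file, "rb") as fh:
                return pickle.load(fh)
        full_report = pupdb._query('reports', query='["=", "hash", "%s"]' % report_hash)
        with open(cache_file, "wb") as fh:
            pickle.dump(full_report, fh)
        return full_report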


@@ -1,10 +1,10 @@
from collections import defaultdict
from operator import itemgetter
-from simplepup import puppetdb
+from pypuppetdb import connect
import infinitory.errors as errors
class Inventory(object):
def __init__(self, filters=set(), debug=False):
self.debug = debug
@@ -21,16 +21,16 @@ class Inventory(object):
def load_nodes(self, pupdb):
self.nodes = dict()
-for node in pupdb.query(self.filter('inventory {}')):
+for node in pupdb._query('inventory'):
node["other"] = defaultdict(list)
self.nodes[node["certname"]] = node
def query_classes(self, pupdb, class_name):
return self.query_resources(pupdb,
'title="%s" and type="Class"' % class_name)
'["and", ["=", "title", "%s"], ["=", "type", "Class"]]' % class_name)
def query_resources(self, pupdb, condition, include_absent=False):
-for resource in pupdb.query(self.filter('resources {}', condition)):
+for resource in pupdb._query('resources', query=condition):
if not include_absent:
if resource["parameters"].get("ensure", None) == "absent":
continue
@@ -41,7 +41,7 @@ class Inventory(object):
continue
def load_backups(self, pupdb):
-for node, resource in self.query_resources(pupdb, 'type="Backup::Job"'):
+for node, resource in self.query_resources(pupdb, '["=", "type", "Backup::Job"]'):
paths = resource["parameters"]["files"]
if type(paths) is list:
node["other"]["backups"].extend(paths)
@@ -86,7 +86,7 @@ class Inventory(object):
def load_roles(self, pupdb):
self.roles = defaultdict(list)
-condition = 'type = "Class" and title ~ "^Role::"'
+condition = '["and", ["=", "type", "Class"], ["~", "title", "^Role::"]]'
for node, resource in self.query_resources(pupdb, condition):
if resource["title"] not in ("role", "role::delivery"):
node["other"]["roles"].append(resource["title"])