src/util/log-scripts/an_catalog.py
author Shawn Walker <shawn.walker@oracle.com>
Wed, 04 Dec 2013 22:32:35 -0800
changeset 2991 75e616731cc3
parent 642 bcf78691fe66
child 3143 f6fac0617411
permissions -rw-r--r--
16635525 EOL of packagemanager and related components

#!/usr/bin/python
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#

#
# Copyright (c) 2008, 2013, Oracle and/or its affiliates. All rights reserved.
#

"""\
an_catalog.py - analyze pre-processed Apache combined log of catalog entries

The catalog operation presents interactions similar to the web-ping interaction.
The only distinct aspect is that, since the agent for a catalog operation is
usually pkg(1), we can assess the distribution across versions of these clients.

Our list of measurements is:

        - distinct operations,
        - operations by distinct IP,
        - active users, defined as users who have contacted the repository twice
          in the past 60 days,
        - operations by country [defer],
        - operations by architecture,
        - package client distribution.

The model is that the summary is printed to standard out, and a fuller report is
printed to catalog.html in the current directory.
"""

import datetime
import fileinput
import getopt
import md5
import os
import re
import socket
import sys
import tempfile
import time

from an_report import *

# Settings taken from the command line; each stays None unless the
# corresponding option is supplied.
after = before = summary_file = None

# Counters filled in by count_catalog(), one per reported dimension.
catalog_by_date = dict()         # ISO date string -> number of operations
catalog_by_ip = dict()           # client IP -> number of operations
catalog_by_ip_active = dict()    # client IP -> list of distinct datetimes seen
catalog_by_country = dict()      # replaced by ip_to_country() before reporting
catalog_by_raw_agent = dict()    # full agent string -> number of operations
catalog_by_pkg_version = dict()  # "pversion" agent field -> number of operations
catalog_by_arch = dict()         # "arch" agent field -> number of operations

def report_catalog_by_arch():
        """Print every "<arch> <count>" pair from catalog_by_arch.

        The pairs are written to standard output, one per line, in the
        dictionary's own iteration order and wrapped in an HTML <pre>
        element.
        """
        print("<pre>")
        for arch, count in catalog_by_arch.items():
                print("%s %s" % (arch, count))
        print("</pre>")

def report_catalog_by_raw_agent(summary_file = None):
        """Print every "<agent> <count>" pair from catalog_by_raw_agent.

        Pairs are listed in ascending order of count, ties broken by the
        agent string, inside an HTML <pre> element on standard output.

        summary_file is accepted but not used by this function.
        """
        # Sorting (count, agent) tuples yields the count-then-agent ordering.
        ranked = sorted([(count, agent)
            for agent, count in catalog_by_raw_agent.items()])

        print("<pre>")
        for count, agent in ranked:
                print("%s %s" % (agent, count))
        print("</pre>")

def report_catalog_by_pkg_version():
        """Print every "<version> <count>" pair from catalog_by_pkg_version.

        Pairs are listed in ascending order of count, ties broken by the
        version string, inside an HTML <pre> element on standard output.
        """
        def count_then_version(entry):
                # entry is a (version, count) pair from the dictionary.
                return (entry[1], entry[0])

        ranked = sorted(catalog_by_pkg_version.items(), key=count_then_version)

        print("<pre>")
        for version, count in ranked:
                print("%s %s" % (version, count))
        print("</pre>")

def report_catalog_by_lang():
        """Print per-language counts and an <img> tag for a pie chart of them.

        Each "<language> <count>" pair from catalog_by_lang is written to
        standard output in ascending order of count (ties broken by the
        language string) inside an HTML <pre> element.  A Google Chart API
        URL describing the same data is then passed to retrieve_chart(), and
        an <img> tag whose src is the value it returns is printed.

        NOTE(review): catalog_by_lang is not defined in this file and nothing
        here populates it; unless the an_report star import provides it,
        calling this function raises NameError -- confirm before use.
        """
        ranked = sorted(catalog_by_lang.items(), key=lambda kv: (kv[1], kv[0]))

        print("<pre>")
        for lang, count in ranked:
                print("%s %s" % (lang, count))
        print("</pre>")

        # join() keeps exactly one label per data value.  The previous
        # 'labels == ""' test lost the separator when the first label was an
        # empty string, which shifted every later label by one slice.
        labels = "|".join(["%s" % lang for lang, count in ranked])
        data = ",".join(["%d" % count for lang, count in ranked])

        # The data scale runs from zero to the largest count (zero when there
        # is no data); names chosen so the min/max builtins are not shadowed.
        scale_lo = 0
        scale_hi = max([0] + [count for lang, count in ranked])

        url = "cht=p3&chs=800x300&chl=%s&chds=%d,%d&chd=t:%s" % (labels,
            scale_lo, scale_hi, data)

        fname = retrieve_chart("http://chart.apis.google.com/chart?%s" % url, "lang")
        print("<img src=\"%s\" />" % fname)

def count_catalog(mg, d):
        """Tally one catalog log entry into the module-level counters.

        mg is the dictionary of fields for the entry; its "ip" and "agent"
        values are read.  d is the datetime associated with the entry.

        Updates catalog_by_date, catalog_by_ip, catalog_by_ip_active and
        catalog_by_raw_agent for every entry.  When the agent string matches
        pkg_agent_pat, catalog_by_arch and catalog_by_pkg_version are also
        updated from the pattern's "arch" and "pversion" groups.

        Raises KeyError if mg lacks "ip" or "agent".
        """
        day = d.date().isoformat()
        catalog_by_date[day] = catalog_by_date.get(day, 0) + 1

        ip = mg["ip"]
        catalog_by_ip[ip] = catalog_by_ip.get(ip, 0) + 1

        # Keep each distinct datetime at which this address was seen.
        seen = catalog_by_ip_active.setdefault(ip, [])
        if d not in seen:
                seen.append(d)

        # .get() replaces the former bare "except:", which would have hidden
        # any error raised while counting, not just a missing key.
        raw_agent = mg["agent"]
        catalog_by_raw_agent[raw_agent] = \
            catalog_by_raw_agent.get(raw_agent, 0) + 1

        # Agent-specific measurements.

        agent = pkg_agent_pat.search(raw_agent)
        if agent is None:
                return

        ag = agent.groupdict()

        arch = ag["arch"]
        catalog_by_arch[arch] = catalog_by_arch.get(arch, 0) + 1

        pversion = ag["pversion"]
        catalog_by_pkg_version[pversion] = \
            catalog_by_pkg_version.get(pversion, 0) + 1

# Command-line options:
#   -a DATE  sets 'after'; DATE is tried as YYYY-Mon-DD, then YYYY-MM-DD.
#            Log entries dated earlier than this are skipped.
#   -b ARG   stored unparsed in 'before'; not consulted elsewhere in this file.
#   -s       sets summary_file from prefix_summary_open("catalog").
#   -w ARG   stored in 'active_window'; not consulted elsewhere in this file.
# Remaining arguments are the log files to read.
opts, args = getopt.getopt(sys.argv[1:], "a:b:sw:")

for opt, arg in opts:
        if opt == "-a":
                try:
                        after_tm = time.strptime(arg, "%Y-%b-%d")
                except ValueError:
                        after_tm = time.strptime(arg, "%Y-%m-%d")
                after = datetime.datetime(*after_tm[0:6])
        elif opt == "-b":
                before = arg
        elif opt == "-s":
                summary_file = prefix_summary_open("catalog")
        elif opt == "-w":
                active_window = arg

host_cache_set_file_name()
host_cache_load()

# The most recently parsed date string and its datetime.  Consecutive
# entries that carry the same date string reuse the parsed value.
lastdate = None
lastdatetime = None

for line in fileinput.input(args):
        entry = comb_log_pat.search(line)
        if not entry:
                # Not a line the combined-log pattern recognises.
                continue

        mg = entry.groupdict()
        datestr = mg["date"]

        if not (lastdatetime and datestr == lastdate):
                d = datetime.datetime(*(time.strptime(datestr, "%d/%b/%Y")[0:6]))
                lastdate = datestr
                lastdatetime = d
        else:
                d = lastdatetime

        # Entries older than the -a cutoff are not counted.
        if after and d < after:
                continue

        count_catalog(mg, d)

host_cache_save()
catalog_by_country = ip_to_country(catalog_by_ip)

# Every an_report helper below receives the same summary_file keyword.
summary_kw = {"summary_file": summary_file}

report_section_begin("Catalogs", **summary_kw)
report_cols_begin(**summary_kw)

# Left column: operations by IP and by date.
report_col_begin("l", **summary_kw)
report_by_ip(catalog_by_ip, "catalog", **summary_kw)
report_by_date(catalog_by_date, "catalog", **summary_kw)
report_col_end("l", **summary_kw)

# Right column: operations by country.
report_col_begin("r", **summary_kw)
report_by_country(catalog_by_country, "catalog", **summary_kw)
report_col_end("r", **summary_kw)

report_cols_end(**summary_kw)

# Raw agents go through an_report's report_by_raw_agent(), not the local
# report_catalog_by_raw_agent(); the two local reports below print to
# standard output only.
report_by_raw_agent(catalog_by_raw_agent, "catalog", **summary_kw)
report_catalog_by_pkg_version()
report_catalog_by_arch()
report_section_end(**summary_kw)