############# sure configuration file #############
# Flat "key = value" settings; numbered keys (.1, .2, ...) define alternatives
# of the same kind (log file names, log line formats, filters).

# where the cached OpenURL Context Object Documents are written
# NOTE(review): empty in this file - presumably has to be set per installation
cachedirectory =

# BaseURL which can be used to request usage data
# NOTE(review): empty in this file - presumably has to be set per installation
resolver =

# name of the process that generates the OpenURL Context Object Document
generation_process = sure-generate.pl

# where the logfiles reside
# NOTE(review): empty in this file - presumably has to be set per installation
log_directory =

# the format of the name of the logfiles, as a regular expression
# capture the year, month and day, if they exist in the filename
# .1 matches a name ending in ssl_request_log (captures nothing)
# .2 captures year, month and day from ssl_request_log-YYYYMMDD
log_file_format.1 = ssl_request_log$
log_file_format.2 = ssl_request_log-(\d\d\d\d)(\d\d)(\d\d)

# the web server log line format, as a regular expression
# the web_server_line_format.N.capture.M keys describe what is captured:
# N is the number of the format, M the number of the capture group in its regex
# format 1: 9 capture groups; referrer and useragent are an optional trailing pair
web_server_line_format.1.regex = ^([^ ]+) ([^ ]+) ([^ ]+) \[([^\]]+)\] "([^"\\]*(?:\\.[^"\\]*)*)" (\d\d\d) ([^ ]+)(?: "(.*?)" "(.*?)")?$
web_server_line_format.1.capture.1 = ip
web_server_line_format.1.capture.2 = identity
web_server_line_format.1.capture.3 = userid
web_server_line_format.1.capture.4 = datetime
web_server_line_format.1.capture.5 = request
web_server_line_format.1.capture.6 = statuscode
web_server_line_format.1.capture.7 = size
web_server_line_format.1.capture.8 = referrer
web_server_line_format.1.capture.9 = useragent

# format 2: 13 capture groups; the line starts with the bracketed datetime
web_server_line_format.2.regex = ^\[([^\]]+)\] ([^ ]+) ([^ ]+) ([^ ]+) ([^ ]+) "([^"\\]*(?:\\.[^"\\]*)*)" ([^ ]+) ([^ ]+) "(.*?)" "(.*?)" (\d+) (\d+) (\d+)$
web_server_line_format.2.capture.1 = datetime
web_server_line_format.2.capture.2 = ip
web_server_line_format.2.capture.3 = securityprotocol
web_server_line_format.2.capture.4 = encryption
web_server_line_format.2.capture.5 = server
web_server_line_format.2.capture.6 = request
web_server_line_format.2.capture.7 = unused1
web_server_line_format.2.capture.8 = unused2
web_server_line_format.2.capture.9 = referrer
web_server_line_format.2.capture.10 = useragent
web_server_line_format.2.capture.11 = statuscode
web_server_line_format.2.capture.12 = size
web_server_line_format.2.capture.13 = unused3

# divide the captured parts further, if needed
# datetime, e.g. 01/Jan/2010:12:34:56 followed by a space and the timezone
web_server_datetime_format.1.regex = ^(\d+)/(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)/(\d\d\d\d):(\d\d):(\d\d):(\d\d) (.*)
web_server_datetime_format.1.capture.1 = day
web_server_datetime_format.1.capture.2 = shortmonth
web_server_datetime_format.1.capture.3 = year
web_server_datetime_format.1.capture.4 = hour
web_server_datetime_format.1.capture.5 = minute
web_server_datetime_format.1.capture.6 = seconds
web_server_datetime_format.1.capture.7 = timezone

# request: only GET requests match these expressions
# .1 captures the requested url and the protocol
web_server_request_format.1.regex = ^GET ([^ ]+) (.*)$
web_server_request_format.1.capture.1 = requesturl
web_server_request_format.1.capture.2 = requestprotocol
# .2 captures "handle" or "bitstream" (with an optional /dspace prefix) as type
web_server_request_format.2.regex = ^GET /(?:dspace/)?(handle|bitstream)/
web_server_request_format.2.capture.1 = type
# .3 captures the digits/digits part that follows handle/ or bitstream/
web_server_request_format.3.regex = ^GET /(?:dspace/)?(?:handle|bitstream)/(\d+/\d+)
web_server_request_format.3.capture.1 = handle

# ip: captures the first three dot-separated numbers of a four-number address
web_server_ip_format.1.regex = ^(\d+\.\d+\.\d+)\.\d+$
web_server_ip_format.1.capture.1 = cclasssubnet

# translate some strings to another: in "translate.shortmonth.Jan = month=01",
# translate is the prefix, shortmonth is the old name of the key and Jan is a
# possible value. month is the name of the new key and 01 is the new value for
# that key
translate.shortmonth.Jan = month=01
translate.shortmonth.Feb = month=02
translate.shortmonth.Mar = month=03
translate.shortmonth.Apr = month=04
translate.shortmonth.May = month=05
translate.shortmonth.Jun = month=06
translate.shortmonth.Jul = month=07
translate.shortmonth.Aug = month=08
translate.shortmonth.Sep = month=09
translate.shortmonth.Oct = month=10
translate.shortmonth.Nov = month=11
translate.shortmonth.Dec = month=12
translate.type.handle = type=metadataView
translate.type.bitstream = type=objectFile

# filter webserver lines based on the values of the keys above
# web_server_filter is the prefix, the 1 means it is the first filter (multiple
# filters are allowed), requesturl is the key and the value is a regular expression
# NOTE(review): the statuscode expression 200 is not anchored; unless the reader
# anchors it, it also matches longer digit strings containing 200 - confirm
web_server_filter.1.requesturl = ^/(?:dspace/)?(?:handle|bitstream)/\d+/\d+
web_server_filter.1.statuscode = 200

# process the data by using an (external) script. In "process.hash = produce_ip_hash ip",
# process is the prefix, hash is the name of the new key, produce_ip_hash is the
# name of the function to use and ip is the name of the key for which data is used.
# NOTE(review): this comment used to say multiple keys are comma separated, but
# the usageeventid entry below separates them with spaces - confirm which form
# the reader accepts.
# The function is located in the suregenerate script (presumably the
# generation_process named above - confirm). If you want an external script,
# use the absolute path to the script. The script is run with the value(s) of the key(s)
# that are defined. Return the value you want. The script is not run when no value
# is present for the defined key.
process.countrycode = produce_country_code ip
process.day = process_day day
process.timezone = process_timezone timezone
process.hash = produce_ip_hash ip
process.referrer = xml_encode referrer
process.requesturl = xml_encode requesturl
process.referrername = produce_referrer_name referrer
process.usageeventid = produce_usage_event_id ip handle year month day hour minute seconds