// Copyright (c) 2015 Andy Leap, Google
// SPDX-License-Identifier: MIT
|
|
|
|
package microformats
|
|
|
|
import (
|
|
"regexp"
|
|
"strconv"
|
|
"strings"
|
|
"time"
|
|
|
|
"golang.org/x/net/html"
|
|
)
|
|
|
|
// datetime is a microformats datetime value that is assembled piece by piece.
// It wraps a time.Time whose date, time of day, and timezone can each be
// assigned independently. Each component can be assigned only once; after it
// has been set, later attempts to set it again are ignored.
type datetime struct {
	// t accumulates the components that have been set so far.
	t time.Time

	// hasDate, hasTime, and hasTZ record which components of t have been set.
	hasDate bool
	hasTime bool
	hasTZ   bool

	// hasSeconds records whether the time of day was given with seconds.
	hasSeconds bool
}
|
|
|
|
// Set the date for d. Has no effect if date has already been set.
|
|
func (d *datetime) setDate(year int, month time.Month, day int) {
|
|
if d.hasDate {
|
|
return
|
|
}
|
|
d.t = time.Date(year, month, day, d.t.Hour(), d.t.Minute(), d.t.Second(), 0, d.t.Location())
|
|
d.hasDate = true
|
|
}
|
|
|
|
// Set the time for d. Has no effect if time has already been set.
|
|
func (d *datetime) setTime(hour, min, sec int) {
|
|
if d.hasTime {
|
|
return
|
|
}
|
|
d.t = time.Date(d.t.Year(), d.t.Month(), d.t.Day(), hour, min, sec, 0, d.t.Location())
|
|
d.hasTime = true
|
|
}
|
|
|
|
// Set the timezone for d. Has no effect if timezone has already been set.
|
|
func (d *datetime) setTZ(loc *time.Location) {
|
|
if d.hasTZ {
|
|
return
|
|
}
|
|
d.t = time.Date(d.t.Year(), d.t.Month(), d.t.Day(), d.t.Hour(), d.t.Minute(), d.t.Second(), 0, loc)
|
|
d.hasTZ = true
|
|
}
|
|
|
|
// Layouts, in Go reference-time notation, for the string forms of a datetime.
const (
	// date only
	formatDate = "2006-01-02"

	// date and time, no timezone
	formatDateTime        = "2006-01-02 15:04"
	formatDateTimeSeconds = "2006-01-02 15:04:05"

	// date and time followed by a numeric UTC offset
	formatDateTimeTZ        = "2006-01-02 15:04-0700"
	formatDateTimeSecondsTZ = "2006-01-02 15:04:05-0700"
)
|
|
|
|
// String returns a string representation of d, using the format of
|
|
// "YYYY-MM-DD HH:MM:SS+XXYY", but omitting certain values not specified in the
|
|
// creation of d. For example:
|
|
//
|
|
// - if no date was specified, d is invalid and an empty string is returned
|
|
// - if no time was specified, time and timezone are omitted
|
|
// - if no timezone was specified, timezone is omitted
|
|
// - if no seconds were specified, seconds are omitted
|
|
// - if no minutes were specified, 00 is implied
|
|
//
|
|
// Microformat docs: http://microformats.org/wiki/value-class-pattern#Date_and_time_parsing
|
|
func (d *datetime) String() string {
|
|
if !d.hasDate {
|
|
return ""
|
|
}
|
|
if !d.hasTime {
|
|
return d.t.Format(formatDate)
|
|
}
|
|
if !d.hasTZ {
|
|
if d.hasSeconds {
|
|
return d.t.Format(formatDateTimeSeconds)
|
|
}
|
|
return d.t.Format(formatDateTime)
|
|
}
|
|
|
|
var value string
|
|
if d.hasSeconds {
|
|
value = d.t.Format(formatDateTimeSecondsTZ)
|
|
} else {
|
|
value = d.t.Format(formatDateTimeTZ)
|
|
}
|
|
|
|
// convert "+0000" to "Z", since time doesn't support a "Z-0700" format
|
|
if strings.HasSuffix(value, "+0000") {
|
|
value = strings.TrimSuffix(value, "+0000") + "Z"
|
|
}
|
|
return value
|
|
}
|
|
|
|
// reOrdinalDate matches an ordinal date of the form YYYY-DDD, capturing the
// year and the day of the year. The pattern is unanchored, so it also matches
// when the ordinal date is only part of a longer string.
var reOrdinalDate = regexp.MustCompile(`(\d{4})-(\d{3})`)

// reAMPM matches an am/pm indicator at the end of a string, written either as
// "AM"/"PM" or as "A.M."/"P.M.". It only matches uppercase letters, so the
// input must be uppercased first. The two capture groups hold the two letters
// of the indicator.
var reAMPM = regexp.MustCompile(`(A|P)\.?(M)\.?$`)
|
|
|
|
// Layouts accepted when parsing datetime values. Each table is tried in
// order and the first layout that parses wins.
var (
	// datetimeFormats holds layouts for a combined date and time, with or
	// without a zone offset. hasSeconds tells whether the layout carries a
	// seconds field.
	datetimeFormats = []struct {
		format     string
		hasSeconds bool
	}{
		{format: time.RFC3339, hasSeconds: true},
		{format: "2006-01-02T15:04:05-07:00", hasSeconds: true},
		{format: "2006-01-02T15:04:05-0700", hasSeconds: true},
		{format: "2006-01-02T15:04:05-07", hasSeconds: true},
		{format: "2006-01-02T15:04:05", hasSeconds: true},
		{format: "2006-01-02T15:04Z07:00", hasSeconds: false},
		{format: "2006-01-02T15:04-07:00", hasSeconds: false},
		{format: "2006-01-02T15:04-0700", hasSeconds: false},
		{format: "2006-01-02T15:04-07", hasSeconds: false},
		{format: "2006-01-02T15:04", hasSeconds: false},
	}

	// timeFormats holds layouts for a time of day on its own. hasSeconds and
	// hasTZ tell whether the layout carries a seconds field and a zone
	// offset, respectively.
	timeFormats = []struct {
		format            string
		hasSeconds, hasTZ bool
	}{
		// 24-hour clock
		{format: "15:04:05", hasSeconds: true, hasTZ: false},
		{format: "15:04", hasSeconds: false, hasTZ: false},

		// 24-hour clock with timezone
		{format: "15:04:05Z07:00", hasSeconds: true, hasTZ: true},
		{format: "15:04:05-0700", hasSeconds: true, hasTZ: true},
		{format: "15:04Z07:00", hasSeconds: false, hasTZ: true},
		{format: "15:04-0700", hasSeconds: false, hasTZ: true},

		// 12-hour clock with am/pm indicator
		{format: "3:04:05PM", hasSeconds: true, hasTZ: false},
		{format: "3:04PM", hasSeconds: false, hasTZ: false},
		{format: "3PM", hasSeconds: false, hasTZ: false},
	}

	// tzFormats holds layouts for a zone offset on its own.
	tzFormats = []string{
		"Z07:00",
		"-0700",
		"-07",
	}
)
|
|
|
|
func (d *datetime) Parse(s string) {
|
|
// normalize datetime value
|
|
s = strings.ToUpper(s)
|
|
s = strings.Replace(s, " ", "T", 1)
|
|
s = reAMPM.ReplaceAllString(s, "$1$2")
|
|
|
|
// datetime formats
|
|
for _, f := range datetimeFormats {
|
|
if t, err := time.Parse(f.format, s); err == nil {
|
|
d.setDate(t.Year(), t.Month(), t.Day())
|
|
d.setTime(t.Hour(), t.Minute(), t.Second())
|
|
d.setTZ(t.Location())
|
|
d.hasSeconds = f.hasSeconds
|
|
return
|
|
}
|
|
}
|
|
|
|
// date-only formats
|
|
if t, err := time.Parse(formatDate, s); err == nil {
|
|
d.setDate(t.Year(), t.Month(), t.Day())
|
|
return
|
|
}
|
|
if m := reOrdinalDate.FindStringSubmatch(s); m != nil {
|
|
year, _ := strconv.Atoi(m[1])
|
|
days, _ := strconv.Atoi(m[2])
|
|
t := time.Date(year, 1, 1, 0, 0, 0, 0, time.UTC)
|
|
t = t.AddDate(0, 0, days-1)
|
|
d.setDate(t.Year(), t.Month(), t.Day())
|
|
return
|
|
}
|
|
|
|
// time formats
|
|
for _, f := range timeFormats {
|
|
if t, err := time.Parse(f.format, s); err == nil {
|
|
d.setTime(t.Hour(), t.Minute(), t.Second())
|
|
d.hasSeconds = f.hasSeconds
|
|
if f.hasTZ {
|
|
d.setTZ(t.Location())
|
|
}
|
|
return
|
|
}
|
|
}
|
|
|
|
// timezone only formats
|
|
for _, format := range tzFormats {
|
|
if t, err := time.Parse(format, s); err == nil {
|
|
d.setTZ(t.Location())
|
|
return
|
|
}
|
|
}
|
|
}
|
|
|
|
func getDateTimeValue(node *html.Node) *string {
|
|
values := parseValueClassPattern(node, true)
|
|
var d datetime
|
|
for _, v := range values {
|
|
d.Parse(strings.TrimSpace(v))
|
|
}
|
|
if value := d.String(); value != "" {
|
|
return &value
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// Process implied date for 'end' property. This is technically part of the value class pattern
|
|
// parsing rules, and at this point, we don't know if these were specified using VCP, but we
|
|
// imply date all the same anyway.
|
|
func implyEndDate(item *Microformat) {
|
|
var startDate time.Time
|
|
for _, v := range item.Properties["start"] {
|
|
if start, ok := v.(string); ok {
|
|
var dt datetime
|
|
dt.Parse(start)
|
|
if dt.hasDate {
|
|
startDate = dt.t
|
|
break
|
|
}
|
|
}
|
|
}
|
|
if startDate.IsZero() {
|
|
return
|
|
}
|
|
|
|
for i, v := range item.Properties["end"] {
|
|
if end, ok := v.(string); ok {
|
|
var dt datetime
|
|
dt.Parse(end)
|
|
if !dt.t.IsZero() && !dt.hasDate {
|
|
dt.setDate(startDate.Year(), startDate.Month(), startDate.Day())
|
|
item.Properties["end"][i] = dt.String()
|
|
}
|
|
}
|
|
}
|
|
}
|