
250 lines
6.3 KiB

// Copyright (c) 2015 Andy Leap, Google
// SPDX-License-Identifier: MIT
package microformats
import (
// datetime represents a microformats datetime value. It encapsulates a
// time.Time value whose date, time, and timezone value can each be set
// independently. Each of these values can be set only once; once set,
// subsequent calls to set the value will be ignored.
type datetime struct {
	// t accumulates the date, time of day, and location set so far.
	t time.Time

	// track whether date, time (with or without seconds), and timezone values have been set
	hasDate, hasTime, hasTZ bool
	hasSeconds              bool
}

// setDate sets the calendar date of d, keeping the hour, minute, second, and
// location already stored in d.t. Has no effect if date has already been set.
func (d *datetime) setDate(year int, month time.Month, day int) {
	if d.hasDate {
		return
	}
	d.t = time.Date(year, month, day, d.t.Hour(), d.t.Minute(), d.t.Second(), 0, d.t.Location())
	d.hasDate = true
}

// setTime sets the time of day of d, keeping the year, month, day, and
// location already stored in d.t. Has no effect if time has already been set.
func (d *datetime) setTime(hour, min, sec int) {
	if d.hasTime {
		return
	}
	d.t = time.Date(d.t.Year(), d.t.Month(), d.t.Day(), hour, min, sec, 0, d.t.Location())
	d.hasTime = true
}

// setTZ sets the timezone of d. The stored date and time of day are kept as
// written and re-interpreted in loc; the value is not converted between
// zones. loc must be non-nil, since time.Date panics on a nil location.
// Has no effect if timezone has already been set.
func (d *datetime) setTZ(loc *time.Location) {
	if d.hasTZ {
		return
	}
	d.t = time.Date(d.t.Year(), d.t.Month(), d.t.Day(), d.t.Hour(), d.t.Minute(), d.t.Second(), 0, loc)
	d.hasTZ = true
}
// Layouts (in time.Format reference-time notation) used when rendering a
// datetime as a string. formatDate is also used when parsing date-only
// values.
const (
	formatDate              = "2006-01-02"
	formatDateTime          = "2006-01-02 15:04"
	formatDateTimeSeconds   = "2006-01-02 15:04:05"
	formatDateTimeTZ        = "2006-01-02 15:04-0700"
	formatDateTimeSecondsTZ = "2006-01-02 15:04:05-0700"
)
// String returns a string representation of d, using the format of
// "YYYY-MM-DD HH:MM:SS+XXYY", but omitting certain values not specified in the
// creation of d. For example:
// - if no date was specified, d is invalid and an empty string is returned
// - if no time was specified, time and timezone are omitted
// - if no timezone was specified, timezone is omitted
// - if no seconds were specified, seconds are omitted
// - if no minutes were specified, 00 is implied
// Microformat docs:
func (d *datetime) String() string {
if !d.hasDate {
return ""
if !d.hasTime {
return d.t.Format(formatDate)
if !d.hasTZ {
if d.hasSeconds {
return d.t.Format(formatDateTimeSeconds)
return d.t.Format(formatDateTime)
var value string
if d.hasSeconds {
value = d.t.Format(formatDateTimeSecondsTZ)
} else {
value = d.t.Format(formatDateTimeTZ)
// convert "+0000" to "Z", since time doesn't support a "Z-0700" format
if strings.HasSuffix(value, "+0000") {
value = strings.TrimSuffix(value, "+0000") + "Z"
return value
// Regular expressions used while parsing datetime values. They are compiled
// once at package scope.
var (
	// regex to match ordinal dates of the form YYYY-DDD. Capture group 1 is
	// the four-digit year and group 2 the three-digit day of the year. The
	// pattern is not anchored, so it can match anywhere in the input.
	reOrdinalDate = regexp.MustCompile(`(\d{4})-(\d{3})`)

	// regex to match various permutations of am/pm indicator at the end of
	// the string. Supports the forms: "AM" and "A.M." (each period is
	// optional). This assumes that the string has been converted to
	// uppercase before comparison. Contains two capture groups, one for each
	// letter matched.
	reAMPM = regexp.MustCompile(`(A|P)\.?(M)\.?$`)
)
// various date time format strings
//
// These tables are ranged over, in slice order, by (*datetime).Parse, which
// hands each layout to time.Parse. Parse upper-cases its input and replaces
// spaces with "T" before matching, which is why the combined layouts below
// use the "T" separator.
var (
// datetimeFormats lists layouts that contain both a date and a time of day.
// hasSeconds records whether the layout includes a seconds field.
datetimeFormats = []struct {
format string
hasSeconds bool
// with seconds; the entries differ only in how the zone offset is written
// (RFC 3339 "Z"/"+hh:mm", "+hh:mm", "+hhmm", "+hh", or no zone at all)
{time.RFC3339, true},
{"2006-01-02T15:04:05-07:00", true},
{"2006-01-02T15:04:05-0700", true},
{"2006-01-02T15:04:05-07", true},
{"2006-01-02T15:04:05", true},
// without seconds, same zone variations
{"2006-01-02T15:04Z07:00", false},
{"2006-01-02T15:04-07:00", false},
{"2006-01-02T15:04-0700", false},
{"2006-01-02T15:04-07", false},
{"2006-01-02T15:04", false},
// timeFormats lists layouts for a time of day without a date. hasSeconds
// records whether the layout includes seconds and hasTZ whether it includes
// a zone offset.
timeFormats = []struct {
format string
hasSeconds, hasTZ bool
{"15:04:05", true, false},
{"15:04", false, false},
// with timezone
{"15:04:05Z07:00", true, true},
{"15:04:05-0700", true, true},
{"15:04Z07:00", false, true},
{"15:04-0700", false, true},
// with am/pm indicator
// (Parse rewrites "A.M."/"P.M." variants to "AM"/"PM" before matching)
{"3:04:05PM", true, false},
{"3:04PM", false, false},
{"3PM", false, false},
// tzFormats lists layouts for a bare timezone value.
// NOTE(review): the entries of this slice are not visible in this view;
// confirm them against the complete file.
tzFormats = []string{
// Parse examines the datetime string s and records the components it
// recognizes on d through setDate and setTime, which are documented to leave
// an already-set component untouched. It tries, in order: combined datetime
// layouts, the date-only layout, ordinal dates (YYYY-DDD), time-only layouts,
// and timezone-only layouts. It returns nothing; input that matches no layout
// leaves d unchanged by the statements visible here.
//
// NOTE(review): several statements of this function are not visible in this
// view (the bodies of the hasTZ and tzFormats branches, and any returns
// between sections), so the exact control flow after a successful match
// should be confirmed against the complete file.
func (d *datetime) Parse(s string) {
// normalize datetime value
// upper-case first: reAMPM is documented to assume uppercase input
s = strings.ToUpper(s)
// every space becomes "T", the date/time separator used by datetimeFormats
// NOTE(review): this also rewrites a space before an am/pm marker
// ("3:04 PM" -> "3:04TPM"), which none of the visible timeFormats layouts
// accept; confirm whether such input is expected.
s = strings.Replace(s, " ", "T", -1)
// collapse a trailing "A.M."/"P.M." style indicator to plain "AM"/"PM"
s = reAMPM.ReplaceAllString(s, "$1$2")
// datetime formats
for _, f := range datetimeFormats {
if t, err := time.Parse(f.format, s); err == nil {
d.setDate(t.Year(), t.Month(), t.Day())
d.setTime(t.Hour(), t.Minute(), t.Second())
d.hasSeconds = f.hasSeconds
// date-only formats
if t, err := time.Parse(formatDate, s); err == nil {
d.setDate(t.Year(), t.Month(), t.Day())
// ordinal date: m[1] is the year and m[2] the day of the year
if m := reOrdinalDate.FindStringSubmatch(s); m != nil {
// Atoi errors are ignored: the capture groups contain only digits
year, _ := strconv.Atoi(m[1])
days, _ := strconv.Atoi(m[2])
// start at January 1 and advance days-1 days, so day 001 is January 1;
// no range check on the day number is visible here
t := time.Date(year, 1, 1, 0, 0, 0, 0, time.UTC)
t = t.AddDate(0, 0, days-1)
d.setDate(t.Year(), t.Month(), t.Day())
// time formats
for _, f := range timeFormats {
if t, err := time.Parse(f.format, s); err == nil {
d.setTime(t.Hour(), t.Minute(), t.Second())
d.hasSeconds = f.hasSeconds
// NOTE(review): the body of this branch is not visible; presumably it
// records the parsed zone on d — confirm.
if f.hasTZ {
// timezone only formats
for _, format := range tzFormats {
// NOTE(review): the body of this branch is not visible; presumably it
// records the parsed zone on d — confirm.
if t, err := time.Parse(format, s); err == nil {
// getDateTimeValue obtains the value class pattern values of node from
// parseValueClassPattern (called with true as its second argument) and
// returns a pointer to the string form of the datetime d built from them.
// It returns nil when d.String() is empty, which String does when no date
// has been set.
//
// NOTE(review): the body of the loop over values is not visible in this
// view; presumably each value is passed to d.Parse — confirm against the
// complete file.
func getDateTimeValue(node *html.Node) *string {
values := parseValueClassPattern(node, true)
var d datetime
for _, v := range values {
// an empty string from String means d has no date and is treated as invalid
if value := d.String(); value != "" {
return &value
return nil
// Process implied date for 'end' property. This is technically part of the value class pattern
// parsing rules, and at this point, we don't know if these were specified using VCP, but we
// imply date all the same anyway.
//
// A date is taken from the string values of item.Properties["start"], and
// each string value of item.Properties["end"] that has a time but no date is
// rewritten in place with that date applied.
//
// NOTE(review): the calls that parse the start and end strings into dt, and
// the body of the startDate.IsZero() check, are not visible in this view;
// confirm them against the complete file.
func implyEndDate(item *Microformat) {
var startDate time.Time
for _, v := range item.Properties["start"] {
// only string values are considered; other value types are skipped
if start, ok := v.(string); ok {
var dt datetime
// remember the start value's time once it carries a date
if dt.hasDate {
startDate = dt.t
// NOTE(review): presumably nothing can be implied without a start date,
// so this branch exits early — body not visible, confirm.
if startDate.IsZero() {
for i, v := range item.Properties["end"] {
if end, ok := v.(string); ok {
var dt datetime
// only end values that have some time information but no date of their own
if !dt.t.IsZero() && !dt.hasDate {
dt.setDate(startDate.Year(), startDate.Month(), startDate.Day())
// write the completed datetime back over the original end value
item.Properties["end"][i] = dt.String()