Package parsedatetime
[hide private]
[frames | no frames]

Source Code for Package parsedatetime

   1  # -*- coding: utf-8 -*- 
   2  # 
   3  # vim: sw=2 ts=2 sts=2 
   4  # 
   5  # Copyright 2004-2019 Mike Taylor 
   6  # 
   7  # Licensed under the Apache License, Version 2.0 (the "License"); 
   8  # you may not use this file except in compliance with the License. 
   9  # You may obtain a copy of the License at 
  10  # 
  11  #     http://www.apache.org/licenses/LICENSE-2.0 
  12  # 
  13  # Unless required by applicable law or agreed to in writing, software 
  14  # distributed under the License is distributed on an "AS IS" BASIS, 
  15  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
  16  # See the License for the specific language governing permissions and 
  17  # limitations under the License. 
  18   
  19  """parsedatetime 
  20   
  21  Parse human-readable date/time text. 
  22   
  23  Requires Python 2.7 or later 
  24  """ 
  25   
  26  from __future__ import with_statement, absolute_import, unicode_literals 
  27   
  28  import re 
  29  import time 
  30  import logging 
  31  import warnings 
  32  import datetime 
  33  import calendar 
  34  import contextlib 
  35  import email.utils 
  36   
  37  from .pdt_locales import (locales as _locales, 
  38                            get_icu, load_locale) 
  39  from .context import pdtContext, pdtContextStack 
  40  from .warns import pdt20DeprecationWarning 
  41   
  42   
  43  __author__ = 'Mike Taylor' 
  44  __email__ = 'bear@bear.im' 
  45  __copyright__ = 'Copyright (c) 2017 Mike Taylor' 
  46  __license__ = 'Apache License 2.0' 
  47  __version__ = '2.6' 
  48  __url__ = 'https://github.com/bear/parsedatetime' 
  49  __download_url__ = 'https://pypi.python.org/pypi/parsedatetime' 
  50  __description__ = 'Parse human-readable date/time text.' 
  51   
  52  # as a library, do *not* setup logging 
  53  # see docs.python.org/2/howto/logging.html#configuring-logging-for-a-library 
  54  # Set default logging handler to avoid "No handler found" warnings. 
  55   
  56  try:  # Python 2.7+ 
  57      from logging import NullHandler 
  58  except ImportError: 
59 - class NullHandler(logging.Handler):
60
61 - def emit(self, record):
62 pass
63 64 log = logging.getLogger(__name__) 65 log.addHandler(NullHandler()) 66 67 debug = False 68 69 pdtLocales = dict([(x, load_locale(x)) for x in _locales])
70 71 72 # Copied from feedparser.py 73 # Universal Feedparser 74 # Copyright (c) 2002-2006, Mark Pilgrim, All rights reserved. 75 # Originally a def inside of _parse_date_w3dtf() 76 -def _extract_date(m):
77 year = int(m.group('year')) 78 if year < 100: 79 year = 100 * int(time.gmtime()[0] / 100) + int(year) 80 if year < 1000: 81 return 0, 0, 0 82 julian = m.group('julian') 83 if julian: 84 julian = int(julian) 85 month = julian / 30 + 1 86 day = julian % 30 + 1 87 jday = None 88 while jday != julian: 89 t = time.mktime((year, month, day, 0, 0, 0, 0, 0, 0)) 90 jday = time.gmtime(t)[-2] 91 diff = abs(jday - julian) 92 if jday > julian: 93 if diff < day: 94 day = day - diff 95 else: 96 month = month - 1 97 day = 31 98 elif jday < julian: 99 if day + diff < 28: 100 day = day + diff 101 else: 102 month = month + 1 103 return year, month, day 104 month = m.group('month') 105 day = 1 106 if month is None: 107 month = 1 108 else: 109 month = int(month) 110 day = m.group('day') 111 if day: 112 day = int(day) 113 else: 114 day = 1 115 return year, month, day
116
117 118 # Copied from feedparser.py 119 # Universal Feedparser 120 # Copyright (c) 2002-2006, Mark Pilgrim, All rights reserved. 121 # Originally a def inside of _parse_date_w3dtf() 122 -def _extract_time(m):
123 if not m: 124 return 0, 0, 0 125 hours = m.group('hours') 126 if not hours: 127 return 0, 0, 0 128 hours = int(hours) 129 minutes = int(m.group('minutes')) 130 seconds = m.group('seconds') 131 if seconds: 132 seconds = seconds.replace(',', '.').split('.', 1)[0] 133 seconds = int(seconds) 134 else: 135 seconds = 0 136 return hours, minutes, seconds
137
138 139 -def _pop_time_accuracy(m, ctx):
140 if not m: 141 return 142 if m.group('hours'): 143 ctx.updateAccuracy(ctx.ACU_HOUR) 144 if m.group('minutes'): 145 ctx.updateAccuracy(ctx.ACU_MIN) 146 if m.group('seconds'): 147 ctx.updateAccuracy(ctx.ACU_SEC)
148
149 150 # Copied from feedparser.py 151 # Universal Feedparser 152 # Copyright (c) 2002-2006, Mark Pilgrim, All rights reserved. 153 # Modified to return a tuple instead of mktime 154 # 155 # Original comment: 156 # W3DTF-style date parsing adapted from PyXML xml.utils.iso8601, written by 157 # Drake and licensed under the Python license. Removed all range checking 158 # for month, day, hour, minute, and second, since mktime will normalize 159 # these later 160 -def __closure_parse_date_w3dtf():
161 # the __extract_date and __extract_time methods were 162 # copied-out so they could be used by my code --bear 163 def __extract_tzd(m): 164 '''Return the Time Zone Designator as an offset in seconds from UTC.''' 165 if not m: 166 return 0 167 tzd = m.group('tzd') 168 if not tzd: 169 return 0 170 if tzd == 'Z': 171 return 0 172 hours = int(m.group('tzdhours')) 173 minutes = m.group('tzdminutes') 174 if minutes: 175 minutes = int(minutes) 176 else: 177 minutes = 0 178 offset = (hours * 60 + minutes) * 60 179 if tzd[0] == '+': 180 return -offset 181 return offset
182 183 def _parse_date_w3dtf(dateString): 184 m = __datetime_rx.match(dateString) 185 if m is None or m.group() != dateString: 186 return 187 return _extract_date(m) + _extract_time(m) + (0, 0, 0) 188 189 __date_re = (r'(?P<year>\d\d\d\d)' 190 r'(?:(?P<dsep>-|)' 191 r'(?:(?P<julian>\d\d\d)' 192 r'|(?P<month>\d\d)(?:(?P=dsep)(?P<day>\d\d))?))?') 193 __tzd_re = r'(?P<tzd>[-+](?P<tzdhours>\d\d)(?::?(?P<tzdminutes>\d\d))|Z)' 194 # __tzd_rx = re.compile(__tzd_re) 195 __time_re = (r'(?P<hours>\d\d)(?P<tsep>:|)(?P<minutes>\d\d)' 196 r'(?:(?P=tsep)(?P<seconds>\d\d(?:[.,]\d+)?))?' + __tzd_re) 197 __datetime_re = '%s(?:T%s)?' % (__date_re, __time_re) 198 __datetime_rx = re.compile(__datetime_re) 199 200 return _parse_date_w3dtf 201 202 203 _parse_date_w3dtf = __closure_parse_date_w3dtf() 204 del __closure_parse_date_w3dtf 205 206 _monthnames = set([ 207 'jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul', 208 'aug', 'sep', 'oct', 'nov', 'dec', 209 'january', 'february', 'march', 'april', 'may', 'june', 'july', 210 'august', 'september', 'october', 'november', 'december']) 211 _daynames = set(['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun'])
212 213 214 # Copied from feedparser.py 215 # Universal Feedparser 216 # Copyright (c) 2002-2006, Mark Pilgrim, All rights reserved. 217 # Modified to return a tuple instead of mktime 218 -def _parse_date_rfc822(dateString):
219 '''Parse an RFC822, RFC1123, RFC2822, or asctime-style date''' 220 data = dateString.split() 221 if data[0][-1] in (',', '.') or data[0].lower() in _daynames: 222 del data[0] 223 if len(data) == 4: 224 s = data[3] 225 s = s.split('+', 1) 226 if len(s) == 2: 227 data[3:] = s 228 else: 229 data.append('') 230 dateString = " ".join(data) 231 if len(data) < 5: 232 dateString += ' 00:00:00 GMT' 233 return email.utils.parsedate_tz(dateString)
234 235 236 # rfc822.py defines several time zones, but we define some extra ones. 237 # 'ET' is equivalent to 'EST', etc. 238 # _additional_timezones = {'AT': -400, 'ET': -500, 239 # 'CT': -600, 'MT': -700, 240 # 'PT': -800} 241 # email.utils._timezones.update(_additional_timezones) 242 243 VERSION_FLAG_STYLE = 1 244 VERSION_CONTEXT_STYLE = 2
245 246 247 -class Calendar(object):
248 249 """ 250 A collection of routines to input, parse and manipulate date and times. 251 The text can either be 'normal' date values or it can be human readable. 252 """ 253
def __init__(self, constants=None, version=VERSION_FLAG_STYLE):
    """
    Default constructor for the L{Calendar} class.

    @type  constants: object
    @param constants: Instance of the class L{Constants}; a default
                      L{Constants} instance is created when omitted
    @type  version:   integer
    @param version:   Default style version of current Calendar instance.
                      Valid value can be 1 (L{VERSION_FLAG_STYLE}) or
                      2 (L{VERSION_CONTEXT_STYLE}). See L{parse()}.

    @rtype:  object
    @return: L{Calendar} instance
    """
    self.ptc = Constants() if constants is None else constants
    self.version = version

    # flag style still works but is announced as going away
    if version == VERSION_FLAG_STYLE:
        message = (
            'Flag style will be deprecated in parsedatetime 2.0. '
            'Instead use the context style by instantiating `Calendar()` '
            'with argument `version=parsedatetime.VERSION_CONTEXT_STYLE`.')
        warnings.warn(message, pdt20DeprecationWarning)

    self._ctxStack = pdtContextStack()
@contextlib.contextmanager
def context(self):
    """
    Context manager that yields a fresh L{pdtContext}.

    The new context is pushed on the context stack for the duration of
    the C{with} body. It is always popped afterwards, even when the body
    raises, so a failed parse cannot leave a stale entry on the stack.
    After a successful body the popped context is merged into the
    context below it on the stack, when there is one.

    @rtype:  pdtContext
    @return: the context that is active inside the C{with} body
    """
    ctx = pdtContext()
    self._ctxStack.push(ctx)
    try:
        yield ctx
    finally:
        ctx = self._ctxStack.pop()

    # reached only when the body finished without raising
    if not self._ctxStack.isEmpty():
        self.currentContext.update(ctx)
@property
def currentContext(self):
    """
    The entry that C{last()} reports for the context stack.

    @rtype:  object
    @return: value of C{self._ctxStack.last()}
    """
    stack = self._ctxStack
    return stack.last()
295
296 - def _convertUnitAsWords(self, unitText):
297 """ 298 Converts text units into their number value. 299 300 @type unitText: string 301 @param unitText: number text to convert 302 303 @rtype: integer 304 @return: numerical value of unitText 305 """ 306 word_list, a, b = re.split(r"[,\s-]+", unitText), 0, 0 307 for word in word_list: 308 x = self.ptc.small.get(word) 309 if x is not None: 310 a += x 311 elif word == "hundred": 312 a *= 100 313 else: 314 x = self.ptc.magnitude.get(word) 315 if x is not None: 316 b += a * x 317 a = 0 318 elif word in self.ptc.ignore: 319 pass 320 else: 321 raise Exception("Unknown number: " + word) 322 return a + b
323
def _buildTime(self, source, quantity, modifier, units):
    """
    Take C{quantity}, C{modifier} and C{unit} strings and convert them
    into values. After converting, calculate the time and return the
    adjusted sourceTime.

    @type  source:   time
    @param source:   time to use as the base (or source); the current
                     local time is used when None
    @type  quantity: string
    @param quantity: quantity string
    @type  modifier: string
    @param modifier: how quantity and units modify the source time
    @type  units:    string
    @param units:    unit of the quantity (i.e. hours, days, months, etc)

    @rtype:  struct_time
    @return: C{struct_time} of the calculated time
    """
    ctx = self.currentContext
    debug and log.debug('_buildTime: [%s][%s][%s]',
                        quantity, modifier, units)

    if source is None:
        source = time.localtime()

    quantity = '' if quantity is None else quantity.strip()
    qty = self._quantityToReal(quantity)

    if modifier in self.ptc.Modifiers:
        qty = qty * self.ptc.Modifiers[modifier]

    if units is None or units == '':
        units = 'dy'

    # plurals are handled by regex's (could be a bug tho)

    (yr, mth, dy, hr, mn, sec, _, _, _) = source
    start = datetime.datetime(yr, mth, dy, hr, mn, sec)

    # map the unit text onto its canonical key in self.ptc.units;
    # the text itself is kept when no entry lists it
    realunit = next((key for key, values in self.ptc.units.items()
                     if units in values), units)

    debug and log.debug('units %s --> realunit %s (qty=%s)',
                        units, realunit, qty)

    target = start
    try:
        if realunit in ('years', 'months'):
            # inc() takes the singular keyword ("year" / "month")
            target = self.inc(start, **{realunit[:-1]: qty})
        elif realunit in ('days', 'hours', 'minutes', 'seconds', 'weeks'):
            target = start + datetime.timedelta(**{realunit: qty})
    except OverflowError:
        # OverflowError is raised when target.year is larger than 9999
        pass
    else:
        ctx.updateAccuracy(realunit)

    return target.timetuple()
392
def parseDate(self, dateString, sourceTime=None):
    """
    Parse short-form date strings::

        '05/28/2006' or '04.21'

    @type  dateString: string
    @param dateString: text to convert to a C{datetime}
    @type  sourceTime: struct_time
    @param sourceTime: C{struct_time} value to use as the base

    @rtype:  struct_time
    @return: calculated C{struct_time} value of dateString; the current
             local time when the parsed month or day is out of range
    """
    base = time.localtime() if sourceTime is None else sourceTime
    yr, mth, dy, hr, mn, sec, wd, yd, isdst = base

    # values pulled from regex's will be stored here and later
    # assigned to mth, dy, yr based on information from the locale
    # -1 is used as the marker value because we want zero values
    # to be passed thru so they can be flagged as errors later
    values = [-1, -1, -1]

    text = dateString
    sep = self.ptc.CRE_DATE2.search(text)
    if sep is not None:
        cut = sep.start()
        values[0] = int(text[:cut])
        text = text[cut + 1:]

    sep = self.ptc.CRE_DATE2.search(text)
    if sep is None:
        values[1] = int(text.strip())
    else:
        cut = sep.start()
        values[1] = int(text[:cut])
        values[2] = int(text[cut + 1:])

    parts = {'m': mth, 'd': dy, 'y': yr}

    # a first value above 31 cannot be a day or month: yyyy/mm/dd format
    if values[0] <= 31:
        order = self.ptc.dp_order
    else:
        order = ['y', 'm', 'd']

    acuByCode = {'m': pdtContext.ACU_MONTH,
                 'd': pdtContext.ACU_DAY,
                 'y': pdtContext.ACU_YEAR}
    accuracy = []
    for position, number in enumerate(values):
        code = order[position]
        if number >= 0:
            parts[code] = number
            accuracy.append(acuByCode[code])

    # if the year is not specified and the date has already
    # passed, increment the year
    yearMissing = values[2] == -1
    if yearMissing and (mth, dy) > (parts['m'], parts['d']):
        yr = parts['y'] + self.ptc.YearParseStyle
    else:
        yr = parts['y']

    mth = parts['m']
    dy = parts['d']

    # birthday epoch constraint
    if yr < self.ptc.BirthdayEpoch:
        yr += 2000
    elif yr < 100:
        yr += 1900

    daysInCurrentMonth = self.ptc.daysInMonth(mth, yr)
    debug and log.debug('parseDate: %s %s %s %s',
                        yr, mth, dy, daysInCurrentMonth)

    with self.context() as ctx:
        if 0 < mth <= 12 and 0 < dy <= daysInCurrentMonth:
            sourceTime = (yr, mth, dy, hr, mn, sec, wd, yd, isdst)
            ctx.updateAccuracy(*accuracy)
        else:
            # return current time if date string is invalid
            sourceTime = time.localtime()

    return sourceTime
481
def parseDateText(self, dateString, sourceTime=None):
    """
    Parse long-form date strings::

        'May 31st, 2006'
        'Jan 1st'
        'July 2006'

    @type  dateString: string
    @param dateString: text to convert to a datetime
    @type  sourceTime: struct_time
    @param sourceTime: C{struct_time} value to use as the base

    @rtype:  struct_time
    @return: calculated C{struct_time} value of dateString; the current
             local time when no date is found or the day is out of
             range for the month
    """
    if sourceTime is None:
        yr, mth, dy, hr, mn, sec, wd, yd, isdst = time.localtime()
    else:
        yr, mth, dy, hr, mn, sec, wd, yd, isdst = sourceTime

    currentMth = mth
    currentDy = dy
    accuracy = []

    debug and log.debug('parseDateText currentMth %s currentDy %s',
                        mth, dy)

    s = dateString.lower()
    m = self.ptc.CRE_DATE3.search(s)
    if m is None:
        # nothing date-like in the text: treat it like any other invalid
        # date string instead of failing on m.group()
        debug and log.debug('parseDateText found no date in [%s]', s)
        return time.localtime()

    mth = self.ptc.MonthOffsets[m.group('mthname')]
    accuracy.append('month')

    if m.group('day') is not None:
        dy = int(m.group('day'))
        accuracy.append('day')
    else:
        dy = 1

    if m.group('year') is not None:
        yr = int(m.group('year'))
        accuracy.append('year')

        # birthday epoch constraint
        if yr < self.ptc.BirthdayEpoch:
            yr += 2000
        elif yr < 100:
            yr += 1900

    elif (mth < currentMth) or (mth == currentMth and dy < currentDy):
        # if that day and month have already passed in this year,
        # then increment the year by 1
        yr += self.ptc.YearParseStyle

    with self.context() as ctx:
        if dy > 0 and dy <= self.ptc.daysInMonth(mth, yr):
            sourceTime = (yr, mth, dy, hr, mn, sec, wd, yd, isdst)
            ctx.updateAccuracy(*accuracy)
        else:
            # Return current time if date string is invalid
            sourceTime = time.localtime()

    debug and log.debug('parseDateText returned '
                        'mth %d dy %d yr %d sourceTime %s',
                        mth, dy, yr, sourceTime)

    return sourceTime
550
def evalRanges(self, datetimeString, sourceTime=None):
    """
    Evaluate the C{datetimeString} text and determine if
    it represents a date or time range.

    @type  datetimeString: string
    @param datetimeString: datetime text to evaluate
    @type  sourceTime:     struct_time
    @param sourceTime:     C{struct_time} value to use as the base

    @rtype:  tuple
    @return: tuple of: start datetime, end datetime and the invalid flag
             (2 for a time range, 1 for a date range, 0 when no range
             was found or one side could not be parsed)
    """
    rangeFlag = retFlag = 0
    startStr = endStr = ''
    rangeSep = self.ptc.rangeSep

    s = datetimeString.strip().lower()

    if rangeSep in s:
        # make sure the separator stands alone between single spaces
        s = s.replace(rangeSep, ' %s ' % rangeSep)
        s = s.replace('  ', ' ')

    m = None
    for cre, rflag in [(self.ptc.CRE_TIMERNG1, 1),
                       (self.ptc.CRE_TIMERNG2, 2),
                       (self.ptc.CRE_TIMERNG4, 7),
                       (self.ptc.CRE_TIMERNG3, 3),
                       (self.ptc.CRE_DATERNG1, 4),
                       (self.ptc.CRE_DATERNG2, 5),
                       (self.ptc.CRE_DATERNG3, 6)]:
        m = cre.search(s)
        if m is not None:
            rangeFlag = rflag
            break

    debug and log.debug('evalRanges: rangeFlag = %s [%s]', rangeFlag, s)

    if m is not None:
        if (m.group() != s):
            # capture remaining string
            parseStr = m.group()
            chunk1 = s[:m.start()]
            chunk2 = s[m.end():]
            s = '%s %s' % (chunk1, chunk2)

            sourceTime, ctx = self.parse(s, sourceTime,
                                         VERSION_CONTEXT_STYLE)

            if not ctx.hasDateOrTime:
                sourceTime = None
        else:
            parseStr = s

        # The separator is literal text (it is used with "in" and
        # str.replace() above), so escape it for the search and cut at
        # the end of the match rather than assuming one character.
        sep = re.search(re.escape(rangeSep), parseStr)
        beforeSep = parseStr[:sep.start()]
        afterSep = parseStr[sep.end():]

        if rangeFlag in (1, 2):
            startStr = beforeSep
            endStr = afterSep
            retFlag = 2

        elif rangeFlag in (3, 7):
            # capturing the meridian from the end time
            if self.ptc.usesMeridian:
                ampm = re.search(self.ptc.am[0], parseStr)

                # appending the meridian to the start time
                if ampm is not None:
                    startStr = beforeSep + self.ptc.meridian[0]
                else:
                    startStr = beforeSep + self.ptc.meridian[1]
            else:
                startStr = beforeSep

            endStr = afterSep
            retFlag = 2

        elif rangeFlag == 4:
            startStr = beforeSep
            endStr = afterSep
            retFlag = 1

        elif rangeFlag == 5:
            endStr = afterSep

            # capturing the year from the end date
            date = self.ptc.CRE_DATE3.search(endStr)
            endYear = date.group('year')

            # appending the year to the start date if the start date
            # does not have year information and the end date does.
            # eg : "Aug 21 - Sep 4, 2007"
            if endYear is not None:
                startStr = beforeSep.strip()
                date = self.ptc.CRE_DATE3.search(startStr)
                startYear = date.group('year')

                if startYear is None:
                    startStr = startStr + ', ' + endYear
            else:
                startStr = beforeSep

            retFlag = 1

        elif rangeFlag == 6:
            startStr = beforeSep

            # capturing the month from the start date
            mth = self.ptc.CRE_DATE3.search(startStr)
            mth = mth.group('mthname')

            # appending the month name to the end date
            endStr = mth + afterSep

            retFlag = 1

    else:
        # if range is not found
        startDT = endDT = time.localtime()

    if retFlag:
        startDT, sctx = self.parse(startStr, sourceTime,
                                   VERSION_CONTEXT_STYLE)
        endDT, ectx = self.parse(endStr, sourceTime,
                                 VERSION_CONTEXT_STYLE)

        if not sctx.hasDateOrTime or not ectx.hasDateOrTime:
            retFlag = 0

    return startDT, endDT, retFlag
683
def _CalculateDOWDelta(self, wd, wkdy, offset, style, currentDayStyle):
    """
    Based on the C{style} and C{currentDayStyle} determine what
    day-of-week value is to be returned.

    @type  wd:              integer
    @param wd:              day-of-week value for the current day
    @type  wkdy:            integer
    @param wkdy:            day-of-week value for the parsed day
    @type  offset:          integer
    @param offset:          offset direction for any modifiers (-1, 0, 1);
                            the value 2 means no modifier was present
    @type  style:           integer
    @param style:           normally the value
                            set in C{Constants.DOWParseStyle}
    @type  currentDayStyle: integer
    @param currentDayStyle: normally the value
                            set in C{Constants.CurrentDOWParseStyle}

    @rtype:  integer
    @return: calculated day-of-week delta, in days
    """
    requestedOffset = offset

    if offset == 2:
        # no modifier is present.
        # i.e. string to be parsed is just DOW
        inCurrentWeek = (wkdy * style > wd * style or
                         currentDayStyle and wkdy == wd)
        if not inCurrentWeek and style in (-1, 1):
            # wkdy located in last (-1) or next (1) week
            offset = style
        else:
            # current week, or an invalid style
            offset = 0

    # offset = -1 means last week
    # offset = 0 means current week
    # offset = 1 means next week
    diff = (wkdy - wd) + 7 * offset
    if style == 1 and diff < -7:
        diff += 7
    elif style == -1 and diff > 7:
        diff -= 7

    debug and log.debug("wd %s, wkdy %s, offset %d, "
                        "style %d, currentDayStyle %d",
                        wd, wkdy, requestedOffset, style, currentDayStyle)

    return diff
736
737 - def _quantityToReal(self, quantity):
738 """ 739 Convert a quantity, either spelled-out or numeric, to a float 740 741 @type quantity: string 742 @param quantity: quantity to parse to float 743 @rtype: int 744 @return: the quantity as an float, defaulting to 0.0 745 """ 746 if not quantity: 747 return 1.0 748 749 try: 750 return float(quantity.replace(',', '.')) 751 except ValueError: 752 pass 753 754 try: 755 return float(self.ptc.numbers[quantity]) 756 except KeyError: 757 pass 758 759 return 0.0
760
def _evalModifier(self, modifier, chunk1, chunk2, sourceTime):
    """
    Evaluate the C{modifier} string and following text (passed in
    as C{chunk1} and C{chunk2}) and if they match any known modifiers
    calculate the delta and apply it to C{sourceTime}.

    @type  modifier:   string
    @param modifier:   modifier text to apply to sourceTime
    @type  chunk1:     string
    @param chunk1:     text chunk that preceded modifier (if any)
    @type  chunk2:     string
    @param chunk2:     text chunk that followed modifier (if any)
    @type  sourceTime: struct_time
    @param sourceTime: C{struct_time} value to use as the base; the
                       current local time supplies the date fields
                       when None

    @rtype:  tuple
    @return: tuple of: remaining text and the modified sourceTime
    """
    ctx = self.currentContext
    offset = self.ptc.Modifiers[modifier]

    if sourceTime is not None:
        (yr, mth, dy, hr, mn, sec, wd, yd, isdst) = sourceTime
    else:
        (yr, mth, dy, hr, mn, sec, wd, yd, isdst) = time.localtime()

    if self.ptc.StartTimeFromSourceTime:
        startHour = hr
        startMinute = mn
        startSecond = sec
    else:
        startHour = self.ptc.StartHour
        startMinute = 0
        startSecond = 0

    # capture the units after the modifier and the remaining
    # string after the unit
    m = self.ptc.CRE_REMAINING.search(chunk2)
    if m is not None:
        index = m.start() + 1
        unit = chunk2[:m.start()]
        chunk2 = chunk2[index:]
    else:
        unit = chunk2
        chunk2 = ''

    debug and log.debug("modifier [%s] chunk1 [%s] "
                        "chunk2 [%s] unit [%s]",
                        modifier, chunk1, chunk2, unit)

    if unit in self.ptc.units['months']:
        currentDaysInMonth = self.ptc.daysInMonth(mth, yr)
        if offset == 0:
            dy = currentDaysInMonth
            sourceTime = (yr, mth, dy, startHour, startMinute,
                          startSecond, wd, yd, isdst)
        elif offset == 2:
            if dy == currentDaysInMonth:
                # Last day of this month -> last day of the next month.
                # Step from the 1st so the intermediate date is always
                # valid (e.g. no "Feb 31", no month 13 in December),
                # then move to the end of the month that was reached.
                start = datetime.datetime(yr, mth, 1, startHour,
                                          startMinute, startSecond)
                target = self.inc(start, month=1)
                target = target.replace(
                    day=self.ptc.daysInMonth(target.month, target.year))
            else:
                start = datetime.datetime(yr, mth, dy, startHour,
                                          startMinute, startSecond)
                target = self.inc(start, month=1)
            sourceTime = target.timetuple()
        else:
            start = datetime.datetime(yr, mth, 1, startHour,
                                      startMinute, startSecond)
            target = self.inc(start, month=offset)
            sourceTime = target.timetuple()
        ctx.updateAccuracy(ctx.ACU_MONTH)

    elif unit in self.ptc.units['weeks']:
        if offset == 0:
            start = datetime.datetime(yr, mth, dy, 17, 0, 0)
            target = start + datetime.timedelta(days=(4 - wd))
            sourceTime = target.timetuple()
        elif offset == 2:
            start = datetime.datetime(yr, mth, dy, startHour,
                                      startMinute, startSecond)
            target = start + datetime.timedelta(days=7)
            sourceTime = target.timetuple()
        else:
            start = datetime.datetime(yr, mth, dy, startHour,
                                      startMinute, startSecond)
            target = start + offset * datetime.timedelta(weeks=1)
            sourceTime = target.timetuple()
        ctx.updateAccuracy(ctx.ACU_WEEK)

    elif unit in self.ptc.units['days']:
        if offset == 0:
            sourceTime = (yr, mth, dy, 17, 0, 0, wd, yd, isdst)
            ctx.updateAccuracy(ctx.ACU_HALFDAY)
        elif offset == 2:
            start = datetime.datetime(yr, mth, dy, hr, mn, sec)
            target = start + datetime.timedelta(days=1)
            sourceTime = target.timetuple()
        else:
            start = datetime.datetime(yr, mth, dy, startHour,
                                      startMinute, startSecond)
            target = start + datetime.timedelta(days=offset)
            sourceTime = target.timetuple()
        ctx.updateAccuracy(ctx.ACU_DAY)

    elif unit in self.ptc.units['hours']:
        if offset == 0:
            sourceTime = (yr, mth, dy, hr, 0, 0, wd, yd, isdst)
        else:
            start = datetime.datetime(yr, mth, dy, hr, 0, 0)
            target = start + datetime.timedelta(hours=offset)
            sourceTime = target.timetuple()
        ctx.updateAccuracy(ctx.ACU_HOUR)

    elif unit in self.ptc.units['years']:
        if offset == 0:
            sourceTime = (yr, 12, 31, hr, mn, sec, wd, yd, isdst)
        elif offset == 2:
            sourceTime = (yr + 1, mth, dy, hr, mn, sec, wd, yd, isdst)
        else:
            sourceTime = (yr + offset, 1, 1, startHour, startMinute,
                          startSecond, wd, yd, isdst)
        ctx.updateAccuracy(ctx.ACU_YEAR)

    elif modifier == 'eom':
        dy = self.ptc.daysInMonth(mth, yr)
        sourceTime = (yr, mth, dy, startHour, startMinute,
                      startSecond, wd, yd, isdst)
        ctx.updateAccuracy(ctx.ACU_DAY)

    elif modifier == 'eoy':
        mth = 12
        dy = self.ptc.daysInMonth(mth, yr)
        sourceTime = (yr, mth, dy, startHour, startMinute,
                      startSecond, wd, yd, isdst)
        ctx.updateAccuracy(ctx.ACU_MONTH)

    elif self.ptc.CRE_WEEKDAY.match(unit):
        m = self.ptc.CRE_WEEKDAY.match(unit)
        debug and log.debug('CRE_WEEKDAY matched')
        wkdy = m.group()

        if modifier == 'eod':
            ctx.updateAccuracy(ctx.ACU_HOUR)
            # Calculate the upcoming weekday
            sourceTime, subctx = self.parse(wkdy, sourceTime,
                                            VERSION_CONTEXT_STYLE)
            sTime = self.ptc.getSource(modifier, sourceTime)
            if sTime is not None:
                sourceTime = sTime
                ctx.updateAccuracy(ctx.ACU_HALFDAY)
        else:
            # unless one of these modifiers is being applied to the
            # day-of-week, we want to start with target as the day
            # in the current week.
            dowOffset = offset
            relativeModifier = modifier not in ['this', 'next', 'last', 'prior', 'previous']
            if relativeModifier:
                dowOffset = 0

            wkdy = self.ptc.WeekdayOffsets[wkdy]
            diff = self._CalculateDOWDelta(
                wd, wkdy, dowOffset, self.ptc.DOWParseStyle,
                self.ptc.CurrentDOWParseStyle)
            start = datetime.datetime(yr, mth, dy, startHour,
                                      startMinute, startSecond)
            target = start + datetime.timedelta(days=diff)

            if chunk1 != '' and relativeModifier:
                # consider "one day before thursday": we need to parse chunk1 ("one day")
                # and apply according to the offset ("before"), rather than allowing the
                # remaining parse step to apply "one day" without the offset direction.
                t, subctx = self.parse(chunk1, sourceTime, VERSION_CONTEXT_STYLE)
                if subctx.hasDateOrTime:
                    # mktime() needs a real time tuple; fall back to the
                    # fields unpacked above when no sourceTime was given
                    if sourceTime is not None:
                        baseTime = sourceTime
                    else:
                        baseTime = (yr, mth, dy, hr, mn, sec,
                                    wd, yd, isdst)
                    delta = time.mktime(t) - time.mktime(baseTime)
                    target = start + datetime.timedelta(days=diff) + datetime.timedelta(seconds=delta * offset)
                    chunk1 = ''

            sourceTime = target.timetuple()
        ctx.updateAccuracy(ctx.ACU_DAY)

    elif chunk1 == '' and chunk2 == '' and self.ptc.CRE_TIME.match(unit):
        debug and log.debug('CRE_TIME matched')
        (yr, mth, dy, hr, mn, sec, wd, yd, isdst), subctx = \
            self.parse(unit, None, VERSION_CONTEXT_STYLE)

        start = datetime.datetime(yr, mth, dy, hr, mn, sec)
        target = start + datetime.timedelta(days=offset)
        sourceTime = target.timetuple()

    else:
        # check if the remaining text is parsable and if so,
        # use it as the base time for the modifier source time

        debug and log.debug('check for modifications '
                            'to source time [%s] [%s]',
                            chunk1, unit)

        unit = unit.strip()
        if unit:
            s = '%s %s' % (unit, chunk2)
            t, subctx = self.parse(s, sourceTime, VERSION_CONTEXT_STYLE)

            if subctx.hasDate:  # working with dates
                u = unit.lower()
                if u in self.ptc.Months or \
                        u in self.ptc.shortMonths:
                    yr, mth, dy, hr, mn, sec, wd, yd, isdst = t
                    start = datetime.datetime(
                        yr, mth, dy, hr, mn, sec)
                    t = self.inc(start, year=offset).timetuple()
                elif u in self.ptc.Weekdays:
                    # t is a time tuple, so shift it through a datetime
                    shifted = (datetime.datetime(*t[:6]) +
                               datetime.timedelta(weeks=offset))
                    t = shifted.timetuple()

            if subctx.hasDateOrTime:
                sourceTime = t
                chunk2 = ''

        chunk1 = chunk1.strip()

        # if the word after next is a number, the string is more than
        # likely to be "next 4 hrs" which we will have to combine the
        # units with the rest of the string
        if chunk1:
            try:
                m = list(self.ptc.CRE_NUMBER.finditer(chunk1))[-1]
            except IndexError:
                pass
            else:
                debug and log.debug('CRE_NUMBER matched')
                qty = self._quantityToReal(m.group()) * offset
                chunk1 = '%s%s%s' % (chunk1[:m.start()],
                                     qty, chunk1[m.end():])
            t, subctx = self.parse(chunk1, sourceTime,
                                   VERSION_CONTEXT_STYLE)

            chunk1 = ''

            if subctx.hasDateOrTime:
                sourceTime = t

        debug and log.debug('looking for modifier %s', modifier)
        sTime = self.ptc.getSource(modifier, sourceTime)
        if sTime is not None:
            debug and log.debug('modifier found in sources')
            sourceTime = sTime
            ctx.updateAccuracy(ctx.ACU_HALFDAY)

    debug and log.debug('returning chunk = "%s %s" and sourceTime = %s',
                        chunk1, chunk2, sourceTime)

    return '%s %s' % (chunk1, chunk2), sourceTime
1015
def _evalDT(self, datetimeString, sourceTime):
    """
    Calculate the datetime from known format like RFC822 or W3CDTF

    Examples handled::
        RFC822, W3CDTF formatted dates
        HH:MM[:SS][ am/pm]
        MM/DD/YYYY
        DD MMMM YYYY

    @type  datetimeString: string
    @param datetimeString: text to try and parse as more "traditional"
                           date/time text
    @type  sourceTime:     struct_time
    @param sourceTime:     C{struct_time} value to use as the base; it
                           is returned unchanged when it is not None

    @rtype:  struct_time
    @return: calculated C{struct_time} value or current C{struct_time}
             if not parsed
    """
    ctx = self.currentContext
    s = datetimeString.strip()

    # Given string date is a RFC822 date
    if sourceTime is None:
        # empty text cannot be an RFC822 date; do not hand it to the parser
        sourceTime = _parse_date_rfc822(s) if s else None
        debug and log.debug(
            'attempt to parse as rfc822 - %s', str(sourceTime))

        if sourceTime is not None:
            (yr, mth, dy, hr, mn, sec, wd, yd, isdst, _) = sourceTime
            ctx.updateAccuracy(ctx.ACU_YEAR, ctx.ACU_MONTH, ctx.ACU_DAY)

            # any non-zero component means a time of day was given
            # (e.g. 10:00:00 has zero minutes and seconds)
            if hr != 0 or mn != 0 or sec != 0:
                ctx.updateAccuracy(ctx.ACU_HOUR, ctx.ACU_MIN, ctx.ACU_SEC)

            # drop the trailing timezone field
            sourceTime = (yr, mth, dy, hr, mn, sec, wd, yd, isdst)

    # Given string date is a W3CDTF date
    if sourceTime is None:
        sourceTime = _parse_date_w3dtf(s)

        if sourceTime is not None:
            ctx.updateAccuracy(ctx.ACU_YEAR, ctx.ACU_MONTH, ctx.ACU_DAY,
                               ctx.ACU_HOUR, ctx.ACU_MIN, ctx.ACU_SEC)

    if sourceTime is None:
        sourceTime = time.localtime()

    return sourceTime
1066
def _evalUnits(self, datetimeString, sourceTime):
    """
    Evaluate text passed by L{_partialParseUnits()}

    Handles a quantity followed by units, like "5 hrs".

    @type  datetimeString: string
    @param datetimeString: quantity + units text
    @type  sourceTime:     struct_time
    @param sourceTime:     C{struct_time} value to use as the base

    @return: the value produced by C{_buildTime()}, or the resolved
             base time unchanged when C{CRE_UNITS} does not match
    """
    s = datetimeString.strip()
    sourceTime = self._evalDT(datetimeString, sourceTime)

    m = self.ptc.CRE_UNITS.search(s)
    if m is None:
        # Without a match there is no quantity/units to apply; calling
        # _buildTime here would fail on unbound locals.
        return sourceTime

    modifier = ''  # TODO
    units = m.group('units')
    # everything in front of the unit word is the quantity text
    quantity = s[:m.start('units')]

    return self._buildTime(sourceTime, quantity, modifier, units)
1084
def _evalQUnits(self, datetimeString, sourceTime):
    """
    Evaluate text passed by L{_partialParseQUnits()}

    Handles a quantity followed by a single character unit, like "5 h".

    @type  datetimeString: string
    @param datetimeString: quantity + short unit text
    @type  sourceTime:     struct_time
    @param sourceTime:     C{struct_time} value to use as the base

    @return: the value produced by C{_buildTime()}, or the resolved
             base time unchanged when C{CRE_QUNITS} does not match
    """
    s = datetimeString.strip()
    sourceTime = self._evalDT(datetimeString, sourceTime)

    m = self.ptc.CRE_QUNITS.search(s)
    if m is None:
        # Without a match there is no quantity/units to apply; calling
        # _buildTime here would fail on unbound locals.
        return sourceTime

    modifier = ''  # TODO
    units = m.group('qunits')
    # everything in front of the unit character is the quantity text
    quantity = s[:m.start('qunits')]

    return self._buildTime(sourceTime, quantity, modifier, units)
1102
def _evalDateStr(self, datetimeString, sourceTime):
    """
    Evaluate text passed by L{_partialParseDateStr()}

    The text is expected to look like "May 23rd, 2005".

    @type  datetimeString: string
    @param datetimeString: textual date to evaluate
    @type  sourceTime:     struct_time
    @param sourceTime:     C{struct_time} value to use as the base

    @return: whatever L{parseDateText()} returns for the stripped text
    """
    dateText = datetimeString.strip()
    baseTime = self._evalDT(datetimeString, sourceTime)

    if debug:
        log.debug('checking for MMM DD YYYY')

    return self.parseDateText(dateText, baseTime)
1113
def _evalDateStd(self, datetimeString, sourceTime):
    """
    Evaluate text passed by L{_partialParseDateStd()}

    The text is expected to be in the format 07/21/2006.

    @type  datetimeString: string
    @param datetimeString: numeric date to evaluate
    @type  sourceTime:     struct_time
    @param sourceTime:     C{struct_time} value to use as the base

    @return: whatever L{parseDate()} returns for the stripped text
    """
    dateText = datetimeString.strip()
    baseTime = self._evalDT(datetimeString, sourceTime)
    return self.parseDate(dateText, baseTime)
1123
def _evalDayStr(self, datetimeString, sourceTime):
    """
    Evaluate text passed by L{_partialParseDaystr()}

    Handles natural language day names such as today or tomorrow by
    looking the word up in C{ptc.dayOffsets}; an unknown word counts as
    an offset of zero days.

    @type  datetimeString: string
    @param datetimeString: day word to evaluate
    @type  sourceTime:     struct_time
    @param sourceTime:     C{struct_time} value to use as the base

    @rtype:  struct_time
    @return: the base date shifted by the looked-up number of days
    """
    dayWord = datetimeString.strip()
    baseTime = self._evalDT(datetimeString, sourceTime)

    (yr, mth, dy, hr, mn, sec, wd, yd, isdst) = baseTime

    try:
        dayShift = self.ptc.dayOffsets[dayWord]
    except KeyError:
        dayShift = 0

    # either keep the clock of the base time or restart the day at the
    # configured start hour
    if self.ptc.StartTimeFromSourceTime:
        clock = (hr, mn, sec)
    else:
        clock = (self.ptc.StartHour, 0, 0)

    self.currentContext.updateAccuracy(pdtContext.ACU_DAY)

    origin = datetime.datetime(yr, mth, dy, *clock)
    shifted = origin + datetime.timedelta(days=dayShift)
    return shifted.timetuple()
1153
def _evalWeekday(self, datetimeString, sourceTime):
    """
    Evaluate text passed by L{_partialParseWeekday()}

    Looks the weekday name up in C{ptc.WeekdayOffsets} and moves the
    base date by the number of days C{_CalculateDOWDelta()} reports.

    @type  datetimeString: string
    @param datetimeString: weekday name to evaluate
    @type  sourceTime:     struct_time
    @param sourceTime:     C{struct_time} value to use as the base

    @rtype:  struct_time
    @return: the base date/time moved to the requested weekday
    """
    dayName = datetimeString.strip()
    baseTime = self._evalDT(datetimeString, sourceTime)

    yr, mth, dy, hr, mn, sec, wd, yd, isdst = baseTime
    origin = datetime.datetime(yr, mth, dy, hr, mn, sec)

    targetDow = self.ptc.WeekdayOffsets[dayName]

    # the delta is computed the same way whether the target weekday is
    # ahead of or behind the current one
    dayShift = self._CalculateDOWDelta(wd, targetDow, 2,
                                       self.ptc.DOWParseStyle,
                                       self.ptc.CurrentDOWParseStyle)

    self.currentContext.updateAccuracy(pdtContext.ACU_DAY)
    return (origin + datetime.timedelta(days=dayShift)).timetuple()
1179
def _evalTimeStr(self, datetimeString, sourceTime):
    """
    Evaluate text passed by L{_partialParseTimeStr()}

    A word listed in C{ptc.re_values['now']} leaves the base time as it
    is; any other word (lunch, midnight, ...) is looked up with
    C{ptc.getSource()} and replaces the base time when found.

    @type  datetimeString: string
    @param datetimeString: natural language time word
    @type  sourceTime:     struct_time
    @param sourceTime:     C{struct_time} value to use as the base

    @return: the (possibly replaced) base time
    """
    timeWord = datetimeString.strip()
    result = self._evalDT(datetimeString, sourceTime)

    if timeWord in self.ptc.re_values['now']:
        accuracy = pdtContext.ACU_NOW
    else:
        named = self.ptc.getSource(timeWord, result)
        if named:
            result = named
        # flagged as half-day accuracy whether or not the lookup hit
        accuracy = pdtContext.ACU_HALFDAY

    self.currentContext.updateAccuracy(accuracy)
    return result
1198
def _evalMeridian(self, datetimeString, sourceTime):
    """
    Evaluate text passed by L{_partialParseMeridian()}

    The text is expected to be in the format HH:MM(:SS)(am/pm).

    @type  datetimeString: string
    @param datetimeString: time with meridian to evaluate
    @type  sourceTime:     struct_time
    @param sourceTime:     C{struct_time} value to use as the base

    @return: the base time with hour/minute/second replaced when the
             text holds a valid time, otherwise the base time unchanged
    """
    s = datetimeString.strip()
    sourceTime = self._evalDT(datetimeString, sourceTime)

    yr, mth, dy, hr, mn, sec, wd, yd, isdst = sourceTime

    m = self.ptc.CRE_TIMEHMS2.search(s)
    if m is None:
        return sourceTime

    # text in front of the meridian marker; one or two characters means
    # a bare hour such as "5" or "12"
    clockText = s[:m.start('meridian')].strip()
    if len(clockText) > 2:
        hr, mn, sec = _extract_time(m)
    else:
        hr, mn, sec = int(clockText), 0, 0

    if hr == 24:
        hr = 0

    marker = m.group('meridian').lower()

    # 12 am is midnight
    if hr == 12 and marker in self.ptc.am:
        hr = 0

    # pm shifts the morning hours into the afternoon/evening
    if hr < 12 and marker in self.ptc.pm:
        hr += 12

    # only accept a sane wall-clock time
    if hr < 24 and mn < 60 and sec < 60:
        sourceTime = (yr, mth, dy, hr, mn, sec, wd, yd, isdst)
        _pop_time_accuracy(m, self.currentContext)

    return sourceTime
1238
def _evalTimeStd(self, datetimeString, sourceTime):
    """
    Evaluate text passed by L{_partialParseTimeStd()}

    The text is expected to be in the format HH:MM(:SS).

    @type  datetimeString: string
    @param datetimeString: time text to evaluate
    @type  sourceTime:     struct_time
    @param sourceTime:     C{struct_time} value to use as the base

    @return: the base time with hour/minute/second replaced when the
             text holds a valid time, otherwise the base time unchanged
    """
    s = datetimeString.strip()
    sourceTime = self._evalDT(datetimeString, sourceTime)

    yr, mth, dy, hr, mn, sec, wd, yd, isdst = sourceTime

    m = self.ptc.CRE_TIMEHMS.search(s)
    if m is not None:
        hr, mn, sec = _extract_time(m)
        # 24:xx is folded onto hour zero
        hr = 0 if hr == 24 else hr

    # the check also runs on the untouched base values when nothing
    # matched, exactly like the match case
    timeIsValid = hr < 24 and mn < 60 and sec < 60
    if timeIsValid:
        sourceTime = (yr, mth, dy, hr, mn, sec, wd, yd, isdst)
        _pop_time_accuracy(m, self.currentContext)

    return sourceTime
1261
def _UnitsTrapped(self, s, m, key):
    """
    Tell whether a units match actually swallowed a day suffix.

    For example "Dec 31st" also matches as "31s" (31 seconds)::

        Dec 31st
            ^ ^
            | +-- m.start(key)   and also m2.start('suffix')
            +---- m.start('qty') and also m2.start('day')

    @type  s:   string
    @param s:   text that was searched
    @type  m:   match object
    @param m:   units match holding the groups C{qty} and C{key}
    @type  key: string
    @param key: name of the units group in C{m}

    @rtype:  boolean
    @return: C{True} when the quantity is really a day plus suffix
    """
    dayMatch = self.ptc.CRE_DAY2.search(s)
    if dayMatch is None:
        return False

    # what the quantity would read like if it were "<day><unit>"
    dayPlusUnit = '%s%s' % (dayMatch.group('day'), m.group(key))

    return (m.start(key) == dayMatch.start('suffix') and
            m.start('qty') == dayMatch.start('day') and
            m.group('qty') == dayPlusUnit)
1282
def _partialParseModifier(self, s, sourceTime):
    """
    Test whether C{s} matches CRE_MODIFIER, used by L{parse()}

    Modifiers are words like next/prev/from/after/prior.

    @type  s:          string
    @param s:          date/time text to evaluate
    @type  sourceTime: struct_time
    @param sourceTime: C{struct_time} value to use as the base

    @rtype:  tuple
    @return: tuple of the remaining date/time text, the resulting time
             value and a boolean telling whether a modifier matched
    """
    m = self.ptc.CRE_MODIFIER.search(s)
    modifierText = None if m is None else m.group()

    if not modifierText:
        return s, sourceTime, False

    # text on either side of the modifier; both are empty when the
    # modifier is the whole string
    before = s[:m.start()].strip()
    after = s[m.end():].strip()

    if debug:
        log.debug('found (modifier) [%s][%s][%s]',
                  modifierText, before, after)

    s, sourceTime = self._evalModifier(modifierText, before,
                                       after, sourceTime)
    return s, sourceTime, True
1318
def _partialParseUnits(self, s, sourceTime):
    """
    Test whether C{s} matches CRE_UNITS, used by L{parse()}

    @type  s:          string
    @param s:          date/time text to evaluate
    @type  sourceTime: struct_time
    @param sourceTime: C{struct_time} value to use as the base

    @rtype:  tuple
    @return: tuple of the remaining date/time text, the resulting time
             value and a boolean telling whether quantity + units matched
    """
    found = None
    before = after = ''

    # Quantity + Units
    m = self.ptc.CRE_UNITS.search(s)
    if m is not None:
        if debug:
            log.debug('CRE_UNITS matched')

        if self._UnitsTrapped(s, m, 'units'):
            if debug:
                log.debug('day suffix trapped by unit match')
        elif m.group('qty') == s:
            # the whole string is the quantity
            found, s = s, ''
        else:
            # capture remaining string
            found = m.group('qty')
            before = s[:m.start('qty')].strip()
            after = s[m.end('qty'):].strip()

            # a dash right in front of the quantity is its sign
            if before.endswith('-'):
                found = '-%s' % found
                before = before[:-1]

            s = '%s %s' % (before, after)

    if found:
        if debug:
            log.debug('found (units) [%s][%s][%s]',
                      found, before, after)
        sourceTime = self._evalUnits(found, sourceTime)

    return s, sourceTime, bool(found)
1364
def _partialParseQUnits(self, s, sourceTime):
    """
    Test whether C{s} matches CRE_QUNITS, used by L{parse()}

    @type  s:          string
    @param s:          date/time text to evaluate
    @type  sourceTime: struct_time
    @param sourceTime: C{struct_time} value to use as the base

    @rtype:  tuple
    @return: tuple of the remaining date/time text, the resulting time
             value and a boolean telling whether quantity + short units
             matched
    """
    found = None
    before = after = ''

    # Quantity + Units
    m = self.ptc.CRE_QUNITS.search(s)
    if m is not None:
        if debug:
            log.debug('CRE_QUNITS matched')

        if self._UnitsTrapped(s, m, 'qunits'):
            if debug:
                log.debug(
                    'day suffix trapped by qunit match')
        elif m.group('qty') == s:
            # the whole string is the quantity
            found, s = s, ''
        else:
            # capture remaining string
            found = m.group('qty')
            before = s[:m.start('qty')].strip()
            after = s[m.end('qty'):].strip()

            # a dash right in front of the quantity is its sign
            if before.endswith('-'):
                found = '-%s' % found
                before = before[:-1]

            s = '%s %s' % (before, after)

    if found:
        if debug:
            log.debug('found (qunits) [%s][%s][%s]',
                      found, before, after)
        sourceTime = self._evalQUnits(found, sourceTime)

    return s, sourceTime, bool(found)
1411
def _partialParseDateStr(self, s, sourceTime):
    """
    Test whether C{s} matches CRE_DATE3, used by L{parse()}

    @type  s:          string
    @param s:          date/time text to evaluate
    @type  sourceTime: struct_time
    @param sourceTime: C{struct_time} value to use as the base

    @rtype:  tuple
    @return: tuple of the remaining date/time text, the resulting time
             value and a boolean telling whether a textual date matched
    """
    found = None
    before = after = ''

    # String date format
    m = self.ptc.CRE_DATE3.search(s)
    if m is not None:
        if m.group('date') == s:
            # the whole string is the date
            found, s = s, ''
        else:
            # capture remaining string
            dateStart = m.start('date')
            dateEnd = m.end('date')
            yearText = m.group('year')

            # Anything following the parsed date may be a time
            # expression; a 2 digit hour is often picked up as a
            # valid year.
            #   "February 24th 1PM" doesn't get caught
            #   "February 24th 12PM" does
            timeMatch = self.ptc.CRE_TIMEHMS2.search(s)
            timeFollows = timeMatch is not None and yearText is not None
            if not timeFollows:
                # "February 24th 12:00"
                timeMatch = self.ptc.CRE_TIMEHMS.search(s)
                timeFollows = timeMatch is not None and yearText is None

            if timeFollows:
                hoursStart = timeMatch.start('hours')
                # give the "year" digits back to the time expression
                if hoursStart < m.end('year'):
                    dateEnd = hoursStart

            found = s[dateStart:dateEnd]
            before = s[:dateStart]
            after = s[dateEnd:]

            s = '%s %s' % (before, after)

    if found:
        if debug:
            log.debug(
                'found (date3) [%s][%s][%s]', found, before, after)
        sourceTime = self._evalDateStr(found, sourceTime)

    return s, sourceTime, bool(found)
1485
def _partialParseDateStd(self, s, sourceTime):
    """
    Test whether C{s} matches CRE_DATE, used by L{parse()}

    @type  s:          string
    @param s:          date/time text to evaluate
    @type  sourceTime: struct_time
    @param sourceTime: C{struct_time} value to use as the base

    @rtype:  tuple
    @return: tuple of the remaining date/time text, the resulting time
             value and a boolean telling whether a standard date matched
    """
    found = None
    before = after = ''

    # Standard date format
    m = self.ptc.CRE_DATE.search(s)
    if m is not None:
        if m.group('date') == s:
            # the whole string is the date
            found, s = s, ''
        else:
            # capture remaining string
            found = m.group('date')
            before = s[:m.start('date')]
            after = s[m.end('date'):]
            s = '%s %s' % (before, after)

    if found:
        if debug:
            log.debug(
                'found (date) [%s][%s][%s]', found, before, after)
        sourceTime = self._evalDateStd(found, sourceTime)

    return s, sourceTime, bool(found)
1523
def _partialParseDayStr(self, s, sourceTime):
    """
    Test whether C{s} matches CRE_DAY, used by L{parse()}

    @type  s:          string
    @param s:          date/time text to evaluate
    @type  sourceTime: struct_time
    @param sourceTime: C{struct_time} value to use as the base

    @rtype:  tuple
    @return: tuple of the remaining date/time text, the resulting time
             value and a boolean telling whether a day word matched
    """
    found = None
    before = after = ''

    # Natural language day strings
    m = self.ptc.CRE_DAY.search(s)
    if m is not None:
        if m.group() == s:
            # the whole string is the day word
            found, s = s, ''
        else:
            # capture remaining string
            found = m.group()
            before = s[:m.start()]
            after = s[m.end():]
            s = '%s %s' % (before, after)

    if found:
        if debug:
            log.debug(
                'found (day) [%s][%s][%s]', found, before, after)
        sourceTime = self._evalDayStr(found, sourceTime)

    return s, sourceTime, bool(found)
1561
def _partialParseWeekday(self, s, sourceTime):
    """
    Test whether C{s} matches CRE_WEEKDAY, used by L{parse()}

    The weekday is only evaluated while the current context holds no
    date yet; the matched text is consumed either way.

    @type  s:          string
    @param s:          date/time text to evaluate
    @type  sourceTime: struct_time
    @param sourceTime: C{struct_time} value to use as the base

    @rtype:  tuple
    @return: tuple of the remaining date/time text, the resulting time
             value and a boolean telling whether a weekday matched
    """
    found = None
    before = after = ''

    ctx = self.currentContext
    log.debug('eval %s with context - %s, %s', s, ctx.hasDate, ctx.hasTime)

    # Weekday
    m = self.ptc.CRE_WEEKDAY.search(s)
    # a string that is itself a day word is left alone
    if m is not None and s not in self.ptc.dayOffsets:
        weekdayText = m.group()
        if weekdayText == s:
            # the whole string is the weekday
            found, s = s, ''
        else:
            # capture remaining string
            found = weekdayText
            before = s[:m.start()]
            after = s[m.end():]
            s = '%s %s' % (before, after)

    if found and not ctx.hasDate:
        if debug:
            log.debug(
                'found (weekday) [%s][%s][%s]', found, before, after)
        sourceTime = self._evalWeekday(found, sourceTime)

    return s, sourceTime, bool(found)
1604
def _partialParseTimeStr(self, s, sourceTime):
    """
    Test whether C{s} matches CRE_TIME, used by L{parse()}

    A string listed in C{ptc.re_values['now']} is accepted as well,
    even when CRE_TIME itself does not match.

    @type  s:          string
    @param s:          date/time text to evaluate
    @type  sourceTime: struct_time
    @param sourceTime: C{struct_time} value to use as the base

    @rtype:  tuple
    @return: tuple of the remaining date/time text, the resulting time
             value and a boolean telling whether a time word matched
    """
    found = None
    before = after = ''

    # Natural language time strings
    m = self.ptc.CRE_TIME.search(s)
    if m is not None or s in self.ptc.re_values['now']:
        if m is not None and m.group() != s:
            # capture remaining string
            found = m.group()
            before = s[:m.start()]
            after = s[m.end():]
            s = '%s %s' % (before, after)
        else:
            # the whole string is the time word
            found, s = s, ''

    if found:
        if debug:
            log.debug(
                'found (time) [%s][%s][%s]', found, before, after)
        sourceTime = self._evalTimeStr(found, sourceTime)

    return s, sourceTime, bool(found)
1642
def _partialParseMeridian(self, s, sourceTime):
    """
    Test whether C{s} matches CRE_TIMEHMS2, used by L{parse()}

    The matched HH(:MM(:SS)) am/pm text is normalised to
    "<time> <meridian>" before it is evaluated.

    @type  s:          string
    @param s:          date/time text to evaluate
    @type  sourceTime: struct_time
    @param sourceTime: C{struct_time} value to use as the base

    @rtype:  tuple
    @return: tuple of the remaining date/time text, the resulting time
             value and a boolean telling whether a meridian time matched
    """
    found = None
    before = after = ''

    # HH:MM(:SS) am/pm time strings
    m = self.ptc.CRE_TIMEHMS2.search(s)
    if m is not None:
        # seconds only count when minutes are present too
        fields = [m.group('hours')]
        if m.group('minutes') is not None:
            fields.append(m.group('minutes'))
            if m.group('seconds') is not None:
                fields.append(m.group('seconds'))

        found = ':'.join(fields)
        found += ' ' + m.group('meridian')

        before = s[:m.start()]
        after = s[m.end():]

        s = '%s %s' % (before, after)

    if found:
        if debug:
            log.debug('found (meridian) [%s][%s][%s]',
                      found, before, after)
        sourceTime = self._evalMeridian(found, sourceTime)

    return s, sourceTime, bool(found)
1687
def _partialParseTimeStd(self, s, sourceTime):
    """
    Test whether C{s} matches CRE_TIMEHMS, used by L{parse()}

    @type  s:          string
    @param s:          date/time text to evaluate
    @type  sourceTime: struct_time
    @param sourceTime: C{struct_time} value to use as the base

    @rtype:  tuple
    @return: tuple of the remaining date/time text, the resulting time
             value and a boolean telling whether a HH:MM(:SS) time matched
    """
    found = None
    before = after = ''

    # HH:MM(:SS) time strings
    m = self.ptc.CRE_TIMEHMS.search(s)
    if m is not None:
        hours = m.group('hours')
        minutes = m.group('minutes')
        seconds = m.group('seconds')

        if seconds is None:
            found = '%s:%s' % (hours, minutes)
            lastField = 'minutes'
        else:
            found = '%s:%s:%s' % (hours, minutes, seconds)
            lastField = 'seconds'

        # the time runs from the hours up to its last present field
        before = s[:m.start('hours')]
        after = s[m.end(lastField):]

        s = '%s %s' % (before, after)

    if found:
        if debug:
            log.debug(
                'found (hms) [%s][%s][%s]', found, before, after)
        sourceTime = self._evalTimeStd(found, sourceTime)

    return s, sourceTime, bool(found)
1729
def parseDT(self, datetimeString, sourceTime=None,
            tzinfo=None, version=None):
    """
    Like L{parse()}, but returns a C{datetime} instead of a
    C{struct_time} and also accepts datetime-like C{sourceTime} values.

    @type  datetimeString: string
    @param datetimeString: date/time text to evaluate
    @type  sourceTime:     struct_time, datetime, date, time
    @param sourceTime:     time value to use as the base; anything with
                           a C{timetuple()} method is converted through
                           it, everything else is handed to L{parse()}
                           as it is
    @type  tzinfo:         tzinfo
    @param tzinfo:         timezone to apply to the generated datetime;
                           its C{localize()} method is used when it has
                           one (as pytz zones do), otherwise it is
                           attached with C{datetime.replace()}
    @type  version:        integer
    @param version:        style version, default will use L{Calendar}
                           parameter version value

    @rtype:  tuple
    @return: tuple of: the resulting C{datetime} and the result
             flag/context, see L{parse()} for the details of the latter
    """
    # objects that know how to turn themselves into a time tuple do so;
    # anything else is the caller's responsibility
    if hasattr(sourceTime, 'timetuple'):
        sourceTime = sourceTime.timetuple()

    time_struct, ret_code = self.parse(
        datetimeString,
        sourceTime=sourceTime,
        version=version)

    # same return signature as parse(), just with a datetime in front
    naive = datetime.datetime(*time_struct[:6])

    if hasattr(tzinfo, 'localize'):
        dt = tzinfo.localize(naive)
    else:
        # plain tzinfo objects (None included) are simply attached
        dt = naive.replace(tzinfo=tzinfo)

    return dt, ret_code
1777
def parse(self, datetimeString, sourceTime=None, version=None):
    """
    Splits the given C{datetimeString} into tokens, finds the regex
    patterns that match and then calculates a C{struct_time} value from
    the chunks.

    If C{sourceTime} is given then the C{struct_time} value will be
    calculated from that value, otherwise from the current date/time.

    The second item of the returned tuple depends on C{version}::

        L{VERSION_FLAG_STYLE}: a flag telling what kind of value
        is being returned

            0 = not parsed at all
            1 = parsed as a C{date}
            2 = parsed as a C{time}
            3 = parsed as a C{datetime}

        L{VERSION_CONTEXT_STYLE}: an instance of L{pdtContext}

    @type  datetimeString: string
    @param datetimeString: date/time text to evaluate
    @type  sourceTime:     struct_time
    @param sourceTime:     C{struct_time} value to use as the base; a
                           C{datetime} or a plain tuple is accepted too
    @type  version:        integer
    @param version:        style version, default will use L{Calendar}
                           parameter version value

    @rtype:  tuple
    @return: tuple of: modified C{sourceTime} and the result flag/context
    @raise ValueError: if C{sourceTime} is set but is neither a
                       C{datetime}, a C{struct_time} nor a tuple
    """
    if debug:
        log.debug('parse()')

    # drop periods after words and turn quotes hugging a word into spaces
    for pattern, replacement in ((r'(\w)\.(\s)', r'\1\2'),
                                 (r'(\w)[\'"](\s|$)', r'\1 \2'),
                                 (r'(\s|^)[\'"](\w)', r'\1 \2')):
        datetimeString = re.sub(pattern, replacement, datetimeString)

    if not sourceTime:
        sourceTime = time.localtime()
    elif isinstance(sourceTime, datetime.datetime):
        if debug:
            log.debug('coercing datetime to timetuple')
        sourceTime = sourceTime.timetuple()
    elif not isinstance(sourceTime, (time.struct_time, tuple)):
        raise ValueError('sourceTime is not a struct_time')

    with self.context() as ctx:
        s = datetimeString.lower().strip()
        if debug:
            log.debug('remainedString (before parsing): [%s]', s)

        # tried in this order; the first one that matches consumes its
        # part of the text and the scan starts over on the rest
        parsers = (self._partialParseModifier,
                   self._partialParseUnits,
                   self._partialParseQUnits,
                   self._partialParseDateStr,
                   self._partialParseDateStd,
                   self._partialParseDayStr,
                   self._partialParseWeekday,
                   self._partialParseTimeStr,
                   self._partialParseMeridian,
                   self._partialParseTimeStd)

        while s:
            for tryParse in parsers:
                remaining, parsedTime, matched = tryParse(s, sourceTime)
                if matched:
                    s = remaining.strip()
                    sourceTime = parsedTime
                    break
            else:
                # nothing matched
                s = ''

            if debug:
                log.debug('hasDate: [%s], hasTime: [%s]',
                          ctx.hasDate, ctx.hasTime)
                log.debug('remainedString: [%s]', s)

        # String is not parsed at all
        if sourceTime is None:
            if debug:
                log.debug('not parsed [%s]', str(sourceTime))
            sourceTime = time.localtime()

    if not isinstance(sourceTime, time.struct_time):
        sourceTime = time.struct_time(sourceTime)

    if version is None:
        version = self.version

    if version == VERSION_CONTEXT_STYLE:
        return sourceTime, ctx
    return sourceTime, ctx.dateTimeFlag
1869
def inc(self, source, month=None, year=None):
    """
    Increment the given C{source} date by a number of months and/or
    years.

    This routine is needed because Python's C{timedelta()} function
    does not allow for month or year increments.

    Years are folded into months.  The whole months are applied to the
    calendar fields, with the day clamped to the last day of the target
    month; a fractional month is added as that fraction of the target
    month's length in days.

    @type  source: datetime
    @param source: value to increment; it has to offer C{year},
                   C{month}, C{day} and C{replace()}
    @type  month:  float or integer
    @param month:  optional number of months to increment; a value
                   that cannot be converted to a number counts as 0
    @type  year:   float or integer
    @param year:   optional number of years to increment; a value
                   that cannot be converted to a number counts as 0

    @rtype:  datetime
    @return: C{source} incremented by the number of months and/or years
    @raise OverflowError: if the resulting year is outside
                          C{datetime.MINYEAR}..C{datetime.MAXYEAR}
    """
    def _number(value):
        # anything that is not number-like contributes nothing
        try:
            return float(value)
        except (TypeError, ValueError):
            return 0

    newYear = source.year
    newMonth = source.month
    newDay = source.day

    totalMonths = _number(month)
    totalMonths += _number(year) * 12

    fraction = 0.0
    monthLength = 0
    if totalMonths:
        wholeMonths = int(totalMonths)
        fraction = totalMonths - wholeMonths

        # split into years and leftover months, truncating toward zero
        yearShift = int(wholeMonths / 12.0)
        newMonth += wholeMonths - yearShift * 12

        if newMonth < 1:
            # crossed the start of the year
            yearShift -= 1
            newMonth += 12
        elif newMonth > 12:
            # crossed the end of the year
            yearShift += 1
            newMonth -= 12

        newYear += yearShift

        # if the day ends up past the last day of
        # the new month, set it to the last day
        monthLength = self.ptc.daysInMonth(newMonth, newYear)
        newDay = min(newDay, monthLength)

    if not (datetime.MINYEAR <= newYear <= datetime.MAXYEAR):
        raise OverflowError('year is out of range')

    moved = source.replace(year=newYear, month=newMonth, day=newDay)
    if fraction:
        moved += datetime.timedelta(days=fraction * monthLength)
    return source + (moved - source)
1938
1939 - def nlp(self, inputString, sourceTime=None, version=None):
1940 """Utilizes parse() after making judgements about what datetime 1941 information belongs together. 1942 1943 It makes logical groupings based on proximity and returns a parsed 1944 datetime for each matched grouping of datetime text, along with 1945 location info within the given inputString. 1946 1947 @type inputString: string 1948 @param inputString: natural language text to evaluate 1949 @type sourceTime: struct_time 1950 @param sourceTime: C{struct_time} value to use as the base 1951 @type version: integer 1952 @param version: style version, default will use L{Calendar} 1953 parameter version value 1954 1955 @rtype: tuple or None 1956 @return: tuple of tuples in the format (parsed_datetime as 1957 datetime.datetime, flags as int, start_pos as int, 1958 end_pos as int, matched_text as string) or None if there 1959 were no matches 1960 """ 1961 1962 orig_inputstring = inputString 1963 1964 # replace periods at the end of sentences w/ spaces 1965 # opposed to removing them altogether in order to 1966 # retain relative positions (identified by alpha, period, space). 1967 # this is required for some of the regex patterns to match 1968 inputString = re.sub(r'(\w)(\.)(\s)', r'\1 \3', inputString).lower() 1969 inputString = re.sub(r'(\w)(\'|")(\s|$)', r'\1 \3', inputString) 1970 inputString = re.sub(r'(\s|^)(\'|")(\w)', r'\1 \3', inputString) 1971 1972 startpos = 0 # the start position in the inputString during the loop 1973 1974 # list of lists in format: 1975 # [startpos, endpos, matchedstring, flags, type] 1976 matches = [] 1977 1978 while startpos < len(inputString): 1979 1980 # empty match 1981 leftmost_match = [0, 0, None, 0, None] 1982 1983 # Modifier like next\prev.. 
1984 m = self.ptc.CRE_MODIFIER.search(inputString[startpos:]) 1985 if m is not None: 1986 if leftmost_match[1] == 0 or \ 1987 leftmost_match[0] > m.start() + startpos: 1988 leftmost_match[0] = m.start() + startpos 1989 leftmost_match[1] = m.end() + startpos 1990 leftmost_match[2] = m.group() 1991 leftmost_match[3] = 0 1992 leftmost_match[4] = 'modifier' 1993 1994 # Quantity + Units 1995 m = self.ptc.CRE_UNITS.search(inputString[startpos:]) 1996 if m is not None: 1997 debug and log.debug('CRE_UNITS matched') 1998 if self._UnitsTrapped(inputString[startpos:], m, 'units'): 1999 debug and log.debug('day suffix trapped by unit match') 2000 else: 2001 2002 if leftmost_match[1] == 0 or \ 2003 leftmost_match[0] > m.start('qty') + startpos: 2004 leftmost_match[0] = m.start('qty') + startpos 2005 leftmost_match[1] = m.end('qty') + startpos 2006 leftmost_match[2] = m.group('qty') 2007 leftmost_match[3] = 3 2008 leftmost_match[4] = 'units' 2009 2010 if m.start('qty') > 0 and \ 2011 inputString[m.start('qty') - 1] == '-': 2012 leftmost_match[0] = leftmost_match[0] - 1 2013 leftmost_match[2] = '-' + leftmost_match[2] 2014 2015 # Quantity + Units 2016 m = self.ptc.CRE_QUNITS.search(inputString[startpos:]) 2017 if m is not None: 2018 debug and log.debug('CRE_QUNITS matched') 2019 if self._UnitsTrapped(inputString[startpos:], m, 'qunits'): 2020 debug and log.debug('day suffix trapped by qunit match') 2021 else: 2022 if leftmost_match[1] == 0 or \ 2023 leftmost_match[0] > m.start('qty') + startpos: 2024 leftmost_match[0] = m.start('qty') + startpos 2025 leftmost_match[1] = m.end('qty') + startpos 2026 leftmost_match[2] = m.group('qty') 2027 leftmost_match[3] = 3 2028 leftmost_match[4] = 'qunits' 2029 2030 if m.start('qty') > 0 and \ 2031 inputString[m.start('qty') - 1] == '-': 2032 leftmost_match[0] = leftmost_match[0] - 1 2033 leftmost_match[2] = '-' + leftmost_match[2] 2034 2035 m = self.ptc.CRE_DATE3.search(inputString[startpos:]) 2036 # NO LONGER NEEDED, THE REGEXP HANDLED 
MTHNAME NOW 2037 # for match in self.ptc.CRE_DATE3.finditer(inputString[startpos:]): 2038 # to prevent "HH:MM(:SS) time strings" expressions from 2039 # triggering this regex, we checks if the month field exists 2040 # in the searched expression, if it doesn't exist, the date 2041 # field is not valid 2042 # if match.group('mthname'): 2043 # m = self.ptc.CRE_DATE3.search(inputString[startpos:], 2044 # match.start()) 2045 # break 2046 2047 # String date format 2048 if m is not None: 2049 if leftmost_match[1] == 0 or \ 2050 leftmost_match[0] > m.start('date') + startpos: 2051 leftmost_match[0] = m.start('date') + startpos 2052 leftmost_match[1] = m.end('date') + startpos 2053 leftmost_match[2] = m.group('date') 2054 leftmost_match[3] = 1 2055 leftmost_match[4] = 'dateStr' 2056 2057 # Standard date format 2058 m = self.ptc.CRE_DATE.search(inputString[startpos:]) 2059 if m is not None: 2060 if leftmost_match[1] == 0 or \ 2061 leftmost_match[0] > m.start('date') + startpos: 2062 leftmost_match[0] = m.start('date') + startpos 2063 leftmost_match[1] = m.end('date') + startpos 2064 leftmost_match[2] = m.group('date') 2065 leftmost_match[3] = 1 2066 leftmost_match[4] = 'dateStd' 2067 2068 # Natural language day strings 2069 m = self.ptc.CRE_DAY.search(inputString[startpos:]) 2070 if m is not None: 2071 if leftmost_match[1] == 0 or \ 2072 leftmost_match[0] > m.start() + startpos: 2073 leftmost_match[0] = m.start() + startpos 2074 leftmost_match[1] = m.end() + startpos 2075 leftmost_match[2] = m.group() 2076 leftmost_match[3] = 1 2077 leftmost_match[4] = 'dayStr' 2078 2079 # Weekday 2080 m = self.ptc.CRE_WEEKDAY.search(inputString[startpos:]) 2081 if m is not None: 2082 if inputString[startpos:] not in self.ptc.dayOffsets: 2083 if leftmost_match[1] == 0 or \ 2084 leftmost_match[0] > m.start() + startpos: 2085 leftmost_match[0] = m.start() + startpos 2086 leftmost_match[1] = m.end() + startpos 2087 leftmost_match[2] = m.group() 2088 leftmost_match[3] = 1 2089 leftmost_match[4] 
= 'weekdy' 2090 2091 # Natural language time strings 2092 m = self.ptc.CRE_TIME.search(inputString[startpos:]) 2093 if m is not None: 2094 if leftmost_match[1] == 0 or \ 2095 leftmost_match[0] > m.start() + startpos: 2096 leftmost_match[0] = m.start() + startpos 2097 leftmost_match[1] = m.end() + startpos 2098 leftmost_match[2] = m.group() 2099 leftmost_match[3] = 2 2100 leftmost_match[4] = 'timeStr' 2101 2102 # HH:MM(:SS) am/pm time strings 2103 m = self.ptc.CRE_TIMEHMS2.search(inputString[startpos:]) 2104 if m is not None: 2105 if leftmost_match[1] == 0 or \ 2106 leftmost_match[0] > m.start('hours') + startpos: 2107 leftmost_match[0] = m.start('hours') + startpos 2108 leftmost_match[1] = m.end('meridian') + startpos 2109 leftmost_match[2] = inputString[leftmost_match[0]: 2110 leftmost_match[1]] 2111 leftmost_match[3] = 2 2112 leftmost_match[4] = 'meridian' 2113 2114 # HH:MM(:SS) time strings 2115 m = self.ptc.CRE_TIMEHMS.search(inputString[startpos:]) 2116 if m is not None: 2117 if leftmost_match[1] == 0 or \ 2118 leftmost_match[0] > m.start('hours') + startpos: 2119 leftmost_match[0] = m.start('hours') + startpos 2120 if m.group('seconds') is not None: 2121 leftmost_match[1] = m.end('seconds') + startpos 2122 else: 2123 leftmost_match[1] = m.end('minutes') + startpos 2124 leftmost_match[2] = inputString[leftmost_match[0]: 2125 leftmost_match[1]] 2126 leftmost_match[3] = 2 2127 leftmost_match[4] = 'timeStd' 2128 2129 # Units only; must be preceded by a modifier 2130 if len(matches) > 0 and matches[-1][3] == 0: 2131 m = self.ptc.CRE_UNITS_ONLY.search(inputString[startpos:]) 2132 # Ensure that any match is immediately proceded by the 2133 # modifier. 
"Next is the word 'month'" should not parse as a 2134 # date while "next month" should 2135 if m is not None and \ 2136 inputString[startpos:startpos + m.start()].strip() == '': 2137 debug and log.debug('CRE_UNITS_ONLY matched [%s]', 2138 m.group()) 2139 if leftmost_match[1] == 0 or \ 2140 leftmost_match[0] > m.start() + startpos: 2141 leftmost_match[0] = m.start() + startpos 2142 leftmost_match[1] = m.end() + startpos 2143 leftmost_match[2] = m.group() 2144 leftmost_match[3] = 3 2145 leftmost_match[4] = 'unitsOnly' 2146 2147 # set the start position to the end pos of the leftmost match 2148 startpos = leftmost_match[1] 2149 2150 # nothing was detected 2151 # so break out of the loop 2152 if startpos == 0: 2153 startpos = len(inputString) 2154 else: 2155 if leftmost_match[3] > 0: 2156 m = self.ptc.CRE_NLP_PREFIX.search( 2157 inputString[:leftmost_match[0]] + ' ' + str(leftmost_match[3])) 2158 if m is not None: 2159 leftmost_match[0] = m.start('nlp_prefix') 2160 leftmost_match[2] = inputString[leftmost_match[0]: 2161 leftmost_match[1]] 2162 matches.append(leftmost_match) 2163 2164 # find matches in proximity with one another and 2165 # return all the parsed values 2166 proximity_matches = [] 2167 if len(matches) > 1: 2168 combined = '' 2169 from_match_index = 0 2170 date = matches[0][3] == 1 2171 time = matches[0][3] == 2 2172 units = matches[0][3] == 3 2173 for i in range(1, len(matches)): 2174 2175 # test proximity (are there characters between matches?) 
2176 endofprevious = matches[i - 1][1] 2177 begofcurrent = matches[i][0] 2178 if orig_inputstring[endofprevious: 2179 begofcurrent].lower().strip() != '': 2180 # this one isn't in proximity, but maybe 2181 # we have enough to make a datetime 2182 # TODO: make sure the combination of 2183 # formats (modifier, dateStd, etc) makes logical sense 2184 # before parsing together 2185 if date or time or units: 2186 combined = orig_inputstring[matches[from_match_index] 2187 [0]:matches[i - 1][1]] 2188 parsed_datetime, flags = self.parse(combined, 2189 sourceTime, 2190 version) 2191 proximity_matches.append(( 2192 datetime.datetime(*parsed_datetime[:6]), 2193 flags, 2194 matches[from_match_index][0], 2195 matches[i - 1][1], 2196 combined)) 2197 # not in proximity, reset starting from current 2198 from_match_index = i 2199 date = matches[i][3] == 1 2200 time = matches[i][3] == 2 2201 units = matches[i][3] == 3 2202 continue 2203 else: 2204 if matches[i][3] == 1: 2205 date = True 2206 if matches[i][3] == 2: 2207 time = True 2208 if matches[i][3] == 3: 2209 units = True 2210 2211 # check last 2212 # we have enough to make a datetime 2213 if date or time or units: 2214 combined = orig_inputstring[matches[from_match_index][0]: 2215 matches[len(matches) - 1][1]] 2216 parsed_datetime, flags = self.parse(combined, sourceTime, 2217 version) 2218 proximity_matches.append(( 2219 datetime.datetime(*parsed_datetime[:6]), 2220 flags, 2221 matches[from_match_index][0], 2222 matches[len(matches) - 1][1], 2223 combined)) 2224 2225 elif len(matches) == 0: 2226 return None 2227 else: 2228 if matches[0][3] == 0: # not enough info to parse 2229 return None 2230 else: 2231 combined = orig_inputstring[matches[0][0]:matches[0][1]] 2232 parsed_datetime, flags = self.parse(matches[0][2], sourceTime, 2233 version) 2234 proximity_matches.append(( 2235 datetime.datetime(*parsed_datetime[:6]), 2236 flags, 2237 matches[0][0], 2238 matches[0][1], 2239 combined)) 2240 2241 return tuple(proximity_matches)
2242
2243 2244 -def _initSymbols(ptc):
2245 """ 2246 Initialize symbols and single character constants. 2247 """ 2248 # build am and pm lists to contain 2249 # original case, lowercase, first-char and dotted 2250 # versions of the meridian text 2251 ptc.am = ['', ''] 2252 ptc.pm = ['', ''] 2253 for idx, xm in enumerate(ptc.locale.meridian[:2]): 2254 # 0: am 2255 # 1: pm 2256 target = ['am', 'pm'][idx] 2257 setattr(ptc, target, [xm]) 2258 target = getattr(ptc, target) 2259 if xm: 2260 lxm = xm.lower() 2261 target.extend((xm[0], '{0}.{1}.'.format(*xm), 2262 lxm, lxm[0], '{0}.{1}.'.format(*lxm)))
2263
class Constants(object):

    """
    Default set of constants for parsedatetime.

    If PyICU is present, then the class will first try to get PyICU
    to return a locale specified by C{localeID}.  If either C{localeID} is
    None or if the locale does not exist within PyICU, then each of the
    locales defined in C{fallbackLocales} is tried in order.

    If PyICU is not present or none of the specified locales can be used,
    then the class will initialize itself to the en_US locale.

    if PyICU is not present or not requested, only the locales defined by
    C{pdtLocales} will be searched.

    Attributes named C{CRE_*} are compiled lazily from the matching
    C{RE_*} source on first access (see L{__getattr__}).
    """

    def __init__(self, localeID=None, usePyICU=True,
                 fallbackLocales=('en_US',)):
        """
        Select a locale and build every regular expression source string.

        @type  localeID:        string
        @param localeID:        locale to request from C{get_icu} and, if
                                that yields nothing usable, to look up in
                                C{pdtLocales}
        @type  usePyICU:        boolean
        @param usePyICU:        when true, C{get_icu(localeID)} is tried
                                first
        @type  fallbackLocales: iterable of strings
        @param fallbackLocales: locale ids tried in order when C{localeID}
                                is not in C{pdtLocales}; the argument is
                                copied and C{'en_US'} is appended to the
                                copy if missing
        """
        self.localeID = localeID
        # work on a private copy so the caller's sequence is never mutated
        self.fallbackLocales = list(fallbackLocales)

        if 'en_US' not in self.fallbackLocales:
            self.fallbackLocales.append('en_US')

        # define non-locale specific constants
        self.locale = None
        self.usePyICU = usePyICU

        # starting cache of leap years
        # daysInMonth will add to this if during
        # runtime it gets a request for a year not found
        self._leapYears = list(range(1904, 2097, 4))

        self.Second = 1
        self.Minute = 60  # 60 * self.Second
        self.Hour = 3600  # 60 * self.Minute
        self.Day = 86400  # 24 * self.Hour
        self.Week = 604800  # 7 * self.Day
        self.Month = 2592000  # 30 * self.Day
        self.Year = 31536000  # 365 * self.Day

        self._DaysInMonthList = (31, 28, 31, 30, 31, 30,
                                 31, 31, 30, 31, 30, 31)
        self.rangeSep = '-'
        self.BirthdayEpoch = 50

        # When True the starting time for all relative calculations will come
        # from the given SourceTime, otherwise it will be self.StartHour

        self.StartTimeFromSourceTime = False

        # The hour of the day that will be used as the starting time for all
        # relative calculations when self.StartTimeFromSourceTime is False

        self.StartHour = 9

        # YearParseStyle controls how we parse "Jun 12", i.e. dates that do
        # not have a year present.  The default is to compare the date given
        # to the current date, and if prior, then assume the next year.
        # Setting this to 0 will prevent that.

        self.YearParseStyle = 1

        # DOWParseStyle controls how we parse "Tuesday"
        # If the current day was Thursday and the text to parse is "Tuesday"
        # then the following table shows how each style would be returned
        # -1, 0, +1
        #
        # Current day marked as ***
        #
        #          Sun Mon Tue Wed Thu Fri Sat
        # week -1
        # current         -1,0     ***
        # week +1          +1
        #
        # If the current day was Monday and the text to parse is "Tuesday"
        # then the following table shows how each style would be returned
        # -1, 0, +1
        #
        #          Sun Mon Tue Wed Thu Fri Sat
        # week -1           -1
        # current      *** 0,+1
        # week +1

        self.DOWParseStyle = 1

        # CurrentDOWParseStyle controls how we parse "Friday"
        # If the current day was Friday and the text to parse is "Friday"
        # then the following table shows how each style would be returned
        # True/False. This also depends on DOWParseStyle.
        #
        # Current day marked as ***
        #
        # DOWParseStyle = 0
        #          Sun Mon Tue Wed Thu Fri Sat
        # week -1
        # current                      T,F
        # week +1
        #
        # DOWParseStyle = -1
        #          Sun Mon Tue Wed Thu Fri Sat
        # week -1                       F
        # current                       T
        # week +1
        #
        # DOWParseStyle = +1
        #
        #          Sun Mon Tue Wed Thu Fri Sat
        # week -1
        # current                       T
        # week +1                       F

        self.CurrentDOWParseStyle = False

        if self.usePyICU:
            self.locale = get_icu(self.localeID)

            # no usable ICU locale: fall back to the built-in locales
            if self.locale is None or self.locale.icu is None:
                self.usePyICU = False
                self.locale = None

        if self.locale is None:
            if self.localeID not in pdtLocales:
                # take the first fallback that is known; if none is known
                # localeID is left at the last fallback tried
                for fallback in self.fallbackLocales:
                    self.localeID = fallback
                    if fallback in pdtLocales:
                        break

            self.locale = pdtLocales[self.localeID]

        if self.locale is not None:

            def _getLocaleDataAdjusted(localeData):
                """
                If localeData is defined as ["mon|mnd", 'tu|tues'...] then
                this function splits those definitions on |
                """
                adjusted = []
                for d in localeData:
                    # split() returns [d] unchanged when there is no '|'
                    adjusted.extend(d.split('|'))
                return adjusted

            def re_join(g):
                """Join the items of g into a regex-escaped alternation."""
                return '|'.join(re.escape(i) for i in g)

            mths = _getLocaleDataAdjusted(self.locale.Months)
            smths = _getLocaleDataAdjusted(self.locale.shortMonths)
            swds = _getLocaleDataAdjusted(self.locale.shortWeekdays)
            wds = _getLocaleDataAdjusted(self.locale.Weekdays)

            # escape any regex special characters that may be found
            self.locale.re_values['months'] = re_join(mths)
            self.locale.re_values['shortmonths'] = re_join(smths)
            self.locale.re_values['days'] = re_join(wds)
            self.locale.re_values['shortdays'] = re_join(swds)
            self.locale.re_values['dayoffsets'] = \
                re_join(self.locale.dayOffsets)
            self.locale.re_values['numbers'] = \
                re_join(self.locale.numbers)
            self.locale.re_values['decimal_mark'] = \
                re.escape(self.locale.decimal_mark)

            units = [unit for group in self.locale.units.values()
                     for unit in group]  # flatten
            units.sort(key=len, reverse=True)  # longest first
            self.locale.re_values['units'] = re_join(units)
            self.locale.re_values['modifiers'] = \
                re_join(self.locale.Modifiers)
            self.locale.re_values['sources'] = \
                re_join(self.locale.re_sources)

            # For distinguishing numeric dates from times, look for timeSep
            # and meridian, if specified in the locale
            self.locale.re_values['timecomponents'] = \
                re_join(self.locale.timeSep + self.locale.meridian)

            # build weekday offsets - yes, it assumes the Weekday and
            # shortWeekday lists are in the same order and Mon..Sun
            # (Python style)
            def _buildOffsets(offsetDict, localeData, indexStart):
                """
                Map every name in localeData (and each of its |-separated
                aliases) to its position, counting from indexStart.
                """
                o = indexStart
                for key in localeData:
                    for k in key.split('|'):
                        offsetDict[k] = o
                    o += 1

            _buildOffsets(self.locale.WeekdayOffsets,
                          self.locale.Weekdays, 0)
            _buildOffsets(self.locale.WeekdayOffsets,
                          self.locale.shortWeekdays, 0)

            # build month offsets - yes, it assumes the Months and shortMonths
            # lists are in the same order and Jan..Dec
            _buildOffsets(self.locale.MonthOffsets,
                          self.locale.Months, 1)
            _buildOffsets(self.locale.MonthOffsets,
                          self.locale.shortMonths, 1)

        _initSymbols(self)

        # TODO: add code to parse the date formats and build the regexes up
        # from sub-parts, find all hard-coded uses of date/time separators

        # not being used in code, but kept in case others are manually
        # utilizing this regex for their own purposes
        self.RE_DATE4 = r'''(?P<date>
                                (
                                    (
                                        (?P<day>\d\d?)
                                        (?P<suffix>{daysuffix})?
                                        (,)?
                                        (\s)*
                                    )
                                    (?P<mthname>
                                        \b({months}|{shortmonths})\b
                                    )\s*
                                    (?P<year>\d\d
                                        (\d\d)?
                                    )?
                                )
                            )'''.format(**self.locale.re_values)

        # still not completely sure of the behavior of the regex and
        # whether it would be best to consume all possible irrelevant
        # characters before the option groups (but within the {1,3} repetition
        # group or inside of each option group, as it currently does
        # however, right now, all tests are passing that were,
        # including fixing the bug of matching a 4-digit year as ddyy
        # when the day is absent from the string
        self.RE_DATE3 = r'''(?P<date>
                                (?:
                                    (?:^|\s+)
                                    (?P<mthname>
                                        {months}|{shortmonths}
                                    )\b
                                    |
                                    (?:^|\s+)
                                    (?P<day>[1-9]|[012]\d|3[01])
                                    (?P<suffix>{daysuffix}|)\b
                                    (?!\s*(?:{timecomponents}))
                                    |
                                    ,?\s+
                                    (?P<year>\d\d(?:\d\d|))\b
                                    (?!\s*(?:{timecomponents}))
                                ){{1,3}}
                                (?(mthname)|$-^)
                            )'''.format(**self.locale.re_values)

        # not being used in code, but kept in case others are manually
        # utilizing this regex for their own purposes
        self.RE_MONTH = r'''(\s+|^)
                            (?P<month>
                                (
                                    (?P<mthname>
                                        \b({months}|{shortmonths})\b
                                    )
                                    (\s*
                                        (?P<year>(\d{{4}}))
                                    )?
                                )
                            )
                            (?=\s+|$|[^\w])'''.format(**self.locale.re_values)

        self.RE_WEEKDAY = r'''\b
                              (?:
                                  {days}|{shortdays}
                              )
                              \b'''.format(**self.locale.re_values)

        self.RE_NUMBER = (r'(\b(?:{numbers})\b|\d+(?:{decimal_mark}\d+|))'
                          .format(**self.locale.re_values))

        self.RE_SPECIAL = (r'(?P<special>^[{specials}]+)\s+'
                           .format(**self.locale.re_values))

        self.RE_UNITS_ONLY = (r'''\b({units})\b'''
                              .format(**self.locale.re_values))

        self.RE_UNITS = r'''\b(?P<qty>
                                -?
                                (?:\d+(?:{decimal_mark}\d+|)|(?:{numbers})\b)\s*
                                (?P<units>{units})
                            )\b'''.format(**self.locale.re_values)

        self.RE_QUNITS = r'''\b(?P<qty>
                                 -?
                                 (?:\d+(?:{decimal_mark}\d+|)|(?:{numbers})\s+)\s*
                                 (?P<qunits>{qunits})
                             )\b'''.format(**self.locale.re_values)

        self.RE_MODIFIER = r'''\b(?:
                                   {modifiers}
                               )\b'''.format(**self.locale.re_values)

        self.RE_TIMEHMS = r'''([\s(\["'-]|^)
                              (?P<hours>\d\d?)
                              (?P<tsep>{timeseparator}|)
                              (?P<minutes>\d\d)
                              (?:(?P=tsep)
                                  (?P<seconds>\d\d
                                      (?:[\.,]\d+)?
                                  )
                              )?\b'''.format(**self.locale.re_values)

        self.RE_TIMEHMS2 = r'''([\s(\["'-]|^)
                               (?P<hours>\d\d?)
                               (?:
                                   (?P<tsep>{timeseparator}|)
                                   (?P<minutes>\d\d?)
                                   (?:(?P=tsep)
                                       (?P<seconds>\d\d?
                                           (?:[\.,]\d+)?
                                       )
                                   )?
                               )?'''.format(**self.locale.re_values)

        # 1, 2, and 3 here refer to the type of match date, time, or units
        self.RE_NLP_PREFIX = r'''\b(?P<nlp_prefix>
                                  (on)
                                  (\s)+1
                                  |
                                  (at|in)
                                  (\s)+2
                                  |
                                  (in)
                                  (\s)+3
                                 )'''

        if 'meridian' in self.locale.re_values:
            self.RE_TIMEHMS2 += (r'\s*(?P<meridian>{meridian})\b'
                                 .format(**self.locale.re_values))
        else:
            self.RE_TIMEHMS2 += r'\b'

        # Always support common . and - separators
        dateSeps = ''.join(re.escape(s)
                           for s in self.locale.dateSep + ['-', '.'])

        self.RE_DATE = r'''([\s(\["'-]|^)
                           (?P<date>
                                \d\d?[{0}]\d\d?(?:[{0}]\d\d(?:\d\d)?)?
                                |
                                \d{{4}}[{0}]\d\d?[{0}]\d\d?
                            )
                           \b'''.format(dateSeps)

        self.RE_DATE2 = r'[{0}]'.format(dateSeps)

        assert 'dayoffsets' in self.locale.re_values

        self.RE_DAY = r'''\b
                          (?:
                              {dayoffsets}
                          )
                          \b'''.format(**self.locale.re_values)

        self.RE_DAY2 = r'''(?P<day>\d\d?)
                           (?P<suffix>{daysuffix})?
                       '''.format(**self.locale.re_values)

        self.RE_TIME = r'''\b
                           (?:
                               {sources}
                           )
                           \b'''.format(**self.locale.re_values)

        self.RE_REMAINING = r'\s+'

        # Regex for date/time ranges
        self.RE_RTIMEHMS = r'''(\s*|^)
                               (\d\d?){timeseparator}
                               (\d\d)
                               ({timeseparator}(\d\d))?
                               (\s*|$)'''.format(**self.locale.re_values)

        self.RE_RTIMEHMS2 = (r'''(\s*|^)
                                 (\d\d?)
                                 ({timeseparator}(\d\d?))?
                                 ({timeseparator}(\d\d?))?'''
                             .format(**self.locale.re_values))

        if 'meridian' in self.locale.re_values:
            self.RE_RTIMEHMS2 += (r'\s*({meridian})'
                                  .format(**self.locale.re_values))

        self.RE_RDATE = r'(\d+([%s]\d+)+)' % dateSeps
        self.RE_RDATE3 = r'''(
                                (
                                    (
                                        \b({months})\b
                                    )\s*
                                    (
                                        (\d\d?)
                                        (\s?|{daysuffix}|$)+
                                    )?
                                    (,\s*\d{{4}})?
                                )
                            )'''.format(**self.locale.re_values)

        # "06/07/06 - 08/09/06"
        self.DATERNG1 = (r'{0}\s*{rangeseparator}\s*{0}'
                         .format(self.RE_RDATE, **self.locale.re_values))

        # "march 31 - june 1st, 2006"
        self.DATERNG2 = (r'{0}\s*{rangeseparator}\s*{0}'
                         .format(self.RE_RDATE3, **self.locale.re_values))

        # "march 1st - 13th"
        self.DATERNG3 = (
            r'{0}\s*{rangeseparator}\s*(\d\d?)\s*(rd|st|nd|th)?'
            .format(self.RE_RDATE3, **self.locale.re_values))

        # "4:00:55 pm - 5:90:44 am", '4p-5p'
        self.TIMERNG1 = (r'{0}\s*{rangeseparator}\s*{0}'
                         .format(self.RE_RTIMEHMS2, **self.locale.re_values))

        self.TIMERNG2 = (r'{0}\s*{rangeseparator}\s*{0}'
                         .format(self.RE_RTIMEHMS, **self.locale.re_values))

        # "4-5pm "
        self.TIMERNG3 = (r'\d\d?\s*{rangeseparator}\s*{0}'
                         .format(self.RE_RTIMEHMS2, **self.locale.re_values))

        # "4:30-5pm "
        self.TIMERNG4 = (r'{0}\s*{rangeseparator}\s*{1}'
                         .format(self.RE_RTIMEHMS, self.RE_RTIMEHMS2,
                                 **self.locale.re_values))

        # every pattern above is written for (and compiled with) these flags
        self.re_option = re.IGNORECASE | re.VERBOSE
        self.cre_source = {'CRE_SPECIAL': self.RE_SPECIAL,
                           'CRE_NUMBER': self.RE_NUMBER,
                           'CRE_UNITS': self.RE_UNITS,
                           'CRE_UNITS_ONLY': self.RE_UNITS_ONLY,
                           'CRE_QUNITS': self.RE_QUNITS,
                           'CRE_MODIFIER': self.RE_MODIFIER,
                           'CRE_TIMEHMS': self.RE_TIMEHMS,
                           'CRE_TIMEHMS2': self.RE_TIMEHMS2,
                           'CRE_DATE': self.RE_DATE,
                           'CRE_DATE2': self.RE_DATE2,
                           'CRE_DATE3': self.RE_DATE3,
                           'CRE_DATE4': self.RE_DATE4,
                           'CRE_MONTH': self.RE_MONTH,
                           'CRE_WEEKDAY': self.RE_WEEKDAY,
                           'CRE_DAY': self.RE_DAY,
                           'CRE_DAY2': self.RE_DAY2,
                           'CRE_TIME': self.RE_TIME,
                           'CRE_REMAINING': self.RE_REMAINING,
                           'CRE_RTIMEHMS': self.RE_RTIMEHMS,
                           'CRE_RTIMEHMS2': self.RE_RTIMEHMS2,
                           'CRE_RDATE': self.RE_RDATE,
                           'CRE_RDATE3': self.RE_RDATE3,
                           'CRE_TIMERNG1': self.TIMERNG1,
                           'CRE_TIMERNG2': self.TIMERNG2,
                           'CRE_TIMERNG3': self.TIMERNG3,
                           'CRE_TIMERNG4': self.TIMERNG4,
                           'CRE_DATERNG1': self.DATERNG1,
                           'CRE_DATERNG2': self.DATERNG2,
                           'CRE_DATERNG3': self.DATERNG3,
                           'CRE_NLP_PREFIX': self.RE_NLP_PREFIX}
        self.cre_keys = set(self.cre_source.keys())

    def __getattr__(self, name):
        """
        Resolve attributes that normal lookup did not find.

        A name listed in C{cre_keys} is compiled from C{cre_source} with
        C{re_option}, stored on the instance (so this hook is not reached
        again for it) and returned.  A name listed in
        C{self.locale.locale_keys} is read from the locale object.

        @type  name: string
        @param name: attribute being looked up

        @raise AttributeError: for any other name, and for every name while
                               C{cre_keys} and C{locale} are not set yet
        """
        # cre_keys and locale are read through __dict__: going through
        # normal attribute access here would re-enter __getattr__ and
        # recurse without end on an instance whose __init__ has not run
        # (e.g. one created with __new__ during copy or unpickling)
        state = self.__dict__

        if name in state.get('cre_keys', ()):
            value = re.compile(self.cre_source[name], self.re_option)
            setattr(self, name, value)
            return value

        locale = state.get('locale')
        if locale is not None and name in locale.locale_keys:
            return getattr(locale, name)

        raise AttributeError(name)

    def daysInMonth(self, month, year):
        """
        Take the given month (1-12) and a given year (4 digit) return
        the number of days in the month adjusting for leap year as needed

        @type  month: integer
        @param month: month number, 1-12
        @type  year:  integer
        @param year:  four digit year

        @rtype:  integer
        @return: number of days in the month, or C{None} when C{month} is
                 outside 1-12
        """
        result = None
        debug and log.debug('daysInMonth(%s, %s)', month, year)
        if 0 < month <= 12:
            result = self._DaysInMonthList[month - 1]

            if month == 2:
                if year in self._leapYears:
                    result += 1
                elif calendar.isleap(year):
                    # remember the year so the next request hits the cache
                    self._leapYears.append(year)
                    result += 1

        return result

    def getSource(self, sourceKey, sourceTime=None):
        """
        Return a date/time tuple based on the given source key
        and the corresponding entry found in self.re_sources.

        The current local time (or C{sourceTime} when given) supplies the
        default for every field; any of C{yr}, C{mth}, C{dy}, C{hr}, C{mn}
        and C{sec} present in the C{re_sources} entry replaces that default.

        @type  sourceKey:  string
        @param sourceKey:  key to look up in C{self.re_sources}
        @type  sourceTime: struct_time
        @param sourceTime: 9-item time value used for the defaults

        @rtype:  tuple
        @return: C{(yr, mth, dy, hr, mn, sec, wd, yd, isdst)}, or C{None}
                 when C{sourceKey} is not in C{self.re_sources}; C{wd},
                 C{yd} and C{isdst} are passed through unchanged
        """
        sources = self.re_sources
        if sourceKey not in sources:
            return None

        if sourceTime is None:
            sourceTime = time.localtime()
        yr, mth, dy, hr, mn, sec, wd, yd, isdst = sourceTime

        source = sources[sourceKey]

        return (source.get('yr', yr), source.get('mth', mth),
                source.get('dy', dy), source.get('hr', hr),
                source.get('mn', mn), source.get('sec', sec),
                wd, yd, isdst)
2790