In [10]:
import xmlschema
xmlschema.validate('mydoc.xml','myxmls.xsd')  # raises the first validation error found; nothing is returned on success
---------------------------------------------------------------------------
XMLSchemaDecodeError                      Traceback (most recent call last)
<ipython-input-10-2a1576f372d8> in <module>
      1 import xmlschema
----> 2 xmlschema.validate('mydoc.xml','myxmls.xsd')

/usr/local/lib/python3.6/dist-packages/xmlschema/documents.py in validate(xml_document, schema, cls, path, schema_path, use_defaults, namespaces, locations, base_url, defuse, timeout, lazy)
     69     """
     70     source, schema = get_context(xml_document, schema, cls, locations, base_url, defuse, timeout, lazy)
---> 71     schema.validate(source, path, schema_path, use_defaults, namespaces)
     72 
     73 

/usr/local/lib/python3.6/dist-packages/xmlschema/validators/schema.py in validate(self, source, path, schema_path, use_defaults, namespaces)
   1087         """
   1088         for error in self.iter_errors(source, path, schema_path, use_defaults, namespaces):
-> 1089             raise error
   1090 
   1091     def is_valid(self, source, path=None, schema_path=None, use_defaults=True, namespaces=None):

XMLSchemaDecodeError: failed validating '201809zzz' with XsdAtomicBuiltin(name='xs:integer'):

Reason: invalid literal for int() with base 10: '201809zzz'

Schema:

  <xs:simpleType xmlns:xs="http://www.w3.org/2001/XMLSchema" id="integer" name="integer">
    <xs:annotation>
      <xs:documentation source="http://www.w3.org/TR/xmlschema-2/#integer" />
    </xs:annotation>
    <xs:restriction base="xs:decimal">
      <xs:fractionDigits fixed="true" id="integer.fractionDigits" value="0" />
      <xs:pattern value="[\-+]?[0-9]+" />
    </xs:restriction>
  </xs:simpleType>

Instance:

  <semester xmlns="http://blue.math.buffalo.edu/463/mycourses">201809zzz</semester>

Path: /mycourses/course[2]/semester
In [3]:
# Explicit line magic: does not depend on automagic being enabled or on `pwd` not being shadowed by a variable.
%pwd
Out[3]:
'/home/ringland'
In [4]:
# Explicit line magic: does not depend on automagic being enabled or on `cd` not being shadowed by a variable.
# The path is relative to the directory reported by the previous cell.
%cd public_html/448
/home/ringland/public_html/448
In [13]:
import xmlschema
try:
    # validate() raises on the first problem it finds and returns nothing on success.
    xmlschema.validate('mydoc.xml','myxmls.xsd')
except xmlschema.XMLSchemaValidationError as e:
    # XMLSchemaDecodeError (a value that cannot be decoded to its declared type)
    # is a subclass of XMLSchemaValidationError, so catching the base class also
    # reports other validation failures instead of letting them escape as a raw
    # traceback.
    print( e.obj )      # the object that failed validation
    print( e.reason )   # human-readable explanation of the failure
else:
    # Only reached when no validation error was raised.
    print('Looks great!')
201809zzz
invalid literal for int() with base 10: '201809zzz'

Regular expressions

In [14]:
import re
In [16]:
# '.' matches any single character (except newline), so this finds every
# three-character run that ends in "at".
re.findall(r'.at', 'The cat sat on the mat.')
Out[16]:
['cat', 'sat', 'mat']
In [17]:
# '.*' is greedy: it consumes as much as possible, so the single match runs
# from the start of the string to the LAST "at".
re.findall(r'.*at', 'The cat sat on the mat.')
Out[17]:
['The cat sat on the mat']
In [18]:
# '.*?' is lazy: each match stops at the FIRST "at" it can reach, so the
# string is split into several shorter matches.
re.findall(r'.*?at', 'The cat sat on the mat.')
Out[18]:
['The cat', ' sat', ' on the mat']
In [21]:
# Letters only, then "at". Inside [...] ranges are simply listed one after
# another; a comma there would be matched literally, so none is used.
re.findall(r'[A-Za-z]*?at', 'The cat spat on the mat.')
Out[21]:
['cat', 'spat', 'mat']
In [26]:
# Between 2 and 10 letters followed by "at". The character class lists the two
# ranges back to back; a comma inside [...] would match a literal ','.
re.findall(r'[A-Za-z]{2,10}at', 'The cat spat on the mat.')
Out[26]:
['spat']
In [30]:
# With one capturing group, findall returns only the group's text (the letters
# before "at"). The class is letters only; a comma inside [...] would match ','.
re.findall(r'([A-Za-z]*?)at', 'The cat spat on the mat.')
Out[30]:
['c', 'sp', 'm']
In [32]:
# Alternation: either letters followed by "at", or "th" plus any one character.
# Matching is case-sensitive, so "The" is not picked up by 'th.'.
# The class is letters only; a comma inside [...] would match a literal ','.
re.findall(r'[A-Za-z]*?at|th.', 'The cat spat on the mat.')
Out[32]:
['cat', 'spat', 'the', 'mat']
In [33]:
# Raw string: '\d' and '\.' are not valid Python string escapes, so a plain
# string literal triggers an invalid-escape warning. r'...' passes the
# backslashes to the regex engine unchanged.
# Note: both \d* parts may be empty, so a lone '.' would also match.
re.findall(r'\d*\.\d*', '3.14159 > 2.71')
Out[33]:
['3.14159', '2.71']
In [ ]: