Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add support for file-like object parsing #82

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 6 additions & 2 deletions xbrl/helper/xml_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,10 @@
It is used by the different parsing modules.
"""
import xml.etree.ElementTree as ET
from io import StringIO
from io import StringIO, IOBase


def parse_file(file: str or StringIO) -> ET.ElementTree:
def parse_file(file: str or IOBase or StringIO) -> ET.ElementTree:
"""
Parses a file, returns the Root element with an attribute 'ns_map' containing the prefix - namespaces map
:param file: either the file path (str) or a file-like object
Expand All @@ -18,6 +18,10 @@ def parse_file(file: str or StringIO) -> ET.ElementTree:
root = None
ns_map = []

    # Reset the file pointer to the beginning in case the file was already read from
if isinstance(file, IOBase):
file.seek(0, 0)

for event, elem in ET.iterparse(file, events):
if event == "start-ns":
ns_map.append(elem)
Expand Down
80 changes: 50 additions & 30 deletions xbrl/instance.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
import re
import abc
import logging
from io import StringIO, BytesIO
from io import StringIO, BytesIO, IOBase
from typing import List
import xml.etree.ElementTree as ET
from datetime import date, datetime
Expand Down Expand Up @@ -246,21 +246,23 @@ class XbrlInstance(abc.ABC):
"""
    Class representing an XBRL instance file
"""

def __init__(self, url: str, taxonomy: TaxonomySchema, facts: List[AbstractFact], context_map: dict,
def __init__(self, url: str or IOBase or StringIO, taxonomy: TaxonomySchema, facts: List[AbstractFact], context_map: dict,
unit_map: dict) -> None:
"""
:param taxonomy: taxonomy file that the instance file references (via link:schemaRef)
:param facts: array of all facts that the instance contains
"""
self.taxonomy: TaxonomySchema = taxonomy
self.facts: List[AbstractFact] = facts
self.instance_url: str = url
self.instance_url: str or IOBase or StringIO = url
self.context_map: dict = context_map
self.unit_map: dict = unit_map

def __str__(self) -> str:
file_name: str = self.instance_url.split('/')[-1]
if isinstance(self.instance_url, str):
file_name: str = self.instance_url.split('/')[-1]
elif isinstance(self.instance_url, IOBase):
file_name: str = self.instance_url.__str__()
return "{} with {} facts".format(file_name, len(self.facts))


Expand All @@ -279,7 +281,7 @@ def parse_xbrl_url(instance_url: str, cache: HttpCache) -> XbrlInstance:
return parse_xbrl(instance_path, cache, instance_url)


def parse_xbrl(instance_path: str, cache: HttpCache, instance_url: str or None = None) -> XbrlInstance:
def parse_xbrl(instance_path: str or IOBase or StringIO, cache: HttpCache, instance_url: str or None = None) -> XbrlInstance:
"""
    Parses an instance file with its taxonomy
:param instance_path: url to the instance file (on the internet)
Expand All @@ -293,6 +295,7 @@ def parse_xbrl(instance_path: str, cache: HttpCache, instance_url: str or None =
# get the link to the taxonomy schema and parse it
schema_ref: ET.Element = root.find(LINK_NS + 'schemaRef')
schema_uri: str = schema_ref.attrib[XLINK_NS + 'href']

# check if the schema uri is relative or absolute
    # submissions from the SEC normally have their own schema files, whereas submissions from the UK have absolute schemas
if schema_uri.startswith('http'):
Expand All @@ -302,10 +305,12 @@ def parse_xbrl(instance_path: str, cache: HttpCache, instance_url: str or None =
# fetch the taxonomy extension schema from remote by reconstructing the url
schema_url = resolve_uri(instance_url, schema_uri)
taxonomy: TaxonomySchema = parse_taxonomy_url(schema_url, cache)
else:
elif isinstance(instance_path, str):
# try to find the taxonomy extension schema file locally because no full url can be constructed
schema_path = resolve_uri(instance_path, schema_uri)
taxonomy: TaxonomySchema = parse_taxonomy(schema_path, cache)
elif isinstance(instance_path, IOBase):
taxonomy: TaxonomySchema = parse_taxonomy(instance_path, cache)

# parse contexts and units
context_dir = _parse_context_elements(root.findall('xbrli:context', NAME_SPACES), root.attrib['ns_map'], taxonomy,
Expand All @@ -331,22 +336,24 @@ def parse_xbrl(instance_path: str, cache: HttpCache, instance_url: str or None =
taxonomy_ns = taxonomy_ns.replace('{', '')
# get the concept object from the taxonomy
tax = taxonomy.get_taxonomy(taxonomy_ns)
if tax is None: tax = _load_common_taxonomy(cache, taxonomy_ns, taxonomy)

concept: Concept = tax.concepts[tax.name_id_map[concept_name]]
context: AbstractContext = context_dir[fact_elem.attrib['contextRef'].strip()]

if 'unitRef' in fact_elem.attrib:
# the fact is a numerical fact
# get the unit
unit: AbstractUnit = unit_dir[fact_elem.attrib['unitRef'].strip()]
decimals_text: str = str(fact_elem.attrib['decimals']).strip()
decimals: int = None if decimals_text.lower() == 'inf' else int(decimals_text)
fact = NumericFact(concept, context, float(fact_elem.text), unit, decimals)
else:
# the fact is probably a text fact
fact = TextFact(concept, context, fact_elem.text.strip())
facts.append(fact)
if tax is None:
tax = _load_common_taxonomy(cache, taxonomy_ns, taxonomy)

if concept_name in tax.name_id_map:
concept: Concept = tax.concepts[tax.name_id_map[concept_name]]
context: AbstractContext = context_dir[fact_elem.attrib['contextRef'].strip()]

if 'unitRef' in fact_elem.attrib:
# the fact is a numerical fact
# get the unit
unit: AbstractUnit = unit_dir[fact_elem.attrib['unitRef'].strip()]
decimals_text: str = str(fact_elem.attrib['decimals']).strip()
decimals: int = None if decimals_text.lower() == 'inf' else int(decimals_text)
fact = NumericFact(concept, context, float(fact_elem.text), unit, decimals)
else:
# the fact is probably a text fact
fact = TextFact(concept, context, fact_elem.text.strip())
facts.append(fact)

return XbrlInstance(instance_url if instance_url else instance_path, taxonomy, facts, context_dir, unit_dir)

Expand All @@ -366,7 +373,7 @@ def parse_ixbrl_url(instance_url: str, cache: HttpCache) -> XbrlInstance:
return parse_ixbrl(instance_path, cache, instance_url)


def parse_ixbrl(instance_path: str, cache: HttpCache, instance_url: str or None = None, encoding=None) -> XbrlInstance:
def parse_ixbrl(instance_path: str or IOBase or StringIO, cache: HttpCache, instance_url: str or None = None, encoding=None) -> XbrlInstance:
"""
    Parses an inline XBRL (iXBRL) instance file.
:param instance_path: path to the submission you want to parse
Expand All @@ -385,7 +392,10 @@ def parse_ixbrl(instance_path: str, cache: HttpCache, instance_url: str or None
=> in the XBRL-parse function root is ET.Element, here just an instance of ElementTree class!
"""

instance_file = open(instance_path, "r", encoding=encoding)
if isinstance(instance_path, str):
instance_file = open(instance_path, "r", encoding=encoding)
elif isinstance(instance_path, IOBase):
instance_file = instance_path
contents = instance_file.read()
pattern = r'<[ ]*script.*?\/[ ]*script[ ]*>'
contents = re.sub(pattern, '', contents, flags=(re.IGNORECASE | re.MULTILINE | re.DOTALL))
Expand All @@ -395,6 +405,7 @@ def parse_ixbrl(instance_path: str, cache: HttpCache, instance_url: str or None
# get the link to the taxonomy schema and parse it
schema_ref: ET.Element = root.find('.//{}schemaRef'.format(LINK_NS))
schema_uri: str = schema_ref.attrib[XLINK_NS + 'href']

# check if the schema uri is relative or absolute
    # submissions from the SEC normally have their own schema files, whereas submissions from the UK have absolute schemas
if schema_uri.startswith('http'):
Expand All @@ -404,10 +415,12 @@ def parse_ixbrl(instance_path: str, cache: HttpCache, instance_url: str or None
# fetch the taxonomy extension schema from remote by reconstructing the url
schema_url = resolve_uri(instance_url, schema_uri)
taxonomy: TaxonomySchema = parse_taxonomy_url(schema_url, cache)
else:
elif isinstance(instance_path, str):
# try to find the taxonomy extension schema file locally because no full url can be constructed
schema_path = resolve_uri(instance_path, schema_uri)
taxonomy: TaxonomySchema = parse_taxonomy(schema_path, cache)
elif isinstance(instance_path, IOBase):
taxonomy: TaxonomySchema = parse_taxonomy(instance_path, cache)

# get all contexts and units
xbrl_resources: ET.Element = root.find('.//ix:resources', ns_map)
Expand Down Expand Up @@ -581,11 +594,12 @@ def _parse_context_elements(context_elements: List[ET.Element], ns_map: dict, ta
if member_tax is None:
# try to subsequently load the taxonomy
member_tax = _load_common_taxonomy(cache, ns_map[member_prefix], taxonomy)
dimension_concept: Concept = dimension_tax.concepts[dimension_tax.name_id_map[dimension_concept_name]]
member_concept: Concept = member_tax.concepts[member_tax.name_id_map[member_concept_name]]
if dimension_concept_name in dimension_tax.name_id_map and member_concept_name in member_tax.name_id_map:
dimension_concept: Concept = dimension_tax.concepts[dimension_tax.name_id_map[dimension_concept_name]]
member_concept: Concept = member_tax.concepts[member_tax.name_id_map[member_concept_name]]

# add the explicit member to the context
context.segments.append(ExplicitMember(dimension_concept, member_concept))
# add the explicit member to the context
context.segments.append(ExplicitMember(dimension_concept, member_concept))

context_dict[context_id] = context
return context_dict
Expand Down Expand Up @@ -676,9 +690,15 @@ def parse_instance_locally(self, path: str, instance_url: str or None = None) ->
instance document was downloaded, the parser can fetch relative imports using this base url
:return:
"""

if path.split('.')[-1] == 'xml' or path.split('.')[-1] == 'xbrl':
return parse_xbrl(path, self.cache, instance_url)
return parse_ixbrl(path, self.cache, instance_url)

def parse_file_obj(self, file_obj, instance_url: str or None = None, is_xbrl: bool = True):
if is_xbrl is True:
return parse_xbrl(file_obj, self.cache, instance_url)
return parse_ixbrl(file_obj, self.cache, instance_url)

def __str__(self) -> str:
return 'XbrlParser with cache dir at {}'.format(self.cache.cache_dir)
15 changes: 9 additions & 6 deletions xbrl/linkbase.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
"""
import abc
import os
from io import StringIO, IOBase
from typing import List
import xml.etree.ElementTree as ET
from abc import ABC
Expand Down Expand Up @@ -428,7 +429,7 @@ def parse_linkbase_url(linkbase_url: str, linkbase_type: LinkbaseType, cache: Ht
return parse_linkbase(linkbase_path, linkbase_type, linkbase_url)


def parse_linkbase(linkbase_path: str, linkbase_type: LinkbaseType, linkbase_url: str or None = None) -> Linkbase:
def parse_linkbase(linkbase_path: str or IOBase or StringIO, linkbase_type: LinkbaseType, linkbase_url: str or None = None) -> Linkbase:
"""
Parses a linkbase and returns a Linkbase object containing all
locators, arcs and links of the linkbase in a hierarchical order (a Tree)
Expand All @@ -440,10 +441,11 @@ def parse_linkbase(linkbase_path: str, linkbase_type: LinkbaseType, linkbase_url
the url has to be set so that the parser can connect the locator with concept from the taxonomy
:return:
"""
if linkbase_path.startswith('http'): raise XbrlParseException(
'This function only parses locally saved linkbases. Please use parse_linkbase_url to parse remote linkbases')
if not os.path.exists(linkbase_path):
raise LinkbaseNotFoundException(f"Could not find linkbase at {linkbase_path}")
if isinstance(linkbase_path, str):
if linkbase_path.startswith('http'): raise XbrlParseException(
'This function only parses locally saved linkbases. Please use parse_linkbase_url to parse remote linkbases')
if not os.path.exists(linkbase_path):
raise LinkbaseNotFoundException(f"Could not find linkbase at {linkbase_path}")

root: ET.Element = ET.parse(linkbase_path).getroot()
# store the role refs in a dictionary, with the role uri as key.
Expand Down Expand Up @@ -490,7 +492,8 @@ def parse_linkbase(linkbase_path: str, linkbase_type: LinkbaseType, linkbase_url
if not locator_href.startswith('http'):
# resolve the path
# todo, try to get the URL here, instead of the path!!!
locator_href = resolve_uri(linkbase_url if linkbase_url else linkbase_path, locator_href)
if linkbase_url or isinstance(linkbase_path, str):
locator_href = resolve_uri(linkbase_url if linkbase_url else linkbase_path, locator_href)
locator_map[loc_label] = Locator(locator_href, loc_label)

# Performance: extract the labels in advance. The label name (xlink:label) is the key and the value is
Expand Down
42 changes: 31 additions & 11 deletions xbrl/taxonomy.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,17 @@
"""
import logging
import os
from io import StringIO, IOBase
from typing import List
import xml.etree.ElementTree as ET

from functools import lru_cache
from urllib.parse import unquote

from xbrl import XbrlParseException, TaxonomyNotFound
from xbrl.cache import HttpCache
from xbrl.helper.uri_helper import resolve_uri, compare_uri
from xbrl.helper.xml_parser import parse_file
from xbrl.linkbase import Linkbase, ExtendedLink, LinkbaseType, parse_linkbase, parse_linkbase_url, Label

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -432,7 +435,7 @@ class TaxonomySchema:
This parser will not parse all Schemas and imports, only what is necessary.
"""

def __init__(self, schema_url: str, namespace: str):
def __init__(self, schema_url: str or IOBase or StringIO, namespace: str):
"""
The imports array stores an array of all Schemas that are imported.
The current Taxonomy Schema can override the extended schemas in the following way:
Expand Down Expand Up @@ -472,7 +475,7 @@ def get_taxonomy(self, url: str):
:return either a TaxonomySchema obj or None
:return:
"""
if compare_uri(self.namespace, url) or compare_uri(self.schema_url, url):
if (self.namespace is not None and compare_uri(self.namespace, url)) or (isinstance(self.schema_url, str) and compare_uri(self.schema_url, url)):
return self

for imported_tax in self.imports:
Expand Down Expand Up @@ -513,7 +516,7 @@ def parse_taxonomy_url(schema_url: str, cache: HttpCache) -> TaxonomySchema:
return parse_taxonomy(schema_path, cache, schema_url)


def parse_taxonomy(schema_path: str, cache: HttpCache, schema_url: str or None = None) -> TaxonomySchema:
def parse_taxonomy(schema_path: str or IOBase or StringIO, cache: HttpCache, schema_url: str or None = None) -> TaxonomySchema:
"""
Parses a taxonomy schema file.
    :param schema_path: local path to the schema file, or a file-like object containing the schema
Expand All @@ -522,15 +525,28 @@ def parse_taxonomy(schema_path: str, cache: HttpCache, schema_url: str or None =
imported schemas from the remote location. If this url is None, the script will try to find those resources locally.
:return:
"""
if schema_path.startswith('http'): raise XbrlParseException(
'This function only parses locally saved taxonomies. Please use parse_taxonomy_url to parse remote taxonomy schemas')
if not os.path.exists(schema_path):
raise TaxonomyNotFound(f"Could not find taxonomy schema at {schema_path}")

if isinstance(schema_path, str):
if schema_path.startswith('http'): raise XbrlParseException(
'This function only parses locally saved taxonomies. Please use parse_taxonomy_url to parse remote taxonomy schemas')
if not os.path.exists(schema_path):
raise TaxonomyNotFound(f"Could not find taxonomy schema at {schema_path}")

# Get the local absolute path to the schema file (and download it if it is not yet cached)
root: ET.Element = ET.parse(schema_path).getroot()
if isinstance(schema_path, str):
root: ET.Element = ET.parse(schema_path).getroot()
elif isinstance(schema_path, IOBase):
root: ET.Element = parse_file(schema_path).getroot()
# get the target namespace of the taxonomy
target_ns = root.attrib['targetNamespace']

if 'targetNamespace' in root.attrib:
target_ns = root.attrib['targetNamespace']
else:
schema_ref: ET.Element = root.find(LINK_NS + 'schemaRef')
schema_uri: str = schema_ref.attrib[XLINK_NS + 'href']
ticker = schema_uri.split("-")[0]
target_ns = root.attrib['ns_map'][ticker]

taxonomy: TaxonomySchema = TaxonomySchema(schema_url if schema_url else schema_path, target_ns)

import_elements: List[ET.Element] = root.findall('xsd:import', NAME_SPACES)
Expand All @@ -546,10 +562,12 @@ def parse_taxonomy(schema_path: str, cache: HttpCache, schema_url: str or None =
# fetch the schema file from remote by reconstructing the full url
import_url = resolve_uri(schema_url, import_uri)
taxonomy.imports.append(parse_taxonomy_url(import_url, cache))
else:
elif isinstance(schema_path, str):
# We have to try to fetch the linkbase locally because no full url can be constructed
import_path = resolve_uri(schema_path, import_uri)
taxonomy.imports.append(parse_taxonomy(import_path, cache))
elif isinstance(schema_path, IOBase):
taxonomy.imports.append(parse_taxonomy(schema_path, cache))

role_type_elements: List[ET.Element] = root.findall('xsd:annotation/xsd:appinfo/link:roleType', NAME_SPACES)
# parse ELR's
Expand Down Expand Up @@ -597,10 +615,12 @@ def parse_taxonomy(schema_path: str, cache: HttpCache, schema_url: str or None =
# fetch the linkbase from remote by reconstructing the full URL
linkbase_url = resolve_uri(schema_url, linkbase_uri)
linkbase: Linkbase = parse_linkbase_url(linkbase_url, linkbase_type, cache)
else:
elif isinstance(schema_path, str):
# We have to try to fetch the linkbase locally because no full url can be constructed
linkbase_path = resolve_uri(schema_path, linkbase_uri)
linkbase: Linkbase = parse_linkbase(linkbase_path, linkbase_type)
elif isinstance(schema_path, IOBase):
linkbase: Linkbase = parse_linkbase(schema_path, linkbase_type)

# add the linkbase to the taxonomy
if linkbase_type == LinkbaseType.DEFINITION:
Expand Down