I wanted to make a post to see whether anyone else:
- had similar challenges
- would benefit from a Lazy Loading solution
- or has a reason why I should absolutely not be doing this
I had been having trouble getting my team to write object-oriented code when working with Frappe. Because Frappe loads the entire document for a given doctype, it is faster to skip the document object altogether and fetch only the fields you need, as a list of dictionaries, for the records you require.
Because of this, most of our code dealt with dictionaries instead of objects and was coded to only work in very specific scenarios.
About a year ago I wrote a subclass of Document called LazyDocument that overrides the database methods so that fields are only loaded as they are needed, and so that null values are not written back to the database.
Usage for any class that wants to extend this functionality:
class LazyQuotation(Quotation, LazyDocument):
    """Quotation variant that can be constructed from a document name alone."""

    # Doctype forwarded to the parent constructor for every instance.
    DOCTYPE: str = "Quotation"

    def __init__(self, name: str):
        """Initialise the document for ``name`` using the fixed ``DOCTYPE``."""
        doctype = self.DOCTYPE
        super().__init__(doctype, name)
# Build the lazy wrapper for the Quotation named "1234".
quotation = LazyQuotation("1234")
print(quotation.property) # property loaded here, on first access
We have been working with this for about a year and it has made it easier to write object oriented code, given that we can work with Objects instead of Dictionaries.
My question: Is this insane? Would this be useful to anyone? Am I trying to do something that already has an easy solution?
Should I try to make this into a PR, or do I scrap this entirely?
This is a snippet of what I’m trying to achieve
class LazyDocument(Document, EnforceOverrides):
    """
    A Document that loads attributes into memory as they are accessed, instead of
    loading the entire document at once.

    There are two overall sections to this class that are key to its function.
    1. Catch every occurrence of this object trying to fetch a property. If the property
       is not found, attempt to load it from the database.
    2. During object creation, override core functionality to avoid loading the
       majority of properties.

    The first step is handled via __getattribute__, get(), and the private lazy loading helpers.
    The second step is handled via overriding load_from_db and db_update from the parent classes.

    Subclasses are expected to provide a ``DOCTYPE`` class attribute and a constructor that
    accepts just the document name; ``documents_by_filter`` relies on both.
    """

    # Doctypes cannot function without these attributes. Add more as needed.
    REQUIRED_FIELDS = ["name", "modified"]

    # === Static ===

    @classmethod
    def documents_by_filter(cls, filters: "dict[str, str]") -> "list[LazyDocument]":
        """
        Return a list of lazy documents of the subclass this method is called on.

        Only the ``name`` column is queried; each name is then passed to ``cls(name)``.
        The annotations are strings on purpose: ``LazyDocument`` is not yet bound while
        the class body is being executed.
        """
        rows = frappe.get_list(cls.DOCTYPE, filters=filters, fields=["name"])
        return [cls(row["name"]) for row in rows]

    # === Operator Overload ===

    def __getattribute__(self, fieldname: str):
        """
        1. This is the first crucial part of this object. __getattribute__ is called for every
        attribute READ on this object (plain field access as well as the method lookup in
        obj.func()). Attribute assignment is not routed through here.

        If the normal lookup fails and ``fieldname`` is a field of this doctype that has not
        been looked up before, the value is fetched from the database and returned.

        Raises:
            AttributeError: the attribute is neither present nor a (not yet tried) doctype field.
        """
        try:
            return super().__getattribute__(fieldname)
        except AttributeError:
            already_resolved = self._is_field_resolved(fieldname)
            # Mark the field BEFORE consulting metadata or the database. The lookup below
            # reads self.doctype (and later self.name); if one of those is itself missing,
            # the nested access must hit the "already resolved" branch and raise instead of
            # recursing forever.
            self._resolve_field(fieldname)
            if already_resolved or not DocTypeMetaData.shared().is_field(self.doctype, fieldname):
                # If we do not have the field we need to raise the exception.
                # Returning None for example would tell functions like hasattr() that this
                # object DOES contain the field, it just happens to be empty.
                _debug("Object '{}' does not have field {}".format(type(self), fieldname))
                raise AttributeError(f"'{self.__class__.__name__}' object has no attribute '{fieldname}'")
            return self._lazy_load_attr(fieldname)

    # === Private ===

    def _has_attr(self, fieldname: str) -> bool:
        """
        Does a field exist in this instance's ``__dict__``.
        Used in place of getattr/hasattr to avoid infinite loops through __getattribute__.
        """
        return fieldname in object.__getattribute__(self, "__dict__")

    def _resolved_fields(self) -> set:
        """Return the per-instance set of field names already looked up, creating it on first use."""
        instance_dict = object.__getattribute__(self, "__dict__")
        return instance_dict.setdefault("_fetched_attr_set", set())

    def _is_field_resolved(self, fieldname: str) -> bool:
        """ Has this field previously been checked for this object """
        return fieldname in self._resolved_fields()

    def _resolve_field(self, fieldname: str):
        """ Mark this field as resolved """
        self._resolved_fields().add(fieldname)

    def _set_child_rows(self, fieldname: str, child_doctype: str) -> None:
        """
        Fetch the rows of one child table and store them on this document via ``self.set``.

        ### Copied snippet from document.py 'load_from_db'. ###
        Shared by the eager path (load_from_db) and the lazy path (_load_child_table).
        """
        from frappe.model.utils import is_virtual_doctype

        # Make sure not to query the DB for a child table, if it is a virtual one.
        # During frappe is installed, the property "is_virtual" is not available in tabDocType, so
        # we need to filter those cases for the access to frappe.db.get_value() as it would crash otherwise.
        # NOTE(review): hasattr() goes through the lazy __getattribute__ above, so the
        # "module" probe may itself consult metadata / the database - confirm this is acceptable.
        if hasattr(self, "doctype") and not hasattr(self, "module") and is_virtual_doctype(child_doctype):
            self.set(fieldname, [])
            return

        children = (
            frappe.db.get_values(
                child_doctype,
                {"parent": self.name, "parenttype": self.doctype, "parentfield": fieldname},
                "*",
                as_dict=True,
                order_by="idx asc",
                for_update=self.flags.for_update,
            )
            or []
        )
        self.set(fieldname, children)

    def _load_child_table(self, fieldname: str) -> None:
        """
        Lazily load the child table stored under ``fieldname``.
        Uses the field's 'options' from the doctype metadata in place of load_from_db's 'df' field.
        """
        options = DocTypeMetaData.shared().field_options(self.doctype, fieldname)
        self._set_child_rows(fieldname, options)

    def _lazy_load_attr(self, fieldname: str) -> Any:
        """
        Attempts to populate this field from the database and returns the loaded value.

        Scalar lookups are best-effort: any error raised by the database call is reported
        through _debug and the field is stored as None.
        """
        if DocTypeMetaData.shared().is_field_child_table(self.doctype, fieldname):
            _debug("Field '{}' is a child table. Fetching manually".format(fieldname))
            # _load_child_table stores the rows on the instance itself.
            self._load_child_table(fieldname)
            field_value = getattr(self, fieldname, None)
        else:
            _debug("Field '{}' is not a child table. Fetching from db with frappe.get_value.".format(fieldname))
            try:
                field_value = frappe.get_value(self.doctype, self.name, fieldname)
            except Exception as e:
                _debug("frappe db error: {}".format(e))
                field_value = None
            # Cache the value so the next access is served by the normal attribute lookup.
            setattr(self, fieldname, field_value)
        _debug("Field '{}' resolved to {}".format(fieldname, field_value))
        return field_value

    # === Overrides ===

    @override
    def get(self, key, filters=None, limit=None, default=None):
        """
        ### Copied snippet from base_document.py 'get'. ###
        When performing obj.get("taxes") for example, we need to be sure to use our lazy flow,
        so the value is read through __getattribute__ instead of self.__dict__.

        Args:
            key: field name, or a dict of filters applied to all children.
            filters: dict of filters for the rows stored under ``key``; any other truthy
                value is treated as the default instead.
            limit: maximum number of rows / items to return.
            default: value returned when ``key`` cannot be resolved.
        """
        from frappe.model.base_document import _filter

        if isinstance(key, dict):
            return _filter(self.get_all_children(), key, limit=limit)

        if filters and not isinstance(filters, dict):
            # perhaps you wanted to set a default instead
            default = filters
            filters = None

        try:
            value = self.__getattribute__(key)
        except Exception:
            # The default is applied here because the lazy lookup signals "missing" by raising.
            # Exception (not a bare except) keeps this best-effort without swallowing
            # KeyboardInterrupt / SystemExit.
            value = default

        if filters:
            return _filter(value, filters, limit=limit)

        if limit and isinstance(value, (list, tuple)) and len(value) > limit:
            value = value[:limit]
        return value

    @override
    def load_from_db(self):
        """
        ### Copied directly from document.py ###
        Load the document from the database and create properties from fields, but only
        for REQUIRED_FIELDS plus the doctype's set-only-once fields. Child tables known to
        the metadata are skipped so they can be lazy loaded later.

        Raises:
            frappe.DoesNotExistError: no row exists for this doctype / name.
        """
        self.flags.ignore_children = True
        if not getattr(self, "_metaclass", False) and self.meta.issingle:
            single_doc = frappe.db.get_singles_dict(self.doctype, for_update=self.flags.for_update)
            if not single_doc:
                single_doc = frappe.new_doc(self.doctype, as_dict=True)
                single_doc["name"] = self.doctype
                del single_doc["__islocal"]

            BaseDocument.__init__(self, single_doc)  ### ALTERED LINE super().__init__(single_doc) ###
            self.init_valid_columns()
            self._fix_numeric_types()
        else:
            ### INSERTED LOGIC ###
            ### Skipping frappe load. Only load mandatory_fields instead of * ###
            mandatory_fields = self.REQUIRED_FIELDS[:]
            mandatory_fields.extend(field.fieldname for field in self.meta.get_set_only_once_fields())
            d = frappe.db.get_value(
                self.doctype, self.name, mandatory_fields, as_dict=1, for_update=self.flags.for_update
            )
            if not d:
                frappe.throw(
                    _("{0} {1} not found").format(_(self.doctype), self.name), frappe.DoesNotExistError
                )

            BaseDocument.__init__(self, d)  ### ALTERED LINE super().__init__(d) ###
        self.flags.pop("ignore_children", None)

        for df in self._get_table_fields():
            ### INSERTED LOGIC ###
            ### If hit is a known field type from metadata, skip it and let it be lazy loaded later ###
            ### _HOWEVER_ BKD-1214: Issues arise when lazy loading 'Table MultiSelect' type tables.
            ### Forget the performance savings, load these anyway.
            if (df.fieldtype != "Table MultiSelect") and DocTypeMetaData.shared().is_field(self.doctype, df.fieldname):
                _debug("Skipping initial load of field {}, of type {}".format(df.fieldname, df.options))
                continue
            self._set_child_rows(df.fieldname, df.options)

        # sometimes __setup__ can depend on child values, hence calling again at the end
        if hasattr(self, "__setup__"):
            self.__setup__()

    @override
    def db_update(self, ignore_if_duplicate=False):
        """
        ### Copied directly from base_document.py ###
        Write this document's non-null valid columns back to its table row.

        ``ignore_if_duplicate`` is accepted for signature compatibility and is not used here.
        Unique-key violations are reported through show_unique_validation_message; every
        other database error is re-raised.
        """
        if self.get("__islocal") or not self.name:
            self.db_insert()
            return

        d = self.get_valid_dict(
            convert_dates_to_str=True,
            ### This is the only altered line. Ignore all null values when creating dict, instead of inserting null into database ###
            # NOTE(review): as a consequence a field set to None is never cleared in the database.
            ignore_nulls=True,
            ignore_virtual=True,
        )

        # don't update name, as case might've been changed
        name = cstr(d["name"])
        del d["name"]

        columns = list(d)
        if not columns:
            # With nulls ignored there may be nothing left to write; an empty SET clause
            # would be invalid SQL.
            return

        try:
            frappe.db.sql(
                """UPDATE `tab{doctype}`
                SET {values} WHERE `name`=%s""".format(
                    doctype=self.doctype, values=", ".join("`" + c + "`=%s" for c in columns)
                ),
                list(d.values()) + [name],
            )
        except Exception as e:
            if frappe.db.is_unique_key_violation(e):
                self.show_unique_validation_message(e)
            else:
                raise

    @override
    def update_child_table(self, fieldname, df=None):
        """
        Do not update the child tables for a doctype if they were never even loaded into memory.

        The instance dict is inspected directly: going through self.get() would trigger the
        lazy load this method is meant to avoid. ``fieldname`` is used for the check because
        ``df`` is optional and may be None.
        NOTE(review): a table that was loaded and then emptied is also skipped (as before),
        so removing every row is not propagated by this method - confirm that is intended.
        """
        loaded_rows = object.__getattribute__(self, "__dict__").get(fieldname)
        if not loaded_rows:
            return
        return super().update_child_table(fieldname, df)
# Module-level switch: set to True to have _debug() print its messages.
_is_debug = False


# I foresee needing to debug this object. Please leave _debug messages in.
def _debug(message: str) -> None:
    """Print ``message`` when the module-level ``_is_debug`` flag is truthy; otherwise do nothing."""
    if _is_debug:
        print(message)