I was working to get RediSearch indexing of my blog posts and came up with this script that works with the default Debian 12 RediSearch version 1.2. It will not work with RediSearch version 2 or up as the indexing is not hash based in version 2.
-
You need a working version of redisearch v 1.2 loaded into your redis cache. See the following link to enable redisearch in frappe. https://docs.frappe.io/erpnext/user/manual/en/installing_redisearch_to_enable_super_fast_e_commerce_search
NB: if you are using Debian 12, just run sudo apt install redisearch
and use the redisearch.so file installed as the shared object file to point to in your frappe config/redis_cache.conf file. -
Create a file in your app and paste the code below. I placed it in a folder called api and named the file blog_post_search.py
eg:
/yourappname/yourappname/api/blog_post_search.py
import frappe
import datetime
from redis import Redis
from redis.commands.search.field import TextField, NumericField
from redis.commands.search.indexDefinition import IndexDefinition
from redis.commands.search.query import Query
from frappe.utils import cstr, strip_html_tags
import logging
# Enable detailed logging
logging.basicConfig(level=logging.DEBUG)
# Derive the Redis host and port from frappe's redis_cache URL
# (e.g. "redis://127.0.0.1:13000") and open a shared client.
_redis_cache_url = frappe.conf.redis_cache
redis_host = _redis_cache_url.split("//")[1].split(":")[0]
redis_port = _redis_cache_url.split(":")[-1]
client = Redis(host=redis_host, port=redis_port, decode_responses=True)
def build_blog_posts_index():
    """Build (or rebuild) the hash-based RediSearch index ``idx:blog-posts``.

    Fetches all Blog Post documents from frappe, drops any existing index
    together with its ``blog:*`` hashes, recreates the index with the
    RediSearch 1.2 hash-based FT.CREATE syntax, and stores each post as a
    ``blog:<n>`` hash. Intended to run from a scheduled job (hooks.py) or
    via ``bench execute``. Returns nothing; errors are logged.
    """

    def preprocess_document(doc):
        # Redis hashes only hold strings/numbers: serialise the date and
        # strip HTML from the post body before indexing.
        if isinstance(doc.get("published_on"), datetime.date):
            doc["published_on"] = doc["published_on"].isoformat()
        doc["content"] = strip_html_tags(cstr(doc.get("content", "")))
        if not doc["content"]:
            logging.warning(
                f"Empty content after preprocessing for doc: {doc.get('title')}"
            )
        return doc

    # Fetch blog posts from the frappe backend.
    try:
        blog_posts = frappe.get_all(
            "Blog Post",
            fields=[
                "title",
                "blog_intro",
                "content",
                "blogger",
                "published_on",
                "read_time",
                "meta_image",
                "route",
            ],
        )
    except Exception as e:
        logging.error(f"Failed to fetch blog posts: {e}")
        return

    # Drop the stale index and its document hashes, then recreate the index.
    try:
        indexes = client.execute_command("FT._LIST")
        if "idx:blog-posts" in indexes:
            logging.info("Deleting Index")
            client.execute_command("FT.DROP", "idx:blog-posts")
            for key in client.keys("blog:*"):
                client.delete(key)
        else:
            logging.info("Index 'idx:blog-posts' does not exist.")
        # Hash-based index definition (RediSearch 1.2 command syntax).
        client.execute_command(
            "FT.CREATE",
            "idx:blog-posts",
            "ON",
            "HASH",
            "PREFIX",
            1,
            "blog:",
            "SCHEMA",
            "title",
            "TEXT",
            "SORTABLE",
            "blog_intro",
            "TEXT",
            "SORTABLE",
            "content",
            "TEXT",
            "WEIGHT",
            1.0,  # Match title weight
            "blogger",
            "TEXT",
            "SORTABLE",
            "published_on",
            "TEXT",
            "SORTABLE",
            "read_time",
            "NUMERIC",
            "SORTABLE",
            "meta_image",
            "TEXT",
            "NOINDEX",
            "SORTABLE",
            "route",
            "TEXT",
        )
        logging.info("Blog Posts Index created successfully")
    except Exception as e:
        logging.error(f"Blog Posts Index creation failed: {e}")
        return

    # Store each post as a hash so the prefix-matched index picks it up.
    for i, doc in enumerate(blog_posts, start=1):
        # key is assigned before the try so the except handler can always
        # log it (previously it was set inside the try, so a failure in
        # preprocess_document raised NameError / logged a stale key).
        key = f"blog:{i}"
        try:
            client.hset(key, mapping=preprocess_document(doc))
        except Exception as e:
            logging.error(f"Error indexing document {key}: {e}")
    client.close()
@frappe.whitelist(allow_guest=True)
def search_posts(
    query: str = "", sort_by: str = "", sort_order: str = "False", field: str = ""
):
    """Search the ``idx:blog-posts`` RediSearch index.

    Args:
        query: Text to search for; empty returns the first page of all posts.
        sort_by: Optional sortable index field to order results by.
        sort_order: "true" (case-insensitive) for ascending, anything else
            descending; only honoured when ``sort_by`` is given.
        field: Optional field name to restrict the search to.

    Returns:
        dict: ``{"success": True, "hits": [...]}`` on success, otherwise
        ``{"success": False, "error": "..."}``.
    """
    logging.info(
        f"Search query: {query}, field: {field}, sort_by: {sort_by}, sort_order: {sort_order}"
    )
    try:
        # Only allow field-restricted search on fields present in the index.
        valid_fields = [
            "title",
            "blog_intro",
            "content",
            "blogger",
            "published_on",
            "route",
        ]
        if field and field not in valid_fields:
            logging.error(f"Invalid field: {field}. Must be one of {valid_fields}")
            return {"success": False, "error": f"Invalid field: {field}"}
        # Coerce sort_order to bool; cstr() guards against a JSON boolean
        # being posted instead of a string (plain .lower() raised
        # AttributeError on this guest-exposed endpoint).
        sort_order_bool = cstr(sort_order).lower() == "true" if sort_by else False
        # Build query: field-specific exact term, global prefix, or match-all.
        search_query_str = (
            f"@{field}:{query}" if field and query else f"{query}*" if query else "*"
        )
        search_query = Query(search_query_str).paging(0, 10)
        if sort_by:
            search_query = search_query.sort_by(sort_by, asc=sort_order_bool)
        logging.debug(f"RediSearch query: {search_query.query_string()}")
        # Execute search against the shared module-level client.
        results = client.ft("idx:blog-posts").search(search_query)
        hits = [doc.__dict__ for doc in results.docs]
        logging.info(f"Search returned {len(hits)} hits")
        return {"success": True, "hits": hits}
    except Exception as e:
        logging.error(f"Search failed: {str(e)}")
        return {"success": False, "error": str(e)}
    finally:
        # NOTE(review): this closes the shared module-level client after every
        # request; redis-py reconnects via its pool, but confirm this is intended.
        client.close()
- Add a cron task to your hooks.py as below. This runs the indexing function every 15 minutes.
# Scheduled Tasks
# ---------------
scheduler_events = {
"cron": {
"*/15 * * * *": ["yourappname.api.blog_post_search.build_blog_posts_index"],
},
}
-
Run the command
bench execute yourappname.api.blog_post_search.build_blog_posts_index
from your bench folder to build an index. Watch the terminal output to see any errors. -
Run the command
bench execute --kwargs "{'query': 'enter_your_text_to_search_for'}" yourappname.api.blog_post_search.search_posts
It should return something like this:
INFO:root:Search query: mega, field: , sort_by: , sort_order: False
DEBUG:root:RediSearch query: mega*
INFO:root:Search returned 2 hits
{"success": true, "hits": [{"id": "blog:2", "payload": null, "meta_image": "/files/bread_board5fbf34.jpg", "content": "Lorem ipsum odor amet, consectetuer adipiscing elit.Felis integer euismod at ac; sollicitudin quisque. Libero torquent ipsum fames maximus mi. Proin sodales dapibus faucibus efficitur litora. Proin ex consequat non nostra malesuada mollis dis neque lectus. Porta mus venenatis amet platea penatibus sed nascetur. Elementum primis adipiscing magna ante nam.Vehicula placerat tempus semper aptent nullam aptent velit quis purus.Tincidunt vivamus egestas eros a ultricies. Ex nam suspendisse mus dolor in est natoque pretium adipiscing. At blandit eros, duis mattis et feugiat congue ipsum. Eu proin dolor viverra arcu sapien. Cursus magnis montes rutrum nullam neque turpis eget nulla. Augue ultricies etiam cubilia vulputate aliquam mi. Inceptos parturient habitant varius amet in ex eu.Nascetur maecenas turpis, netus fermentum est euismod urna porta? Diam netus tempor tortor imperdiet sociosqu. Hendrerit dignissim dis sollicitudin primis adipiscing lectus. Blandit nunc viverra felis penatibus scelerisque magnis habitant rhoncus facilisi. Sociosqu at velit pretium nullam consequat etiam a. Ut pharetra aliquam venenatis libero ultrices leo eleifend donec mollis. Nulla nisl imperdiet dictum eget consequat nibh morbi gravida.Semper vehicula penatibus sollicitudin, et magnis nullam. Etiam sapien proin massa imperdiet mi cubilia ante. Ante finibus duis, at ultricies euismod auctor metus. Luctus odio efficitur duis consequat, egestas fames maximus.Aliquet volutpat sit volutpat nisi montes, fringilla vivamus. Iaculis inceptos tempus suscipit purus potenti non aliquam.Aliquam arcu dignissim fermentum inceptos, ac aptent consectetur. Sit congue finibus morbi curabitur ullamcorper. Sem himenaeos proin malesuada vehicula adipiscing tempor eleifend. Justo risus blandit tempor dis conubia sociosqu nisi scelerisque. Luctus pretium per, dapibus metus magna senectus ante. 
Nascetur varius ridiculus ullamcorper eros adipiscing. Ridiculus rhoncus enim aenean non lobortis suscipit id aenean metus.", "title": "A shop featured blog post", "published_on": "2025-01-04", "route": "blog/testing/a-shop-featured-blog-post", "blog_intro": "This shop featured blog post will be displayed in the shop mega menu.", "read_time": "2", "blogger": "Christopher Robert Nuss"}, {"id": "blog:4", "payload": null, "meta_image": "/files/jSlWVMN.jpg", "content": "FishLorem ipsum odor amet, consectetuer adipiscing elit. Felis integer euismod at ac; sollicitudin quisque. Libero torquent ipsum fames maximus mi. Proin sodales dapibus faucibus efficitur litora. Proin ex consequat non nostra malesuada mollis dis neque lectus. Porta mus venenatis amet platea penatibus sed nascetur. Elementum primis adipiscing magna ante nam.Vehicula placerat tempus semper aptent nullam aptent velit quis purus.Tincidunt vivamus egestas eros a ultricies. Ex nam suspendisse mus dolor in est natoque pretium adipiscing. At blandit eros, duis mattis et feugiat congue ipsum. Eu proin dolor viverra arcu sapien. Cursus magnis montes rutrum nullam neque turpis eget nulla. Augue ultricies etiam cubilia vulputate aliquam mi. Inceptos parturient habitant varius amet in ex eu.Nascetur maecenas turpis, netus fermentum est euismod urna porta? Diam netus tempor tortor imperdiet sociosqu. Hendrerit dignissim dis sollicitudin primis adipiscing lectus. Blandit nunc viverra felis penatibus scelerisque magnis habitant rhoncus facilisi. Sociosqu at velit pretium nullam consequat etiam a. Ut pharetra aliquam venenatis libero ultrices leo eleifend donec mollis. Nulla nisl imperdiet dictum eget consequat nibh morbi gravida.Semper vehicula penatibus sollicitudin, et magnis nullam. Etiam sapien proin massa imperdiet mi cubilia ante. Ante finibus duis, at ultricies euismod auctor metus. Luctus odio efficitur duis consequat, egestas fames maximus.Aliquet volutpat sit volutpat nisi montes, fringilla vivamus. 
Iaculis inceptos tempus suscipit purus potenti non aliquam.Aliquam arcu dignissim fermentum inceptos, ac aptent consectetur. Sit congue finibus morbi curabitur ullamcorper. Sem himenaeos proin malesuada vehicula adipiscing tempor eleifend. Justo risus blandit tempor dis conubia sociosqu nisi scelerisque. Luctus pretium per, dapibus metus magna senectus ante. Nascetur varius ridiculus ullamcorper eros adipiscing. Ridiculus rhoncus enim aenean non lobortis suscipit id aenean metus.", "title": "A Featured Wattle Seed Blog Post", "published_on": "2025-01-04", "route": "blog/testing/a-featured-wattle-seed-blog-post", "blog_intro": "This blog post will be featured in the wattle seed mega menu.", "read_time": "2", "blogger": "Christopher Robert Nuss"}]}
- if you're running a production server, or you have enabled supervisor on a dev server that you are testing this on, then you have to run
sudo supervisorctl reread
and then sudo supervisorctl restart all
to restart the redis server. - run
bench restart
on a dev server, or bench restart --supervisor
on a production server; - run
bench --site yoursitename migrate
to pick up the change in your hooks.py
Check the Scheduled Job Log for errors! If it works you can comment out the logging calls.
Ohh, you’ll have to create a web page or some such a page that passes a query to the api and deal with the returned data. Something like this bit of Nuxt script setup code that I use:
// API base URL from environment variable
const { apiBase } = useRuntimeConfig().public;
// NOTE(review): Frappe's API v2 appears to wrap the method result in
// { data: ... } — which is why the $fetch result is destructured as
// { data: response } below. Confirm against your Frappe version.
const apiEndpoint = `${apiBase}/api/v2/method/littlebunyip.api.blog_post_search.search_posts`
// Search function
const search = async () => {
// Reset UI state before issuing a new request.
loading.value = true
error.value = null
posts.value = []
showNoSearchResults.value = false;
try {
// POST the query to the whitelisted search_posts endpoint.
const { data: response } = await $fetch(apiEndpoint, {
method: 'POST',
body: {
query: searchQuery.value,
sort_by: sortBy.value,
sort_order: 'True', // Default to ascending, adjust as needed
},
})
// search_posts returns { success, hits }; only show results when non-empty.
if (response.success && response.hits.length) {
showSearchResults.value = true;
posts.value = response.hits
} else {
// No hits (or the API reported failure) — show the empty-state message.
showNoSearchResults.value = true;
}
} catch (err) {
error.value = `Error: ${err.message}`
} finally {
loading.value = false
}
}
Good luck and I hope someone finds it useful.