# Scrapy settings for the "mirror" project.
#
# Every name below is a plain module-level constant. The first four are
# standard Scrapy settings; the remaining ones are project-specific and are
# presumably read by the spiders/pipelines (their consumers are not visible
# in this file -- confirm before changing their shape).

# Name of the bot implemented by this Scrapy project.
BOT_NAME = 'mirror'

# Modules in which Scrapy looks for spiders, and the module in which
# `scrapy genspider` creates new ones.
SPIDER_MODULES = ['mirror.spiders']
NEWSPIDER_MODULE = 'mirror.spiders'

# Item pipelines to enable, mapped to their order value. Scrapy runs
# pipelines from the lowest value to the highest, so items pass through
# normalize -> filter -> store.
ITEM_PIPELINES = {
    'mirror.pipelines.normalize.Words': 300,
    'mirror.pipelines.filter.Injections': 400,
    'mirror.pipelines.store.Attributes': 500,
}

# Project-specific: relative path (with trailing slash) -- presumably the
# directory where results are written; verify against the store pipeline.
RESULTS = 'mirror/results/'

# Project-specific: HTML tag names treated as media elements.
MEDIA_TAGS = ['video', 'audio', 'img', 'canvas']

# Project-specific: "tag/@attribute" fragments naming attributes that
# reference an external resource. NOTE(review): these look like XPath
# fragments consumed by the Injections filter -- confirm.
INJECT_TAGS = [
    'script/@src',
    'img/@src',
    'video/@src',
    'audio/@src',
    'iframe/@src',
    'embed/@src',
    'link/@href',
]

# Project-specific: HTML tag names of interest, grouped here by rough role
# for readability only; the list order is unchanged.
SEMANTIC_TAGS = [
    # document skeleton
    'html', 'head', 'title', 'meta', 'link', 'body',
    # sectioning
    'header', 'footer', 'nav', 'article', 'aside', 'section',
    # headings and text
    'h1', 'h2', 'h3', 'h4', 'p', 'a',
    # lists
    'ul', 'ol', 'li', 'dl', 'dt',
    # figures and tables
    'figure', 'table', 'th', 'tr', 'td',
    # media
    'video', 'audio',
    # forms
    'form', 'input', 'label', 'button',
]