blogit.py 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553
  1. # ============================================================================
  2. # Blogit.py is free software; you can redistribute it and/or modify
  3. # it under the terms of the GNU General Public License, version 3
  4. # as published by the Free Software Foundation;
  5. #
  6. # Blogit.py is distributed in the hope that it will be useful,
  7. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  8. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  9. # GNU General Public License for more details.
  10. #
  11. # You should have received a copy of the GNU General Public License
  12. # along with Blogit.py; if not, write to the Free Software
  13. # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
  14. # ============================================================================
  15. # Copyright (C) 2013-2016 Oz Nahum Tiram <nahumoz@gmail.com>
  16. # ============================================================================
  17. from __future__ import print_function
  18. import os
  19. import re
  20. import datetime
  21. import argparse
  22. import sys
  23. import operator
  24. import shutil
  25. from StringIO import StringIO
  26. import codecs
  27. import subprocess as sp
  28. import SimpleHTTPServer
  29. import BaseHTTPServer
  30. import socket
  31. import SocketServer
  32. from jinja2 import Environment, FileSystemLoader
  33. import markdown2
  34. import tinydb
  35. from tinydb import Query, where
  36. sys.path.insert(0, os.getcwd())
  37. from conf import CONFIG, GLOBAL_TEMPLATE_CONTEXT
  38. # with this config, pages are rendered to the location of their title
  39. KINDS = {
  40. 'writing': {
  41. 'name': 'writing', 'name_plural': 'writings',
  42. },
  43. 'note': {
  44. 'name': 'note', 'name_plural': 'notes',
  45. },
  46. 'link': {
  47. 'name': 'link', 'name_plural': 'links',
  48. },
  49. 'photo': {
  50. 'name': 'photo', 'name_plural': 'photos',
  51. },
  52. }
# Shared Jinja2 environment used by _render(): templates are looked up in the
# directory named by CONFIG['templates'], with trim_blocks and lstrip_blocks
# switched on.
jinja_env = Environment(lstrip_blocks=True, trim_blocks=True,
                        loader=FileSystemLoader(CONFIG['templates']))
  55. class DataBase(object): # pragma: no coverage
  56. """A thin wrapper around TinyDB instance"""
  57. def __init__(self, path):
  58. _db = tinydb.TinyDB(path)
  59. self.posts = _db.table('posts')
  60. self.tags = _db.table('tags')
  61. self.pages = _db.table('pages')
  62. self.templates = _db.table('templates')
  63. self._db = _db
# Module-level database handle shared by Tag and Entry. The store is the file
# 'blogit.db' under CONFIG['content_root']; it is opened at import time.
DB = DataBase(os.path.join(CONFIG['content_root'], 'blogit.db'))
  65. class Tag(object):
  66. table = DB.tags
  67. db = DB
  68. def __init__(self, name):
  69. self .name = name
  70. self.permalink = GLOBAL_TEMPLATE_CONTEXT["site_url"]
  71. Tags = Query()
  72. tag = self.table.get(Tags.name == self.name)
  73. if not tag:
  74. self.table.insert({'name': self.name, 'post_ids': []})
  75. def __str__(self):
  76. return self.name
  77. def __repr__(self): # pragma: no coverage
  78. return self.name
  79. @property
  80. def slug(self):
  81. _slug = self.name.lower()
  82. _slug = re.sub(r'[;:,. ]+', '-', _slug.lstrip(',.;:-'))
  83. return _slug.lstrip('-')
  84. @property
  85. def posts(self):
  86. """
  87. return a list of post ids tagged with Tag
  88. """
  89. Tags = Query()
  90. tag = self.table.get(Tags.name == self.name)
  91. return tag['post_ids']
  92. @posts.setter
  93. def posts(self, post_ids):
  94. if not isinstance(post_ids, list):
  95. raise ValueError("post_ids must be of type list")
  96. Tags = Query()
  97. tag = self.table.get(Tags.name == self.name)
  98. # if not tag: # pragma: no coverage
  99. # raise ValueError("Tag %s not found" % self.name)
  100. # else:
  101. new = set(post_ids) - set(tag['post_ids'])
  102. tag['post_ids'].extend(list(new))
  103. self.table.update({'post_ids': tag['post_ids']}, eids=[tag.eid])
  104. @property
  105. def entries(self):
  106. """return the actual lists of entries tagged with"""
  107. Posts = Query()
  108. for id in self.posts:
  109. post = self.db.posts.get(eid=id)
  110. if not post:
  111. raise ValueError("no post found for eid %s" % id)
  112. yield Entry(os.path.join(CONFIG['content_root'], post['filename']), id)
  113. def render(self):
  114. """Render html page and atom feed"""
  115. context = GLOBAL_TEMPLATE_CONTEXT.copy()
  116. context['tag'] = self
  117. context['entries'] = _sort_entries(self.entries)
  118. # render html page
  119. render_to = os.path.join(CONFIG['output_to'], 'tags', self.slug)
  120. if not os.path.exists(render_to): # pragma: no coverage
  121. os.makedirs(render_to)
  122. _render(context, 'tag_index.html', os.path.join(render_to, 'index.html'))
  123. # render atom.xml
  124. context['entries'] = context['entries'][:10]
  125. context['last_build'] = datetime.datetime.now().strftime("%Y-%m-%dT%H:%M:%SZ")
  126. _render(context, 'atom.xml', os.path.join(render_to, 'atom.xml'))
  127. return True
  128. class Entry(object):
  129. """This is the base class for creating an HTML page from a Markdown
  130. based page.
  131. The file has the following structure for a page:
  132. .. code:
  133. ---
  134. title: example page
  135. public: yes
  136. kind: page
  137. template: about.html
  138. ---
  139. # some heading
  140. content paragraph
  141. ## heading 2
  142. some more content
  143. The file has the following structure for a blog entry:
  144. .. code:
  145. ---
  146. title: Blog post 1
  147. author: Famous author
  148. published: 2015-01-11
  149. tags: [python, git, bash, linux]
  150. public: yes
  151. chronological: yes
  152. kind: writing
  153. summary: This is a summry of post 1. Donec id elit non mi porta
  154. ---
  155. This is the body of post 1. Donec id elit non mi porta gravida
  156. """
  157. db = DB
  158. @classmethod
  159. def entry_from_db(kls, filename, eid=None):
  160. f = os.path.join(filename)
  161. return kls(f, eid)
  162. def __init__(self, path, eid=None):
  163. self._path = path
  164. self.path = path.split(CONFIG['content_root'])[-1].lstrip('/')
  165. self.id = eid # this is set inside prepare()
  166. try:
  167. self.prepare()
  168. except KeyError as E:
  169. pass
  170. def __str__(self):
  171. return self.path
  172. def __repr__(self): # pragma: no coverage
  173. return self.path
  174. @property
  175. def name(self):
  176. return os.path.splitext(os.path.basename(self.path))[0]
  177. @property
  178. def abspath(self):
  179. return self._path
  180. @property
  181. def destination(self):
  182. return os.path.join(CONFIG['output_to'], self.permalink)
  183. @property
  184. def title(self):
  185. return self.header['title']
  186. @property
  187. def summary_atom(self):
  188. summarya = markdown2.markdown(self.header.get('summary', "").strip())
  189. summarya = re.sub("<p>|</p>", "", summarya)
  190. more = '<a href="%s"> continue reading...</a>' % (self.permalink)
  191. return summarya+more
  192. @property
  193. def publish_date(self):
  194. return self.header.get('published',
  195. datetime.date.today().strftime("%Y-%m-%d"))
  196. @property
  197. def permalink(self):
  198. if self.kind == 'page':
  199. dest = '%s.html' % self.title.replace('/', "-")
  200. else:
  201. dest = "%s/%s/index.html" % (KINDS[self.kind]['name_plural'], self.name)
  202. dest = dest.lstrip('/')
  203. return dest
  204. @property
  205. def tags(self):
  206. """this property is always called after prepare"""
  207. if 'tags' in self.header:
  208. tags = [Tag(t) for t in self.header['tags']]
  209. map(lambda t: setattr(t, 'posts', [self.id]), tags)
  210. return tags
  211. else:
  212. return []
  213. def prepare(self):
  214. self.body_html = markdown2.markdown(
  215. codecs.open(self.abspath, 'r').read(),
  216. extras=['fenced-code-blocks', 'hilite', 'tables', 'metadata'])
  217. self.header = self.body_html.metadata
  218. if 'tags' in self.header: # pages can lack tags
  219. self.header['tags'] = self.header['tags'].split(',')
  220. self.date = self.header.get('published', datetime.datetime.now())
  221. if isinstance(self.date, unicode):
  222. self.date = datetime.datetime.strptime(self.date, "%Y-%m-%d")
  223. for k, v in self.header.items():
  224. try:
  225. setattr(self, k, v)
  226. except AttributeError:
  227. pass
  228. if self.id:
  229. return
  230. if self.header['kind'] == 'writing':
  231. _id = Entry.db.posts.insert({'filename': self.path})
  232. elif self.header['kind'] == 'page':
  233. _id = Entry.db.pages.insert({'filename': self.path})
  234. self.id = _id
  235. def render(self):
  236. if self.header.get('public', '').lower() in ['true', 'yes']:
  237. try:
  238. context = GLOBAL_TEMPLATE_CONTEXT.copy()
  239. context['entry'] = self
  240. _render(context, self.header.get('template', 'entry.html'),
  241. self.destination)
  242. return True
  243. except Exception as e: # pragma: no cover
  244. print(context)
  245. print(self.path)
  246. print(e)
  247. sys.exit(1)
  248. def _sort_entries(entries, reversed=True):
  249. """Sort all entries by date and reverse the list"""
  250. return list(sorted(entries, key=operator.attrgetter('date'), reverse=reversed))
  251. def _render(context, template_path, output_path, encoding='utf-8'):
  252. template = jinja_env.get_template(template_path)
  253. rendered = template.render(context)
  254. html = template.render(context)
  255. try:
  256. os.makedirs(os.path.dirname(output_path))
  257. except OSError:
  258. pass
  259. destination = codecs.open(output_path, 'w', encoding)
  260. destination.write(html)
  261. destination.close()
  262. def render_archive(entries):
  263. """
  264. This function creates the archive page
  265. """
  266. context = GLOBAL_TEMPLATE_CONTEXT.copy()
  267. context['entries'] = entries
  268. _render(context, 'archive_index.html',
  269. os.path.join(CONFIG['output_to'],'archive/index.html')),
  270. def find_new_posts_and_pages(db):
  271. """Walk content dir, put each post and page in the database"""
  272. Q = Query()
  273. for root, dirs, files in os.walk(CONFIG['content_root']):
  274. for filename in files:
  275. if filename.endswith(('md', 'markdown')):
  276. fullpath = os.path.join(root, filename)
  277. _p = fullpath.split(CONFIG['content_root'])[-1].lstrip('/')
  278. if not db.posts.contains(Q.filename == _p) and \
  279. not db.pages.contains(Q.filename == _p):
  280. e = Entry(fullpath)
  281. yield e, e.id
  282. def _get_last_entries(db):
  283. eids = [post.eid for post in db.posts.all()]
  284. eids = sorted(eids)[-10:][::-1]
  285. entries = [Entry(os.path.join(CONFIG['content_root'],
  286. db.posts.get(eid=eid)['filename']), eid) for eid in eids]
  287. return entries
  288. def update_index(entries):
  289. """find the last 10 entries in the database and create the main
  290. page.
  291. Each entry in has an eid, so we only get the last 10 eids.
  292. This method also updates the ATOM feed.
  293. """
  294. context = GLOBAL_TEMPLATE_CONTEXT.copy()
  295. context['entries'] = entries
  296. context['last_build'] = datetime.datetime.now().strftime("%Y-%m-%dT%H:%M:%SZ")
  297. map(lambda x: _render(
  298. context, x[0], os.path.join(CONFIG['output_to'], x[1])),
  299. (('entry_index.html', 'index.html'), ('atom.xml', 'atom.xml')))
  300. def build(config):
  301. """Incremental build of the website"""
  302. print("\nRendering website now...\n")
  303. print("entries:")
  304. tags = dict()
  305. entries = list()
  306. root = CONFIG['content_root']
  307. for post, post_id in find_new_posts_and_pages(DB):
  308. # this method will also parse the post's tags and
  309. # update the db collection containing the tags.
  310. if post.render():
  311. if post.header['kind'] in ['writing', 'link']:
  312. for tag in post.tags:
  313. tag.posts = [post_id]
  314. tags[tag.name] = tag
  315. entries.append(post)
  316. print("%s" % post.path)
  317. for name, to in tags.iteritems():
  318. print("updating tag %s" % name)
  319. to.render()
  320. # This is expensive, we should insert only the recent entries
  321. # to the index using BeautifulSoup
  322. # update index
  323. print("updating index")
  324. update_index(_get_last_entries(DB))
  325. # update archive
  326. print("updating archive")
  327. # This is expensive, we should insert only the recent entries
  328. # to the archive using BeautifulSoup
  329. entries = [Entry.entry_from_db(
  330. os.path.join(CONFIG['content_root'], e.get('filename')), e.eid) for e in
  331. DB.posts.all()]
  332. render_archive(_sort_entries(entries, reversed=True)[config['ARCHIVE_SIZE']:])
  333. class StoppableHTTPServer(BaseHTTPServer.HTTPServer): # pragma: no coverage
  334. def server_bind(self):
  335. BaseHTTPServer.HTTPServer.server_bind(self)
  336. self.socket.settimeout(1)
  337. self.run = True
  338. def get_request(self):
  339. while self.run:
  340. try:
  341. sock, addr = self.socket.accept()
  342. sock.settimeout(None)
  343. return (sock, addr)
  344. except socket.timeout:
  345. pass
  346. def stop(self):
  347. self.run = False
  348. def serve(self):
  349. while self.run:
  350. self.handle_request()
  351. def preview(): # pragma: no coverage
  352. """launch an HTTP to preview the website"""
  353. Handler = SimpleHTTPServer.SimpleHTTPRequestHandler
  354. SocketServer.TCPServer.allow_reuse_address = True
  355. port = CONFIG['http_port']
  356. httpd = SocketServer.TCPServer(("", port), Handler)
  357. os.chdir(CONFIG['output_to'])
  358. print("and ready to test at http://127.0.0.1:%d" % CONFIG['http_port'])
  359. print("Hit Ctrl+C to exit")
  360. try:
  361. httpd.serve_forever()
  362. except KeyboardInterrupt:
  363. httpd.shutdown()
  364. def publish(GITDIRECTORY=CONFIG['output_to']): # pragma: no coverage
  365. sp.call('git push', cwd=GITDIRECTORY, shell=True)
  366. def new_post(GITDIRECTORY=CONFIG['output_to'],
  367. kind=KINDS['writing']): # pragma: no coverage
  368. """
  369. This function should create a template for a new post with a title
  370. read from the user input.
  371. Most other fields should be defaults.
  372. TODO: update this function
  373. """
  374. title = raw_input("Give the title of the post: ")
  375. while ':' in title:
  376. title = raw_input("Give the title of the post (':' not allowed): ")
  377. author = CONFIG['author']
  378. date = datetime.datetime.strftime(datetime.datetime.now(), '%Y-%m-%d')
  379. tags = raw_input("Give the tags, separated by ', ':")
  380. published = 'yes'
  381. chronological = 'yes'
  382. summary = ("summary: Type your summary here.")
  383. # make file name
  384. fname = os.path.join(os.getcwd(), 'content', kind['name_plural'],
  385. datetime.datetime.strftime(datetime.datetime.now(),
  386. '%Y'),
  387. date+'-'+title.replace(' ', '-')+'.markdown')
  388. with open(fname, 'w') as npost:
  389. npost.write('---\n')
  390. npost.write('title: %s\n' % title)
  391. npost.write('author: %s\n' % author)
  392. npost.write('published: %s\n' % date)
  393. npost.write('tags: %s\n' % tags)
  394. npost.write('public: %s\n' % published)
  395. npost.write('chronological: %s\n' % chronological)
  396. npost.write('kind: %s\n' % kind['name'])
  397. npost.write('%s' % summary)
  398. npost.write('---\n')
  399. print('%s %s' % (CONFIG['editor'], repr(fname)))
  400. os.system('%s %s' % (CONFIG['editor'], fname))
  401. def clean(GITDIRECTORY=CONFIG['output_to']): # pragma: no coverage
  402. directoriestoclean = ["writings", "notes", "links", "tags", "archive"]
  403. os.chdir(GITDIRECTORY)
  404. for directory in directoriestoclean:
  405. shutil.rmtree(directory)
  406. def main(): # pragma: no coverage
  407. parser = argparse.ArgumentParser(
  408. description='blogit - a tool to blog on github.')
  409. parser.add_argument('-b', '--build', action="store_true",
  410. help='convert the markdown files to HTML')
  411. parser.add_argument('-p', '--preview', action="store_true",
  412. help='Launch HTTP server to preview the website')
  413. parser.add_argument('-c', '--clean', action="store_true",
  414. help='clean output files')
  415. parser.add_argument('-n', '--new', action="store_true",
  416. help='create new post')
  417. parser.add_argument('--publish', action="store_true",
  418. help='push built HTML to git upstream')
  419. args = parser.parse_args()
  420. if not os.path.exists(os.path.join(CONFIG['content_root'])):
  421. os.makedirs(os.path.join(CONFIG['content_root']))
  422. if len(sys.argv) < 2:
  423. parser.print_help()
  424. sys.exit()
  425. if args.clean:
  426. clean()
  427. if args.build:
  428. build(CONFIG)
  429. if args.preview:
  430. preview()
  431. if args.new:
  432. new_post()
  433. if args.publish:
  434. publish()
# Start the command-line interface only when the file is run as a script.
if __name__ == '__main__':  # pragma: no coverage
    main()