colorlab
clone your own copy | download snapshot

Snapshots | iceberg

Inside this repository

parser.new.py
text/x-python

Download raw (2.9 KB)

import json, re

# Read the whole markdown source up front. The context manager closes the
# handle as soon as the text is in memory, and the explicit UTF-8 encoding
# matches the encoding used when the JSON output is written at the end of
# this script (instead of depending on the platform's locale default).
with open("content.new.md", "r", encoding="utf-8") as f:
  content = f.read()

# algaes_dicts = [{ name.lower(): value.strip() for name, value in re.findall(r'^#\s*([\s\w]+)\n(.[^#]+)', algae, flags=re.M+re.DOTALL)} for algae in re.split(r'^\-+$', content, flags=re.M)]  


# print(algaes_dicts)

# --- Line classifiers (used with .match, i.e. anchored at the line start) ---
# A line made up solely of dashes.
separator_pattern = re.compile(r'^\-+$')
# A single '#' followed by at least one word character, whitespace or '+'.
# '##' and deeper do not match: the second '#' is not in the character class.
section_header_pattern = re.compile(r'^#[\w\s+]+')

# --- Rewrites applied to individual lines -----------------------------------
# "key: value" lines; group 1 is the key (word chars, dashes, whitespace),
# group 2 is the value after the colon and any whitespace.
definition_list_pattern = re.compile(r'^([\w\-\s]+):\s*(.+)$')
# One or more '#' plus optional whitespace; group 1 is the remaining text.
subheader_pattern = re.compile(r'#+\s*(.+)$')

def parse_section_header(line):
  """Turn a '# Title' header line into its lower-cased section name.

  The leading '#' and any whitespace directly after it are removed; the
  rest of the line is returned in lower case and is otherwise untouched
  (trailing whitespace, if any, is kept).
  """
  title = re.sub(r'^#\s*', '', line)
  return title.lower()

def parse_line(line):
  """Normalise one line of text and return the result.

  Two rewrites are tried in order, the second one against the output of
  the first:

  * a line starting with one or more '#' is replaced by the upper-cased
    text that follows the hashes;
  * a "key: value" line gets its key upper-cased and is re-emitted as
    "KEY: value" with exactly one space after the colon.

  A line matching neither pattern is returned unchanged.
  """
  def _shout_title(m):
    return m.group(1).upper()

  def _shout_key(m):
    return '{}: {}'.format(m.group(1).upper(), m.group(2))

  rewrites = (
    (subheader_pattern, _shout_title),
    (definition_list_pattern, _shout_key),
  )
  for pattern, replacer in rewrites:
    # Only rewrite when the pattern matches at the very start of the line.
    if pattern.match(line):
      line = pattern.sub(replacer, line)

  return line
# Input: the document as a list of lines (split on LF only).
lines = content.split('\n')

# Output: one dict per record, appended as each record is completed.
buff = []

# Parser state: the record under construction, plus the name and collected
# lines of the section currently open (both None while no section is open).
section_buff = {}
section_header = section_lines = None

def close_section():
  """Flush the section being collected into the current record.

  When a section with a non-empty name is open, its lines are joined with
  CRLF, stripped of surrounding whitespace and stored in `section_buff`
  under the section name. The open-section state is reset to None either
  way, so calling this with no section open only performs the reset.
  """
  global section_header, section_lines

  if section_header:
    body = '\r\n'.join(section_lines)
    section_buff[section_header] = body.strip()

  section_header = section_lines = None

def open_section(name):
  """Begin collecting lines for the section called `name`.

  Replaces the module-level open-section state: the header becomes `name`
  and the line list starts out empty. Whatever was open before is
  overwritten here without being stored.
  """
  global section_header, section_lines
  section_header, section_lines = name, []

# Walk the document once, routing every line by what it looks like:
#   * a dash-only separator ends the current record,
#   * a '# ...' header ends the open section and starts a new one,
#   * anything else is added to the open section, if there is one.
for line in lines:
  if separator_pattern.match(line):
    close_section()
    buff.append(section_buff)
    section_buff = {}

  elif section_header_pattern.match(line):
    close_section()
    open_section(parse_section_header(line))

  elif section_lines is not None:
    # section_lines is None while no section is open, so lines that appear
    # before the first header of a record are dropped.
    section_lines.append(parse_line(line))

# End of input acts as a final separator: flush the open section and record.
# NOTE(review): if the input itself ends with a separator line, this appends
# an empty dict as the last record - confirm whether consumers expect that.
close_section()
buff.append(section_buff)

print(buff)

# def parse_value 

  # print(name, value)
#     algdict = {}
#     for line in algae.split("\n"):
#         if line.startswith("# "):
#             Title = line.replace("# ", "")
#             algdict["title"] = Title
            
#         if line.startswith("## B"):
#             bio_header = line.replace("## Biomass: ", "")
#             algdict["b_header"] = bio_header
            
#         if line.startswith("## P"):
#             pig_header = line.replace("## Pigment: ", "")
#             algdict["p_header"] = pig_header
            
#         if line.startswith("### B"):
#             bio_body = line.replace("### Biomass: ", "")
#             algdict["b_body"] = bio_body
            
#         if line.startswith("### P"):
#             pig_body = line.replace("### Pigment: ", "")
#             algdict["p_body"] = pig_body
            
#         if line.startswith("#### "):
#             main = line.replace("#### ", "")
#             algdict["main"] = main

#         if line.startswith("##### "):
#             list_info = line.replace("##### ", "")
#             algdict["list_info"] = list_info
            
#     algaes_dicts.append(algdict)

# Persist the parsed records as pretty-printed UTF-8 JSON: keys are sorted,
# two-space indentation, and non-ASCII characters are written as-is rather
# than as \u escapes.
with open('formatted_content.json', 'w', encoding='utf-8') as fp:
  fp.write(json.dumps(buff, sort_keys=True, indent=2, ensure_ascii=False))