Download raw (2.9 KB)
# Convert the markdown-like file ``content.new.md`` into
# ``formatted_content.json``.
#
# Input layout understood by this script:
#   * a line made only of dashes (``---``) separates groups of sections;
#   * a line starting with a single ``#`` opens a new section whose key is
#     the lower-cased remainder of that line;
#   * every other line inside a section is body text; sub-headers (``##`` and
#     deeper) are upper-cased, and in ``name: value`` lines the name is
#     upper-cased.
#
# The output is a JSON list with one object per group, mapping section name
# to section body.
import json
import re

INPUT_PATH = "content.new.md"
OUTPUT_PATH = 'formatted_content.json'

separator_pattern = re.compile(r'^\-+$')
section_header_pattern = re.compile(r'^#[\w\s+]+')
subheader_pattern = re.compile(r'#+\s*(.+)$')
definition_list_pattern = re.compile(r'^([\w\-\s]+):\s*(.+)$')


def parse_section_header(line: str) -> str:
    """Return the section key for a header line.

    The leading ``#`` and any whitespace directly after it are removed and
    the rest is lower-cased. Trailing whitespace is kept as-is.
    """
    return re.sub(r'^#\s*', '', line).lower()


def _upper_subheader(match):
    """Replacement callback: the sub-header text, upper-cased."""
    return match.group(1).upper()


def _upper_definition_name(match):
    """Replacement callback: ``NAME: value`` with only the name upper-cased."""
    return '{}: {}'.format(match.group(1).upper(), match.group(2))


def parse_line(line: str) -> str:
    """Normalise one body line of a section.

    A line that starts with one or more ``#`` is replaced by its upper-cased
    text. The result is then checked for the ``name: value`` shape, and if it
    matches, the name part is upper-cased. Any other line is returned
    unchanged.
    """
    # ``match`` anchors at the start of the line; ``sub`` alone would also
    # rewrite a ``#`` that appears in the middle of ordinary text.
    if subheader_pattern.match(line):
        line = subheader_pattern.sub(_upper_subheader, line)
    if definition_list_pattern.match(line):
        line = definition_list_pattern.sub(_upper_definition_name, line)
    return line


def _store_section(group, header, body_lines):
    """Save the finished section into *group*; no-op without a header name."""
    if header:
        group[header] = '\r\n'.join(body_lines).strip()


def parse_content(content: str) -> list:
    """Split *content* into groups of named sections.

    Returns a list of dicts, one per separator-delimited group (the last
    group is always appended, even when it is empty). Each dict maps a
    section key (see ``parse_section_header``) to the section body: the
    processed lines joined with ``\\r\\n`` and stripped of surrounding
    whitespace. Lines that appear before the first header of a group are
    ignored. A repeated section name inside one group overwrites the
    earlier entry.
    """
    groups = []
    current_group = {}
    header = None
    body_lines = None  # None means "not inside a section"

    for line in content.split('\n'):
        if separator_pattern.match(line):
            _store_section(current_group, header, body_lines)
            header, body_lines = None, None
            groups.append(current_group)
            current_group = {}
        elif section_header_pattern.match(line):
            _store_section(current_group, header, body_lines)
            header = parse_section_header(line)
            body_lines = []
        elif body_lines is not None:
            body_lines.append(parse_line(line))

    _store_section(current_group, header, body_lines)
    groups.append(current_group)
    return groups


def main():
    """Read the input file, print the parsed groups and write them as JSON."""
    # Decode explicitly as UTF-8 (the same encoding the JSON is written in)
    # instead of relying on the platform default; the context manager also
    # guarantees the handle is closed.
    with open(INPUT_PATH, "r", encoding="utf-8") as source:
        content = source.read()

    sections = parse_content(content)
    print(sections)

    with open(OUTPUT_PATH, 'w', encoding='utf-8') as fp:
        json.dump(sections, fp, sort_keys=True, indent=2, ensure_ascii=False)


if __name__ == "__main__":
    main()