from io import BufferedReader
from tqdm import tqdm
from functools import partial
from sys import argv
import numpy as np
from os import path, listdir

# Makes the "leave" argument default to False
tqdm = partial(tqdm, leave = False)

def read_pgm(pgm_file: BufferedReader) -> np.ndarray:
	"""Read the data of a PGM file

	Args:
		pgm_file (BufferedReader): PGM File

	Returns:
		np.ndarray: PGM data
	"""
	assert (f := pgm_file.readline()) == b'P5\n', f"Incorrect file format: {f}"
	(width, height) = [int(i) for i in pgm_file.readline().split()]
	assert width > 0 and height > 0, f"Incorrect dimensions: {width}x{height}"
	assert (depth := int(pgm_file.readline())) < 256, f"Incorrect depth: {depth}"

	buff = np.empty(height * width, dtype = np.uint8)
	for i in range(buff.shape[0]):
		buff[i] = ord(pgm_file.read(1))
	return buff.reshape((height, width))

def __main__(data_path: str) -> None:
	"""Read the data of every PGM file and output it in data files

	Args:
		data_path (str): Path of the PGM files
	"""
	for set_name in tqdm(["train", "test"], desc = "set name"):
		X, y = [], []
		for y_i, label in enumerate(tqdm(["non-face", "face"], desc = "label")):
			for filename in tqdm(listdir(f"{data_path}/{set_name}/{label}"), desc = "Reading pgm file"):
				with open(f"{data_path}/{set_name}/{label}/{filename}", "rb") as face:
					X.append(read_pgm(face))
					y.append(y_i)

		X, y = np.asarray(X), np.asarray(y)
		# idx = np.random.permutation(y.shape[0])
		# X, y = X[idx], y[idx]

		for org, s in tqdm(zip("Xy", [X, y]), desc = f"Writing {set_name}"):
			with open(f"{data_path}/{org}_{set_name}.bin", "w") as out:
				out.write(f'{str(s.shape)[1:-1].replace(",", "")}\n')
				raw = s.ravel()
				for s_i in tqdm(raw[:-1], desc = f"Writing {org}"):
					out.write(f"{s_i} ")
				out.write(str(raw[-1]))

if __name__ == "__main__":
	__main__(argv[1]) if len(argv) == 2 else print(f"Usage: python {__file__[__file__.rfind(path.sep) + 1:]} ./data_location")