diff --git a/src/main/kotlin/biokotlin/gff/Chromosome.kt b/src/main/kotlin/biokotlin/gff/Chromosome.kt
new file mode 100644
index 00000000..18af4181
--- /dev/null
+++ b/src/main/kotlin/biokotlin/gff/Chromosome.kt
@@ -0,0 +1,14 @@
+package biokotlin.gff
+
+class Chromosome (
+    seqid: String,
+    source: String,
+    start: Int,
+    end: Int,
+    attributes: Map<String, String> = emptyMap(),
+    children: List<Feature> = emptyList()
+) : Feature(seqid, source, start, end, attributes = attributes, children = children) {
+
+    override fun type(): FeatureType = FeatureType.Chromosome
+
+}
\ No newline at end of file
diff --git a/src/main/kotlin/biokotlin/gff/Coding.kt b/src/main/kotlin/biokotlin/gff/Coding.kt
new file mode 100644
index 00000000..21d692c0
--- /dev/null
+++ b/src/main/kotlin/biokotlin/gff/Coding.kt
@@ -0,0 +1,20 @@
+package biokotlin.gff
+
+/**
+ * Also known as CDS
+ */
+class Coding(
+    seqid: String,
+    source: String,
+    start: Int,
+    end: Int,
+    score: Double = Double.NaN,
+    strand: String = "+",
+    phase: String = ".",
+    attributes: Map<String, String> = emptyMap(),
+    children: List<Feature> = emptyList()
+) : Feature(seqid, source, start, end, score, strand, phase, attributes, children) {
+
+    override fun type(): FeatureType = FeatureType.Coding
+
+}
\ No newline at end of file
diff --git a/src/main/kotlin/biokotlin/gff/Exon.kt b/src/main/kotlin/biokotlin/gff/Exon.kt
new file mode 100644
index 00000000..65e9d1f4
--- /dev/null
+++ b/src/main/kotlin/biokotlin/gff/Exon.kt
@@ -0,0 +1,29 @@
+package biokotlin.gff
+
+class Exon(
+    seqid: String,
+    source: String,
+    start: Int,
+    end: Int,
+    score: Double = Double.NaN,
+    strand: String = "+",
+    phase: String = ".",
+    attributes: Map<String, String> = emptyMap(),
+    children: List<Feature> = emptyList()
+) : Feature(seqid, source, start, end, score, strand, phase, attributes, children) {
+
+    override fun type(): FeatureType = FeatureType.Exon
+
+    fun coding(): List<Coding> {
+        return children.filterIsInstance<Coding>().toList()
+    }
+
+    fun leaders(): List<Leader> {
+        return children.filterIsInstance<Leader>().toList()
+    }
+
+    fun terminators(): List<Terminator> {
+        return children.filterIsInstance<Terminator>().toList()
+    }
+
+}
\ No newline at end of file
diff --git a/src/main/kotlin/biokotlin/gff/Feature.kt b/src/main/kotlin/biokotlin/gff/Feature.kt
new file mode 100644
index 00000000..8f3e6657
--- /dev/null
+++ b/src/main/kotlin/biokotlin/gff/Feature.kt
@@ -0,0 +1,158 @@
+package biokotlin.gff
+
+enum class FeatureType {
+    Gene, Exon, Leader, Terminator, Coding, mRNA, Intron, Chromosome, Scaffold;
+
+    companion object {
+        /**
+         * Converts from standard names in GFF files to a FeatureType. Case-insensitive
+         */
+        fun fromGffString(gffString: String): FeatureType {
+            return when (gffString.lowercase()) {
+                "gene" -> Gene
+                "exon" -> Exon
+                "five_prime_utr" -> Leader
+                "three_prime_utr" -> Terminator
+                "cds" -> Coding
+                "mrna" -> mRNA
+                "intron" -> Intron
+                "chromosome" -> Chromosome
+                "scaffold" -> Scaffold
+                else -> throw Exception("Feature $gffString is not supported")
+            }
+        }
+    }
+}
+
+/**
+ * @param seqid The ID of the landmark used to establish the coordinate system for the current feature. IDs may contain
+ * any characters, but must escape any characters not in the set [a-zA-Z0-9.:^*$@!+_?-|]. In particular, IDs may not
+ * contain unescaped whitespace and must not begin with an unescaped ">".
+ * @param source The source is a free text qualifier intended to describe the algorithm or operating procedure that
+ * generated this feature. Typically this is the name of a piece of software, such as "Genescan" or a database name,
+ * such as "Genbank." In effect, the source is used to extend the feature ontology by adding a qualifier to the type
+ * creating a new composite type that is a subclass of the type in the type column.
+ * @param start The start and end coordinates of the feature are given in positive 1-based integer coordinates, relative
+ * to the landmark given in column one. Start is always less than or equal to end. For features that cross the origin
+ * of a circular feature (e.g. most bacterial genomes, plasmids, and some viral genomes), the requirement for start to
+ * be less than or equal to end is satisfied by making end = the position of the end + the length of the landmark feature.
+ * @param end The start and end coordinates of the feature are given in positive 1-based integer coordinates, relative
+ * to the landmark given in column one. Start is always less than or equal to end. For features that cross the origin
+ * of a circular feature (e.g. most bacterial genomes, plasmids, and some viral genomes), the requirement for start to
+ * be less than or equal to end is satisfied by making end = the position of the end + the length of the landmark feature.
+ * @param score The score of the feature, a floating point number. As in earlier versions of the format, the semantics
+ * of the score are ill-defined. It is strongly recommended that E-values be used for sequence similarity features,
+ * and that P-values be used for ab initio gene prediction features.
+ * @param strand The strand of the feature. + for positive strand (relative to the landmark), - for minus strand,
+ * and . for features that are not stranded. In addition, ? can be used for features whose strandedness is relevant,
+ * but unknown.
+ * @param phase For features of type "CDS", the phase indicates where the next codon begins relative to the 5' end
+ * (where the 5' end of the CDS is relative to the strand of the CDS feature) of the current CDS feature. For
+ * clarification the 5' end for CDS features on the plus strand is the feature's start and and the 5' end for CDS
+ * features on the minus strand is the feature's end. The phase is one of the integers 0, 1, or 2, indicating the
+ * number of bases forward from the start of the current CDS feature the next codon begins. A phase of "0" indicates
+ * that a codon begins on the first nucleotide of the CDS feature (i.e. 0 bases forward), a phase of "1" indicates
+ * that the codon begins at the second nucleotide of this CDS feature and a phase of "2" indicates that the codon
+ * begins at the third nucleotide of this region. Note that ‘Phase’ in the context of a GFF3 CDS feature should not
+ * be confused with the similar concept of frame that is also a common concept in bioinformatics. Frame is generally
+ * calculated as a value for a given base relative to the start of the complete open reading frame (ORF) or the
+ * codon (e.g. modulo 3) while CDS phase describes the start of the next codon relative to a given CDS feature.
+ * @param attributes A list of feature attributes in the format tag=value. Multiple tag=value pairs are separated
+ * by semicolons. URL escaping rules are used for tags or values containing the following characters: ",=;". Spaces
+ * are allowed in this field, but tabs must be replaced with the %09 URL escape. Attribute values do not need to be
+ * and should not be quoted. The quotes should be included as part of the value by parsers and not stripped.
+ * @param children TODO
+ * @see FeatureBuilder
+ */
+abstract class Feature(
+    val seqid: String,
+    val source: String,
+    val start: Int,
+    val end: Int,
+    val score: Double = Double.NaN,
+    val strand: String = "+",
+    val phase: String = ".",
+    var attributes: Map<String, String> = emptyMap(),
+    var children: List<Feature> = emptyList()
+) {
+
+    init {
+        attributes = attributes.toMap()
+        children = children.sortedWith(FeatureComparator())
+    }
+
+    abstract fun type(): FeatureType
+
+    fun attribute(key: String) = attributes[key]
+
+    //TODO make this an actual pointer and handle multiple parents
+    fun parent(): Feature = TODO()
+
+    fun id() = attributes["ID"]
+
+    fun name() = attributes["Name"]
+
+    fun alias() = attributes["Alias"]
+
+    fun target() = attributes["Target"]
+
+    fun gap() = attributes["Gap"]
+
+    fun derivesFrom() = attributes["Derives_from"]
+
+    fun note() = attributes["Note"]
+
+    fun dbxref() = attributes["Dbxref"]
+
+    fun ontologyTerm() = attributes["Ontology_term"]
+
+    fun isCircular() = attributes["Is_circular"]
+
+    /**
+     * Compares this to [other] alphabetically by seqid, then by start, then by end position.
+     * Returns zero if this and [other] are equal in ordering, a negative number if this is less
+     * than [other], or a positive number if this is greater than [other].
+     */
+    fun compareTo(other: Feature): Int {
+        return if (seqid.compareTo(other.seqid) == 0) {
+            if (start.compareTo(other.start) == 0) {
+                end.compareTo(other.end)
+            } else {
+                start.compareTo(other.start)
+            }
+        } else {
+            seqid.compareTo(other.seqid)
+        }
+    }
+    /**
+     * Returns the feature as a string representing row in a GFF file
+     */
+    override fun toString(): String {
+        val scoreString = if (score.isNaN()) {
+            "."
+        } else {
+            score.toString()
+        }
+
+        val attributesString = StringBuilder()
+        for ((tag, value) in attributes) {
+            attributesString.append("$tag=$value;")
+        }
+
+        return "$seqid\t$source\t${type()}\t$start\t$end\t$scoreString\t$strand\t$phase\t${attributesString}\n"
+    }
+}
+
+/**
+ * Provides ordering for feature
+ */
+class FeatureComparator: Comparator<Feature> {
+    /**
+     * Returns the same result as [p0].compareTo([p1]) unless one of the arguments is null, in which case it returns 0.
+     */
+    override fun compare(p0: Feature?, p1: Feature?): Int {
+        if (p0 == null || p1 == null) return 0
+
+        return p0.compareTo(p1)
+    }
+}
\ No newline at end of file
diff --git a/src/main/kotlin/biokotlin/gff/FeatureBuilder.kt b/src/main/kotlin/biokotlin/gff/FeatureBuilder.kt
new file mode 100644
index 00000000..856081da
--- /dev/null
+++ b/src/main/kotlin/biokotlin/gff/FeatureBuilder.kt
@@ -0,0 +1,50 @@
+package biokotlin.gff
+
+//TODO add pointer to parent(s)
+/**
+ * A mutable representation of a genetic feature that can be built into a [Feature].
+ * @see Feature
+ */
+class FeatureBuilder(
+    val seqid: String,
+    val source: String,
+    val type: FeatureType,
+    val start: Int,
+    val end: Int,
+    val score: Double = 0.0,
+    val strand: String = "+",
+    val phase: String = ".",
+    var attributes: Map<String, String> = emptyMap()
+) {
+
+    val children = mutableListOf<FeatureBuilder>()
+
+    fun id() = attributes["ID"]
+
+    fun add(child: FeatureBuilder) {
+        children.add(child)
+    }
+
+    /**
+     * Builds this feature and its children, recursively.
+     * @return An immutable [Feature] with the properties of this [FeatureBuilder] and whose children are built
+     * versions of the this [FeatureBuilder]'s children, sorted by [FeatureComparator].
+     */
+    fun build(): Feature {
+
+        val children = children.map { it.build() }
+        return when (type) {
+            FeatureType.Gene -> Gene(seqid, source, start, end, score, strand, phase, attributes, children)
+            FeatureType.Exon -> Exon(seqid, source, start, end, score, strand, phase, attributes, children)
+            FeatureType.Leader -> Leader(seqid, source, start, end, score, strand, phase, attributes, children)
+            FeatureType.Terminator -> Terminator(seqid, source, start, end, score, strand, phase, attributes, children)
+            FeatureType.Coding -> Coding(seqid, source, start, end, score, strand, phase, attributes, children)
+            FeatureType.mRNA -> MRNA(seqid, source, start, end, score, strand, phase, attributes, children)
+            FeatureType.Intron -> Intron(seqid, source, start, end)
+            FeatureType.Chromosome -> Chromosome(seqid, source, start, end, attributes, children)
+            FeatureType.Scaffold -> Scaffold(seqid, source, start, end, attributes, children)
+        }
+
+    }
+
+}
\ No newline at end of file
diff --git a/src/main/kotlin/biokotlin/gff/Gene.kt b/src/main/kotlin/biokotlin/gff/Gene.kt
new file mode 100644
index 00000000..3f945339
--- /dev/null
+++ b/src/main/kotlin/biokotlin/gff/Gene.kt
@@ -0,0 +1,22 @@
+package biokotlin.gff
+
+
+class Gene(
+    seqid: String,
+    source: String,
+    start: Int,
+    end: Int,
+    score: Double = Double.NaN,
+    strand: String = "+",
+    phase: String = ".",
+    attributes: Map<String, String> = emptyMap(),
+    children: List<Feature> = emptyList()
+) : Feature(seqid, source, start, end, score, strand, phase, attributes, children) {
+
+    override fun type(): FeatureType = FeatureType.Gene
+
+    fun mRNAs(): List<MRNA> {
+        return children.filterIsInstance<MRNA>().toList()
+    }
+
+}
\ No newline at end of file
diff --git a/src/main/kotlin/biokotlin/gff/GffTree.kt b/src/main/kotlin/biokotlin/gff/GffTree.kt
new file mode 100644
index 00000000..e9492971
--- /dev/null
+++ b/src/main/kotlin/biokotlin/gff/GffTree.kt
@@ -0,0 +1,165 @@
+package biokotlin.gff
+
+import biokotlin.util.bufferedReader
+
+/**
+ * A tree representation of a GFF file, with parent/child relationships in the tree created from the Parent attribute
+ * of elements in the GFF file.
+ * @param gff Path to the GFF file
+ */
+class GffTree(val gff: String): Iterable<Feature> {
+    /**
+     * The top-level features of the gff, sorted by [biokotlin.gff.FeatureComparator]
+     */
+    val roots = parseRoots(gff)
+
+    /**
+     * A map of all IDs to the feature with that ID. Features without IDs are excluded.
+     * TODO This shouldn't be a list
+     */
+    val idMap: Map<String, Feature> by lazy {
+        getMap ( {it.id()} )
+    }
+
+    //TODO add more map presets
+
+    /**
+     * Iterates over the GffTree. Starts at the first root (as defined by the order in [biokotlin.gff.FeatureComparator]),
+     * performs a depth-first traversal, then moves to the next root.
+     */
+    override fun iterator(): Iterator<Feature> {
+        return toList().iterator()
+    }
+
+    /**
+     * Converts the tree representation to a list. The first element is the first root (as defined by the order in
+     * [biokotlin.gff.FeatureComparator]), then a depth-first ordering of its children, then the next root and a
+     * depth-first ordering of its children, etc.
+     */
+    fun toList(): List<Feature> {
+        TODO("Not yet implemented")
+    }
+
+    /**
+     * Returns a map where each feature in the [GffTree] is added to a list with the key `mapping(feature)`.
+     * The features in each list will be ordered such that earlier elements in [iterator] are prior to later elements
+     * in [iterator]. When [mapping] returns `null`, no key-value pair is added to the map. The insertion order of
+     * the map follows the order of [iterator].
+     *
+     * This can be useful for accessing features by their non-standard attributes
+     * ```kotlin
+     * val gffTree = GffTree("/home/user/b73.gff3")
+     * //Map the features by the non-standard protein_id attribute
+     * val proteinIDMap = gffTree.getMap { it.attributes["protein_id"] }
+     * //Access all features that have "Zm00001eb000020_P004" in their "protein_id" attribute
+     * val features = proteinIDMap["Zm00001eb000020_P004"]
+     * ```
+     *
+     * Combined with custom data classes, this method also allows for convenient access of all features
+     * that have a particular combination of properties.
+     *
+     * Let's say you have a GFF where exons do not have unique IDs, but they can be uniquely defined by their parent's
+     * ID and their rank.
+     * ```kotlin
+     * data class parentAndRank(val parent: String, val rank: String)
+     * val gffTree = GffTree("/home/user/myGff.gff3")
+     * val parentAndRankMap = gffTree.getMap { feature ->
+     *  val parent = feature.attributes["Parent"]
+     *  val rank = feature.attributes["rank"]
+     *  //Do not create mappings for anything that does not have a parent or a rank
+     *  if (parent == null || rank == null) null
+     *  else parentAndRank(parent, rank)
+     * }
+     * //Pull out the third exon on transcript Zm00001eb442870_T001
+     * val exon = parentAndRankMap[parentAndRank("Zm00001eb442870_T001", "3")]
+     * ```
+     *
+     * @see idMap
+     * @param mapping The function that produces keys for a given feature
+     */
+    fun <T> getMap(mapping: (Feature) -> T?): Map<T, List<Feature>> {
+        TODO("Not yet implemented")
+    }
+
+}
+
+fun main() {
+    val gffTree = GffTree("/home/user/myGff.gff3")
+    val proteinIDs = gffTree.getMap { it.attributes["protein_id"] }
+    val protein = proteinIDs["Zm00001eb442910_P001"] // List of CDS regions
+    val cds = protein?.get(0)
+    val transcript = cds?.parent()
+    val gene = transcript?.parent()
+
+/**
+ * Parses a GFF and returns a list of top-level features sorted by [biokotlin.gff.FeatureComparator]
+ * @param gff Path to gff file
+ */
+private fun parseRoots(gff: String): List<Feature> {
+    val file = bufferedReader(gff)
+    val roots = MutableList(0) { FeatureBuilder("", "", FeatureType.Chromosome, 0, 0) }
+    val orphans = MutableList(0) { FeatureBuilder("", "", FeatureType.Chromosome, 0, 0) }
+    val registry = HashMap<String, FeatureBuilder>(0) //id to feature builder
+
+    var line = file.readLine()
+    var lineNumber = 0
+    while (line != null) {
+        if (line.startsWith("#")) {
+            line = file.readLine()
+            lineNumber++
+            continue
+        }
+
+        val split = line.split("\t")
+        val attributes = split[8].split(";")
+        val attributeMap = HashMap<String, String>(0)
+        attributes.forEach {
+            val splitAttribute = it.split("=")
+            attributeMap[splitAttribute[0]] = splitAttribute[1]
+        }
+        val score = if (split[5] == ".") {
+            Double.NaN
+        } else {
+            split[5].toDouble()
+        }
+        val featureBuilder = FeatureBuilder(
+            split[0],
+            split[1],
+            FeatureType.fromGffString(split[2]),
+            split[3].toInt(),
+            split[4].toInt(),
+            score,
+            split[6],
+            split[7],
+            attributeMap
+        )
+
+        if (featureBuilder.attributes["ID"] != null) {
+            registry[featureBuilder.attributes["ID"]!!] = featureBuilder
+        }
+
+        if (featureBuilder.attributes["Parent"] != null) {
+            if (registry.contains(featureBuilder.attributes["Parent"])) {
+                registry[featureBuilder.attributes["Parent"]]?.add(featureBuilder)
+            } else {
+                orphans.add(featureBuilder)
+            }
+        } else {
+            roots.add(featureBuilder)
+        }
+
+        line = file.readLine()
+        lineNumber++
+    }
+
+    for (orphan in orphans) {
+        if (registry.contains(orphan.attributes["Parent"])) {
+            registry[orphan.attributes["Parent"]]?.add(orphan)
+        } else {
+            roots.add(orphan)
+            println("Warning: Orphaned element. Parent ${orphan.attributes["Parent"]} is not in the file")
+        }
+    }
+
+    return roots.map { it.build() }.sortedWith(FeatureComparator())
+}
\ No newline at end of file
diff --git a/src/main/kotlin/biokotlin/gff/Intron.kt b/src/main/kotlin/biokotlin/gff/Intron.kt
new file mode 100644
index 00000000..d11a429a
--- /dev/null
+++ b/src/main/kotlin/biokotlin/gff/Intron.kt
@@ -0,0 +1,12 @@
+package biokotlin.gff
+
+class Intron(
+    seqid: String,
+    source: String,
+    start: Int,
+    end: Int
+) : Feature(seqid, source, start, end) {
+
+    override fun type(): FeatureType = FeatureType.Intron
+
+}
\ No newline at end of file
diff --git a/src/main/kotlin/biokotlin/gff/Leader.kt b/src/main/kotlin/biokotlin/gff/Leader.kt
new file mode 100644
index 00000000..c2518a5c
--- /dev/null
+++ b/src/main/kotlin/biokotlin/gff/Leader.kt
@@ -0,0 +1,20 @@
+package biokotlin.gff
+
+/**
+ * Also known as 5' UTR
+ */
+class Leader(
+    seqid: String,
+    source: String,
+    start: Int,
+    end: Int,
+    score: Double = Double.NaN,
+    strand: String = "+",
+    phase: String = ".",
+    attributes: Map<String, String> = emptyMap(),
+    children: List<Feature> = emptyList()
+) : Feature(seqid, source, start, end, score, strand, phase, attributes, children) {
+
+    override fun type(): FeatureType = FeatureType.Leader
+
+}
\ No newline at end of file
diff --git a/src/main/kotlin/biokotlin/gff/MRNA.kt b/src/main/kotlin/biokotlin/gff/MRNA.kt
new file mode 100644
index 00000000..4960c755
--- /dev/null
+++ b/src/main/kotlin/biokotlin/gff/MRNA.kt
@@ -0,0 +1,64 @@
+package biokotlin.gff
+
+/**
+ * Also known as a Transcript
+ */
+class MRNA(
+    seqid: String,
+    source: String,
+    start: Int,
+    end: Int,
+    score: Double = Double.NaN,
+    strand: String = "+",
+    phase: String = ".",
+    attributes: Map<String, String> = emptyMap(),
+    children: List<Feature> = emptyList()
+) : Feature(seqid, source, start, end, score, strand, phase, attributes, children) {
+
+    override fun type(): FeatureType = FeatureType.mRNA
+
+    /**
+     * @return A list of direct children that are an instance of [Coding], in order as defined by [FeatureComparator]
+     */
+    fun coding(): List<Coding> {
+        return children.filterIsInstance<Coding>().toList()
+    }
+
+    /**
+     * @return A list of direct children that are an instance of [Exon], in order as defined by [FeatureComparator]
+     */
+    fun exons(): List<Exon> {
+        return children.filterIsInstance<Exon>().toList()
+    }
+
+    /**
+     * @return A list of direct children that are an instance of [Leader], in order as defined by [FeatureComparator]
+     */
+    fun leaders(): List<Leader> {
+        return children.filterIsInstance<Leader>().toList()
+    }
+
+    /**
+     * @return A list of direct children that are an instance of [Terminator], in order as defined by [FeatureComparator]
+     */
+    fun terminators(): List<Terminator> {
+        return children.filterIsInstance<Terminator>().toList()
+    }
+
+    /**
+     * @return Introns in direct children
+     */
+    fun introns(): List<Intron> {
+        return children
+            .chunked(2)
+            .filter { it.size == 2 }
+            .map {
+                val seqid = it[0].seqid
+                val source = it[0].source
+                val start = it[0].end
+                val end = it[1].start
+                Intron(seqid, source, start, end)
+            }
+    }
+
+}
\ No newline at end of file
diff --git a/src/main/kotlin/biokotlin/gff/MakeGffForNewSpecies.kt b/src/main/kotlin/biokotlin/gff/MakeGffForNewSpecies.kt
new file mode 100644
index 00000000..c92ce53a
--- /dev/null
+++ b/src/main/kotlin/biokotlin/gff/MakeGffForNewSpecies.kt
@@ -0,0 +1,437 @@
+package biokotlin.gff
+
+import biokotlin.util.bufferedWriter
+import htsjdk.samtools.CigarOperator
+import htsjdk.samtools.SAMRecord
+import htsjdk.samtools.SamReaderFactory
+import htsjdk.samtools.ValidationStringency
+import java.io.File
+
+/**
+ * Feature operator classifiers
+ *  * `U` = undefined (`S`, `H`)
+ *  * `I` = intron (`N`)
+ *  * `C` = coding (`M`, `D`, `I`**, `=`, `X`) <- `I` is ignored
+ */
+enum class FeatureOperator {
+    U, C, I
+}
+
+/**
+ * Data class for transcript features
+ *  * `length` = length of feature
+ *  * `operator` = what is the feature type? (*See `FeatureOperator` for more info*)
+ */
+data class TranscriptFeatures(
+    val length: Int,
+    val operator: FeatureOperator,
+    var id: Int
+)
+
+/**
+ * Data class for BED columns
+ *  * `chrom` = contig name
+ *  * `chromStart` = start coordinate for sequence considered
+ *  * `chromEnd` = end coordinate for sequence considered
+ *  * `name` = ID for range
+ *  * `strand` = DNA strand (`+`, `-`, or `.`)
+ */
+data class BedFeatures(
+    val chrom: String,
+    val chromStart: Int,
+    val chromEnd: Int,
+    val name: String,
+    val strand: Char
+    )
+
+/**
+ * Data class for GFF columns (descriptions from https://useast.ensembl.org/info/website/upload/gff3.html)
+ *  * seqid - name of the chromosome or scaffold
+ *  * source - name of the program that generated this feature, or the data source (database/project name)
+ *  * type - type of feature //TODO check which types our features can be (presumably only a subset of all possible types)
+ *  * start - start position (STARTS COUNTING AT 1)
+ *  * end - end position (STARTS COUNTING AT 1, INCLUSIVE)
+ *  * score - floating point value
+ *  * strand - '+', '-', or '.'
+ *  * phase - One of '.', '0', '1' or '2'. '0' indicates that the first base of the feature is the first base of a codon,
+ *      '1' that the second base is the first base of a codon, and so on..
+ *  * attributes - a map of tag-value pairs. Attribute tags with special meanings found at http://gmod.org/wiki/GFF3#GFF3_Format
+ *      * key attributes: Parent, ID, Name (can be same as ID)
+ */
+data class GffFeatures(
+    val seqid: String,
+    val source: String,
+    val type: String,
+    val start: Int,
+    val end: Int,
+    val score: String,
+    val strand: Char,
+    val phase: Char,
+    val attributes: Map<String, String>,
+) {
+    /**
+     * Converts this GffFeature into a GFF row
+     */
+    fun asRow(): String {
+        val sb = StringBuilder()
+        for ((tag, value) in attributes) {
+            sb.append("$tag=$value;")
+        }
+        return "$seqid\t$source\t$type\t$start\t$end\t$score\t$strand\t$phase\t$sb\n"
+    }
+}
+
+// ZACK FUNCTIONS ----
+
+data class TranscriptBpAlignmentStats(val xOrEqArray: MutableList<Int>,
+                                      val eqArray:MutableList<Int>,
+                                      var nCounts:Int = 0,
+                                      var dCounts:Int = 0,
+                                      var numAlignments:Int = 0 )
+
+enum class ExonOperator {
+    L, C, T
+}
+val exonOperatorMap = mapOf<Char,ExonOperator>('L' to ExonOperator.L,'C' to ExonOperator.C,'T' to ExonOperator.T)
+data class ExonBoundary(val length: Int, val operator: ExonOperator)
+
+/**
+ * Function to take a single exon and parse out the operator and the length of that operation.
+ * It needs to have a running sub list of the number characters until it sees an operator.
+ * Then it will convert the temp String into an Int and save out the boundary.
+ */
+fun convertExonStringToBoundary(exonString: String) : List<ExonBoundary> {
+    var runningCount = StringBuilder()
+    val exonBoundaries = mutableListOf<ExonBoundary>()
+    for(element in exonString) {
+        //Check to see if we hit an operator
+        if(element in exonOperatorMap.keys) {
+            //if so, turn the runningCount into an Int and add the boundary to the list.
+            val countString = runningCount.toString()
+            check(!countString.isEmpty()) {"Error parsing exon name.  No counts for operator"}
+
+            exonBoundaries.add(ExonBoundary(countString.toInt(), exonOperatorMap[element]!!))
+            runningCount.clear()
+        }
+        else {
+            runningCount.append("$element")
+        }
+    }
+
+    return exonBoundaries
+}
+
+/**
+ * Function to parse out the exon boundaries from the transcript name.
+ * They take the form: transcript_1234:100L24C-12C-90C10T
+ */
+fun parseExonBoundaries(transcriptName: String) : List<List<ExonBoundary>> {
+    val exonStrings = transcriptName.split(":")[1].split("-")
+
+    return exonStrings.map { convertExonStringToBoundary(it) }
+}
+
+/**
+ * Compute CDS positions (Pair)
+ */
+fun computeCDSPositions(exonBoundaries:List<List<ExonBoundary>>) : Pair<Int,Int> {
+    val sizesOfOperators = exonBoundaries.flatten()
+        .groupBy { it.operator }
+        .map { Pair(it.key, it.value.map { currValue -> currValue.length }.sum()) }
+        .toMap()
+
+    val leaderSize = sizesOfOperators[ExonOperator.L]?:0
+
+    val cdsSize = sizesOfOperators[ExonOperator.C]?:0
+
+    return Pair(leaderSize, leaderSize + cdsSize - 1)
+}
+
+/**
+ * Function that actually creates the xOrEQ array and the eqArray.  It also counts the number of N and D bps
+ */
+fun buildTranscriptBpAlignmentStats(samRecord: SAMRecord) : TranscriptBpAlignmentStats {
+    val xOrEqArray = Array<Int>(samRecord.readLength) { 0 }
+    val eqArray = Array<Int>(samRecord.readLength) { 0 }
+
+    var nCounts = 0
+    var dCounts = 0
+
+    var currentBp = 0
+    val cigarElements = samRecord.cigar.cigarElements
+
+    //Loop through the cigarElements
+    for(element in cigarElements) {
+        val operator = element.operator
+        val count = element.length
+
+        if(operator== CigarOperator.N) {
+            nCounts+=count
+        }
+        else if(operator==CigarOperator.D) {
+            dCounts+=count
+        }
+        //Check to see if consumes query
+        else if(operator.consumesReadBases()) {
+            //If it consumes read bases, we can walk through the length of the CIGAR operator and set the position as a 1
+            for(index in 0 until count) {
+                if(operator.isAlignment) {
+                    xOrEqArray[currentBp] = 1
+                }
+
+                if (operator==CigarOperator.EQ) {
+                    eqArray[currentBp] = 1
+                }
+
+                currentBp++
+            }
+        }
+
+    }
+
+    //Check to see if it was reversed during alignment.  If so we need to flip our arrays.
+    return if(samRecord.readNegativeStrandFlag) {
+        TranscriptBpAlignmentStats(xOrEqArray.reversed().toMutableList(), eqArray.reversed().toMutableList(), nCounts, dCounts,1)
+    }
+    else {
+        TranscriptBpAlignmentStats(xOrEqArray.toMutableList(), eqArray.toMutableList(),nCounts, dCounts,1)
+    }
+
+}
+
+/**
+ * Generate a list of feature lengths from a CIGAR string
+ * @param featureLengths A list of `TranscriptFeature` data objects
+ * @param samRecord A `SAMRecord` object
+ * @param taxaId What reference assembly was this aligned to?
+ * @return A list of `BedFeatures` data objects.
+ */
+fun calculateRanges(featureLengths: List<TranscriptFeatures>, samRecord: SAMRecord, taxaId: String): MutableList<BedFeatures> {
+    var aggLength = samRecord.alignmentStart
+    val contig = samRecord.referenceName
+    val bedFeatures = mutableListOf<BedFeatures>()
+    val zmTranscriptRanges = parseExonBoundaries(samRecord.readName).flatten()
+    val strand = when {
+        samRecord.readNegativeStrandFlag -> '-'
+        else -> '+'
+    }
+    val prefixReadName = samRecord.readName.substringBefore(":")
+
+    for (feature in featureLengths) {
+        if (feature.operator == FeatureOperator.C || feature.operator == FeatureOperator.I) {
+            bedFeatures.add(
+                BedFeatures(
+                    contig,
+                    (aggLength) - 1,
+                    (aggLength + feature.length - 1),
+                    "${taxaId}_${prefixReadName}_${feature.operator}.${feature.id}",
+                    strand
+                )
+            )
+            aggLength += feature.length
+        } else {
+            continue
+        }
+    }
+    return bedFeatures
+}
+
+/**
+ * Generate a list of feature lengths from a CIGAR string
+ * @param samRecord A SAM record entry
+ * @param taxaId What reference assembly was this aligned to?
+ * @return A list of `BedFeatures` data objects.
+ */
+fun buildFeatureRanges(samRecord: SAMRecord, taxaId: String): MutableList<BedFeatures> {
+    val cigarElements = samRecord.cigar.cigarElements
+    val transcriptFeatures = mutableListOf<TranscriptFeatures>()
+
+    // Loop through CIGAR elements
+    var sumLength = 0
+    var i = 0
+    var iCount = 1
+    var cCount = 1
+    var uCount = 1
+    for (element in cigarElements) {
+        val operator = element.operator
+
+        if (operator != CigarOperator.I) {
+            sumLength += element.length
+        }
+        // FIX - change to last iterator [prev: element == cigarElements.last()]
+        if (i == cigarElements.lastIndex && (operator.isAlignment || operator == CigarOperator.D)) {
+            transcriptFeatures.add(TranscriptFeatures(sumLength, FeatureOperator.C, cCount))
+            break
+        }
+        // TODO: 2/6/2022 - how to handle I/D elements?
+        when (operator) {
+            CigarOperator.S, CigarOperator.H -> {
+                transcriptFeatures.add(TranscriptFeatures(element.length, FeatureOperator.U, uCount))
+                sumLength = 0
+                uCount++
+            }
+            CigarOperator.N -> {
+                transcriptFeatures.add(TranscriptFeatures(element.length, FeatureOperator.I, iCount))
+                sumLength = 0
+                iCount++
+            }
+            else -> {
+                if (cigarElements[i + 1].operator == CigarOperator.N || cigarElements[i + 1].operator.isClipping) {
+                    transcriptFeatures.add(TranscriptFeatures(sumLength, FeatureOperator.C, cCount))
+                    cCount++
+                }
+            }
+
+        }
+
+        i++
+    }
+
+    // Check to see if it was reversed during alignment. If so, we need to reverse the ID order (e.g. 1-18 -> 18-1).
+    val featureLengths = when {
+        samRecord.readNegativeStrandFlag -> {
+            val idList = mutableListOf<Int>()
+            transcriptFeatures.forEach { idList.add(it.id) }
+            val revIdList = idList.reversed()
+            var j = 0
+            transcriptFeatures.forEach {
+                it.id = revIdList[j]
+                j++
+            }
+            transcriptFeatures.toMutableList()
+        }
+        else -> {
+            transcriptFeatures
+        }
+    }
+
+    return calculateRanges(featureLengths, samRecord, taxaId)
+}
+
+/**
+ * Writes a BED file based on a SAM file. Generates coordinates for
+ * introns (I), coding regions (C), and whole transcripts. IDs (e.g. C.1)
+ * are based on strand (+, -).
+ * @param samFile A String object referring to a SAM file
+ * @param outputFile A String object referring to the output BED file
+ * @param taxaId What reference assembly was this aligned to?
+ * @param minAlignmentPercent Percent match threshold to alignment region.
+ */
+fun writeBedFile(samFile: String, outputFile: String, taxaId: String, minAlignmentPercent: Double = 0.90) {
+    // Read in SAM and creater iterator object ----
+    println("Processing $taxaId ...")
+    val timeStartRead = System.currentTimeMillis()
+    val samIterator = SamReaderFactory.makeDefault()
+        .validationStringency(ValidationStringency.SILENT)
+        .setOption(SamReaderFactory.Option.CACHE_FILE_BASED_INDEXES, false)
+        .setOption(SamReaderFactory.Option.EAGERLY_DECODE, false)
+        .setOption(SamReaderFactory.Option.INCLUDE_SOURCE_IN_RECORDS, false)
+        .setOption(SamReaderFactory.Option.VALIDATE_CRC_CHECKSUMS, false)
+        .open(File(samFile))
+        .iterator()
+    val timeEndRead = System.currentTimeMillis() - timeStartRead
+    println("Elapsed read time: $timeEndRead ms")
+
+    // Make buffered reader and write to file ----
+    val timeStart = System.currentTimeMillis()
+    val bw = bufferedWriter(outputFile)
+    while(samIterator.hasNext()) {
+        val currentRecord = samIterator.next()
+
+        // Check for secondary, supplementary, or unmapped reads ----
+        if (!currentRecord.isSecondaryOrSupplementary && !currentRecord.readUnmappedFlag) {
+
+            // Check for minimum alignment % ----
+            val exonBoundaries = parseExonBoundaries(currentRecord.readName)
+            val cdsBoundaries = computeCDSPositions(exonBoundaries)
+            val stats = buildTranscriptBpAlignmentStats(currentRecord)
+            val numMapping = stats.xOrEqArray.slice(cdsBoundaries.first .. cdsBoundaries.second).sum()
+            val alignmentPercentage = (numMapping.toDouble() / (cdsBoundaries.second - cdsBoundaries.first + 1))
+            if (alignmentPercentage >= minAlignmentPercent) {
+                val bedStats = buildFeatureRanges(currentRecord, taxaId) // make BED ranges
+                val transId = "${currentRecord.referenceName}\t${currentRecord.alignmentStart - 1}\t${currentRecord.alignmentEnd}\t${taxaId}_${currentRecord.readName.substringBefore(":")}\t0\t${bedStats[0].strand}\n"
+                bw.write(transId)
+                bedStats.forEach {
+                    bw.write("${it.chrom}\t${it.chromStart}\t${it.chromEnd}\t${it.name}\t0\t${it.strand}\n")
+                }
+            }
+
+        }
+    }
+    bw.close()
+
+    // Get time stats ----
+    val timeEnd = System.currentTimeMillis() - timeStart
+    println("Elapsed BED creation time: $timeEnd ms")
+}
+
+/**
+ * Writes a GFF file based on the SAM file.
+ * @param samFile A String object referring to a SAM file
+ * @param outputFile A String object referring to the output BED file
+ * @param taxaId What reference assembly was this aligned to?
+ * @param minQuality Percent match threshold to alignment region.
+ * @param maxNumber //TODO how to define this?
+ */
+fun writeGffFile(samFile: String, outputFile: String, taxaId: String, minQuality: Double = 0.90, maxNumber: Int = 1) {
+    // Read in SAM and creater iterator object ----
+    println("Processing $taxaId ...")
+    val timeStartRead = System.currentTimeMillis()
+    val samIterator = SamReaderFactory.makeDefault()
+        .validationStringency(ValidationStringency.SILENT)
+        .setOption(SamReaderFactory.Option.CACHE_FILE_BASED_INDEXES, false)
+        .setOption(SamReaderFactory.Option.EAGERLY_DECODE, false)
+        .setOption(SamReaderFactory.Option.INCLUDE_SOURCE_IN_RECORDS, false)
+        .setOption(SamReaderFactory.Option.VALIDATE_CRC_CHECKSUMS, false)
+        .open(File(samFile))
+        .iterator()
+    val timeEndRead = System.currentTimeMillis() - timeStartRead
+    println("Elapsed read time: $timeEndRead ms")
+
+    // Make buffered reader and write to file ----
+    val timeStart = System.currentTimeMillis()
+    val bw = bufferedWriter(outputFile)
+    while(samIterator.hasNext()) {
+        val currentRecord = samIterator.next()
+
+        // Check for secondary, supplementary, or unmapped reads ----
+        if (!currentRecord.isSecondaryOrSupplementary && !currentRecord.readUnmappedFlag) {
+
+            // Check for minimum alignment % ----
+            val exonBoundaries = parseExonBoundaries(currentRecord.readName)
+            val cdsBoundaries = computeCDSPositions(exonBoundaries)
+            val stats = buildTranscriptBpAlignmentStats(currentRecord)
+            val numMapping = stats.xOrEqArray.slice(cdsBoundaries.first .. cdsBoundaries.second).sum()
+            val alignmentPercentage = (numMapping.toDouble() / (cdsBoundaries.second - cdsBoundaries.first + 1))
+            if (alignmentPercentage >= minQuality) {
+                val bedStats = buildFeatureRanges(currentRecord, taxaId) // make BED ranges //TODO make these GFF
+
+                val topID = "${taxaId}_${currentRecord.readName.substringBefore(":")}";
+                //TODO does this parent represent mRNA?
+                //TODO fill in the values
+                val mRNA = GffFeatures(
+                    currentRecord.referenceName,
+                    "SOURCE",
+                    "mRNA",
+                    currentRecord.alignmentStart,
+                    currentRecord.alignmentEnd,
+                    ".",
+                    bedStats[0].strand,
+                    '.',
+                    mapOf("ID" to topID, "Name" to topID)
+                )
+                bw.write(mRNA.asRow())
+                //TODO key line to change
+                bedStats.forEach {
+                    bw.write("${it.chrom}\t${it.chromStart}\t${it.chromEnd}\t${it.name}\t0\t${it.strand}\n")
+                }
+            }
+
+        }
+    }
+    bw.close()
+
+    // Get time stats ----
+    val timeEnd = System.currentTimeMillis() - timeStart
+    println("Elapsed BED creation time: $timeEnd ms")
+}
\ No newline at end of file
diff --git a/src/main/kotlin/biokotlin/gff/Scaffold.kt b/src/main/kotlin/biokotlin/gff/Scaffold.kt
new file mode 100644
index 00000000..476f5369
--- /dev/null
+++ b/src/main/kotlin/biokotlin/gff/Scaffold.kt
@@ -0,0 +1,14 @@
+package biokotlin.gff
+
+class Scaffold(
+    seqid: String,
+    source: String,
+    start: Int,
+    end: Int,
+    attributes: Map<String, String> = emptyMap(),
+    children: List<Feature> = emptyList()
+) : Feature(seqid, source, start, end, attributes = attributes, children = children) {
+
+    override fun type(): FeatureType = FeatureType.Scaffold
+
+}
\ No newline at end of file
diff --git a/src/main/kotlin/biokotlin/gff/Terminator.kt b/src/main/kotlin/biokotlin/gff/Terminator.kt
new file mode 100644
index 00000000..9b6e5a12
--- /dev/null
+++ b/src/main/kotlin/biokotlin/gff/Terminator.kt
@@ -0,0 +1,20 @@
+package biokotlin.gff
+
+/**
+ * Also known as 3' UTR
+ */
+class Terminator(
+    seqid: String,
+    source: String,
+    start: Int,
+    end: Int,
+    score: Double = Double.NaN,
+    strand: String = "+",
+    phase: String = ".",
+    attributes: Map<String, String> = emptyMap(),
+    children: List<Feature> = emptyList()
+) : Feature(seqid, source, start, end, score, strand, phase, attributes, children) {
+
+    override fun type(): FeatureType = FeatureType.Terminator
+
+}
\ No newline at end of file
diff --git a/src/main/kotlin/biokotlin/util/FileIO.kt b/src/main/kotlin/biokotlin/util/FileIO.kt
index b4a92097..7eba853d 100644
--- a/src/main/kotlin/biokotlin/util/FileIO.kt
+++ b/src/main/kotlin/biokotlin/util/FileIO.kt
@@ -2,10 +2,10 @@
 
 package biokotlin.util
 
-import java.io.BufferedReader
-import java.io.File
+import java.io.*
 import java.net.URL
 import java.util.zip.GZIPInputStream
+import java.util.zip.GZIPOutputStream
 
 fun bufferedReader(filename: String): BufferedReader {
     return try {
@@ -21,6 +21,18 @@ fun bufferedReader(filename: String): BufferedReader {
             File(filename).bufferedReader()
         }
     } catch (e: Exception) {
-        throw IllegalStateException("FileIO: getBufferedReader: problem getting reader: " + e.message)
+        throw IllegalStateException("FileIO: bufferedReader: problem getting reader: " + e.message)
+    }
+}
+
+fun bufferedWriter(filename: String, append: Boolean = false): BufferedWriter {
+    return try {
+        if (filename.endsWith(".gz")) {
+            BufferedWriter(OutputStreamWriter(GZIPOutputStream(FileOutputStream(filename, append))))
+        } else {
+            BufferedWriter(OutputStreamWriter(FileOutputStream(filename, append)))
+        }
+    } catch (e: Exception) {
+        throw IllegalStateException("FileIO: bufferedWriter: problem getting writer: " + e.message)
     }
 }
\ No newline at end of file