diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 0ddf057d60cfb77578f48ea135b663f813c93bb2..35412ae56cfa23d45faad0969ddfcfd8fab1d99c 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -9,7 +9,8 @@ cache:
     - node_modules/
 
 stages:
-  - pfam
+  - get-data  # stage used by get-pfam, which downloads the Pfam data
+  - format-data  # NOTE(review): presumably for jobs that convert downloaded data — confirm
   - build
   - deploy
 
@@ -40,19 +41,33 @@ stages:
 #     when: on_success
 #     expire_in: "30 days"
 
-pfam:
+get-pfam:  # Downloads the current Pfam-A HMM data file and publishes it as an artifact.
   image: ubuntu:23.04
-  stage: pfam
+  stage: get-data
   before_script:
     - apt update && apt install -y curl
     - curl https://ftp.ebi.ac.uk/pub/databases/Pfam/current_release/Pfam-A.hmm.dat.gz --output pfam-data.gz
     - ls -al
   script:
-    - gunzip pfam-data.gz
-    - ls -al
-  # artifacts:
-  #   paths:
-  #     - mycv.p
+    - mkdir -p pfam-data && gunzip -c pfam-data.gz > pfam-data/Pfam-A.hmm.dat  # plain `gunzip` would create a *file* named pfam-data
+    - ls -al pfam-data/
+  artifacts:
+    paths:
+      - pfam-data/Pfam-A.hmm.dat  # consumed by format-pfam
+
+format-pfam:  # Converts the Pfam-A HMM data fetched by get-pfam into a CSV artifact.
+  image: python:3.11
+  stage: format-data  # must be explicit: the default stage `test` is not listed in `stages`
+  needs: ["get-pfam"]  # start right after get-pfam and download its artifacts
+  before_script:
+    - pip install pandas
+    - mv pfam-data/Pfam-A.hmm.dat scripts
+  script:
+    - ./scripts/pfam-a-hmm-to-csv.py
+    - mv ./scripts/Pfam-A.hmm.dat.csv content/_partial/pfam-a-hmm.csv  # NOTE(review): assumes the script writes this CSV — confirm
+  artifacts:
+    paths:
+      - content/_partial/pfam-a-hmm.csv
 
 build:dev:
   extends: .build