parse/modules/bibtex/prop.js

/**
 * @module input/bibtex
 */

import parseName from '../../name'
import parseDate from '../../date'

/**
 * To match months.
 *
 * @access private
 * @constant propMap
 * @type {Array<RegExp>}
 * @default
 */
const months = [
  /jan(uary)?\.?/i,
  /feb(ruary)?\.?/i,
  /mar(ch)?\.?/i,
  /apr(il)?\.?/i,
  /may\.?/i,
  /jun(e)?\.?/i,
  /jul(y)?\.?/i,
  /aug(ust)?\.?/i,
  /sep(tember)?\.?/i,
  /oct(ober)?\.?/i,
  /nov(ember)?\.?/i,
  /dec(ember)?\.?/i
]

/**
 * Parse date into list of CSL date object.
 *
 * @access private
 * @method parseBibtexDate
 *
 * @param {String} value - date
 * @return {Object} CSL date object
 */
const parseBibtexDate = function (value) {
  if (/{|}/.test(value)) {
    return {literal: value.replace(/[{}]/g, '')}
  } else {
    return parseDate(value)
  }
}

/**
 * Parse name into CSL name objects.
 *
 * @access private
 * @method parseBibtexName
 *
 * @param {String} name - name
 * @return {Object} CSL name object
 */
const parseBibtexName = function (name) {
  if (/{|}/.test(name)) {
    return {literal: name.replace(/[{}]/g, '')}
  } else {
    return parseName(name)
  }
}

/**
 * Parse list of names into list of CSL name objects.
 *
 * @access private
 * @method parseBibtexNameList
 *
 * @param {String} list - list of names separated by ' and '
 * @return {Array<Object>} array of CSL name objects
 */
const parseBibtexNameList = function (list) {
  const literals = []

  // To split author names by ' and '  while supporting literal names like
  // '{National Academy for Arts and Sciences}' (i.e. some name with ' and '
  // in it), we first pick a escaping character ('%')...

  // ...escape all '%'s and remove all literals ('{...}')...
  list = list.replace(/%/g, '%0').replace(/{.*?}/g, m => `%[${literals.push(m) - 1}]`)

  // ...split the string...
  return list.split(' and ')
  // ...re-insert all literals and unescape all '%'s...
    .map(name => name.replace(/%\[(\d+)\]/, (_, i) => literals[+i]).replace(/%0/g, '%'))
  // ...and parse the names to make sure literals are actually preserved.
    .map(parseBibtexName)
}

const richTextMappings = {
  textit: 'i',
  textbf: 'b',
  textsc: 'sc',
  textsuperscript: 'sup',
  textsubscript: 'sub'
}

/**
 * @access private
 * @param {String} text - BibTeX rich text
 * @return {String} CSL-JSON rich text
 */
const parseBibtexRichText = function (text) {
  // tokens at even indices are text, odd indices are markup
  let tokens = text.split(/((?:\\text[a-z]+)?{|})/)

  let closingTags = []

  // there could be a top-level tag if the first and last characters are brackets.
  // if it isn't, it's set to false later on
  let hasTopLevelTag = text[0] === '{' && text[text.length - 1] === '}'

  tokens = tokens.map((token, index) => {
    // return text as-is
    if (index % 2 === 0) {
      return token

    // handle style tags
    } else if (token[0] === '\\') {
      let tag = richTextMappings[token.slice(1, -1)]
      closingTags.push(`</${tag}>`)
      return `<${tag}>`

    // handle nocase tags (e.g. text wrapped in {...})
    } else if (token === '{') {
      closingTags.push('</span>')
      return '<span class="nocase">'

    // handle closing tags
    } else if (token === '}') {
      // If the top-level tag gets closed (the one at index 0, i.e. length 1) and
      // this isn't the last markup tag yet (at index - 1, i.e. length -2), there
      // can't be a top-level tag
      if (closingTags.length === 1 && index !== tokens.length - 2) {
        hasTopLevelTag = false
      }
      return closingTags.pop()
    }
  })

  // remove top-level tag (e.g. second and second to last tokens, equivalent to the
  // first and last markup tokens)
  if (hasTopLevelTag) {
    tokens.splice(0, 2)
    tokens.splice(-2, 2)
  }

  return tokens.join('')
}

/**
 * Map holding information on BibTeX-JSON fields.
 *
 *  * If true, field name should stay the same
 *  * If false, field should be ignored
 *  * If string, use as field name
 *  * Special strings are used to merge into complex objects
 *
 * @access private
 * @constant propMap
 * @default
 */
const propMap = {
  address: 'publisher-place',
  author: true,
  booktitle: 'container-title',
  doi: 'DOI',
  date: 'issued',
  edition: true,
  editor: true,
  isbn: 'ISBN',
  issn: 'ISSN',
  issue: 'issue',
  journal: 'container-title',
  language: true,
  location: 'publisher-place',
  note: true,
  number: 'issue',
  numpages: 'number-of-pages',
  pages: 'page',
  pmid: 'PMID',
  pmcid: 'PMCID',
  publisher: true,
  series: 'collection-title',
  title: true,
  url: 'URL',
  volume: true,

  // prepare for merge
  year: 'issued:date-parts.0.0',
  month: 'issued:date-parts.0.1',
  day: 'issued:date-parts.0.2',

  // ignore
  crossref: false,
  keywords: false
}

/**
 * Transform property and value from BibTeX-JSON format to CSL-JSON
 *
 * @access protected
 * @method parseBibTeXProp
 *
 * @param {String} name - Field name
 * @param {String} value - Field value
 *
 * @return {Array<String>} Array with new name and value
 */
const parseBibTeXProp = function (name, value) {
  if (!propMap.hasOwnProperty(name)) {
    logger.info('[set]', `Unknown property: ${name}`)
    return undefined
  } else if (propMap[name] === false) {
    return undefined
  }

  const cslProp = propMap[name] === true ? name : propMap[name]
  const cslValue = ((name, value) => {
    switch (name) {
      case 'author':
      case 'editor':
        return parseBibtexNameList(value)

      case 'issued':
        return parseBibtexDate(value)

      case 'edition':
        // return parseOrdinal(value)
        return value

      case 'issued:date-parts.0.1':
        return parseFloat(value) ? value : months.findIndex(month => month.test(value)) + 1

      case 'page':
        return value.replace(/[—–]/, '-')

      case 'title':
        return parseBibtexRichText(value)

      default:
        return value.replace(/[{}]/g, '')
    }
  })(cslProp, value)

  return [cslProp, cslValue]
}

export {
  parseBibTeXProp as parse,
  parseBibTeXProp as default
}