/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.nutch.scoring.urlmeta;

import java.util.Collection;
import java.util.Iterator;
import java.util.Map.Entry;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.Text;
import org.apache.nutch.crawl.CrawlDatum;
import org.apache.nutch.parse.Parse;
import org.apache.nutch.parse.ParseData;
import org.apache.nutch.protocol.Content;
import org.apache.nutch.scoring.AbstractScoringFilter;
import org.apache.nutch.scoring.ScoringFilter;
import org.apache.nutch.scoring.ScoringFilterException;

/**
 * For documentation:
 * 
 * {@link org.apache.nutch.scoring.urlmeta}
 */
public class URLMetaScoringFilter extends AbstractScoringFilter {

  private static final String CONF_PROPERTY = "urlmeta.tags";
  private static String[] urlMetaTags;

  /**
   * This will take the metatags that you have listed in your "urlmeta.tags"
   * property, and looks for them inside the parseData object. If they exist,
   * this will be propagated into your 'targets' Collection's ["outlinks"]
   * attributes.
   * 
   * @see ScoringFilter#distributeScoreToOutlinks
   */
  @Override
  public CrawlDatum distributeScoreToOutlinks(Text fromUrl,
      ParseData parseData, Collection<Entry<Text, CrawlDatum>> targets,
      CrawlDatum adjust, int allCount) throws ScoringFilterException {
    if (urlMetaTags == null || targets == null || parseData == null)
      return adjust;

    Iterator<Entry<Text, CrawlDatum>> targetIterator = targets.iterator();

    while (targetIterator.hasNext()) {
      Entry<Text, CrawlDatum> nextTarget = targetIterator.next();

      for (String metatag : urlMetaTags) {
        String metaFromParse = parseData.getMeta(metatag);

        if (metaFromParse == null)
          continue;

        nextTarget.getValue().getMetaData()
            .put(new Text(metatag), new Text(metaFromParse));
      }
    }
    return adjust;
  }

  /**
   * Takes the metadata, specified in your "urlmeta.tags" property, from the
   * datum object and injects it into the content. This is transfered to the
   * parseData object.
   * 
   * @see ScoringFilter#passScoreBeforeParsing
   * @see URLMetaScoringFilter#passScoreAfterParsing
   */
  @Override
  public void passScoreBeforeParsing(Text url, CrawlDatum datum, Content content) {
    if (urlMetaTags == null || content == null || datum == null)
      return;

    for (String metatag : urlMetaTags) {
      Text metaFromDatum = (Text) datum.getMetaData().get(new Text(metatag));

      if (metaFromDatum == null)
        continue;

      content.getMetadata().set(metatag, metaFromDatum.toString());
    }
  }

  /**
   * Takes the metadata, which was lumped inside the content, and replicates it
   * within your parse data.
   * 
   * @see URLMetaScoringFilter#passScoreBeforeParsing
   * @see ScoringFilter#passScoreAfterParsing
   */
  @Override
  public void passScoreAfterParsing(Text url, Content content, Parse parse) {
    if (urlMetaTags == null || content == null || parse == null)
      return;

    for (String metatag : urlMetaTags) {
      String metaFromContent = content.getMetadata().get(metatag);

      if (metaFromContent == null)
        continue;

      parse.getData().getParseMeta().set(metatag, metaFromContent);
    }
  }

  /**
   * handles conf assignment and pulls the value assignment from the
   * "urlmeta.tags" property
   */
  @Override
  public void setConf(Configuration conf) {
    super.setConf(conf);

    if (conf == null)
      return;

    urlMetaTags = conf.getStrings(CONF_PROPERTY);
  }

}
